llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
   2 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,SSE1
   3 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
   4 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42
   5 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
   6 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
   7 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
   8 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512
   9 ;
  10 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SLM
  11 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,GLM
  12 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX
  13
  14 define i32 @fadd(i32 %arg) {
  15 ; SSE1-LABEL: 'fadd'
  16 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fadd float undef, undef
  17 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <4 x float> undef, undef
  18 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fadd <8 x float> undef, undef
  19 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fadd <16 x float> undef, undef
  20 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fadd double undef, undef
  21 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fadd <2 x double> undef, undef
  22 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fadd <4 x double> undef, undef
  23 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fadd <8 x double> undef, undef
  24 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  25 ;
  26 ; SSE2-LABEL: 'fadd'
  27 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fadd float undef, undef
  28 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <4 x float> undef, undef
  29 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fadd <8 x float> undef, undef
  30 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fadd <16 x float> undef, undef
  31 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fadd double undef, undef
  32 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fadd <2 x double> undef, undef
  33 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fadd <4 x double> undef, undef
  34 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fadd <8 x double> undef, undef
  35 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  36 ;
  37 ; SSE42-LABEL: 'fadd'
  38 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fadd float undef, undef
  39 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <4 x float> undef, undef
  40 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fadd <8 x float> undef, undef
  41 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fadd <16 x float> undef, undef
  42 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fadd double undef, undef
  43 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fadd <2 x double> undef, undef
  44 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fadd <4 x double> undef, undef
  45 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fadd <8 x double> undef, undef
  46 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  47 ;
  48 ; AVX-LABEL: 'fadd'
  49 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fadd float undef, undef
  50 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <4 x float> undef, undef
  51 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fadd <8 x float> undef, undef
  52 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fadd <16 x float> undef, undef
  53 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fadd double undef, undef
  54 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fadd <2 x double> undef, undef
  55 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fadd <4 x double> undef, undef
  56 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fadd <8 x double> undef, undef
  57 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  58 ;
  59 ; AVX512-LABEL: 'fadd'
  60 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fadd float undef, undef
  61 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <4 x float> undef, undef
  62 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fadd <8 x float> undef, undef
  63 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fadd <16 x float> undef, undef
  64 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fadd double undef, undef
  65 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fadd <2 x double> undef, undef
  66 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fadd <4 x double> undef, undef
  67 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fadd <8 x double> undef, undef
  68 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  69 ;
  70 ; SLM-LABEL: 'fadd'
  71 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fadd float undef, undef
  72 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <4 x float> undef, undef
  73 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fadd <8 x float> undef, undef
  74 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fadd <16 x float> undef, undef
  75 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fadd double undef, undef
  76 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fadd <2 x double> undef, undef
  77 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fadd <4 x double> undef, undef
  78 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fadd <8 x double> undef, undef
  79 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  80 ;
  81 ; GLM-LABEL: 'fadd'
  82 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fadd float undef, undef
  83 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <4 x float> undef, undef
  84 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fadd <8 x float> undef, undef
  85 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fadd <16 x float> undef, undef
  86 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fadd double undef, undef
  87 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fadd <2 x double> undef, undef
  88 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fadd <4 x double> undef, undef
  89 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fadd <8 x double> undef, undef
  90 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  91 ;
  92   %F32 = fadd float undef, undef
  93   %V4F32 = fadd <4 x float> undef, undef
  94   %V8F32 = fadd <8 x float> undef, undef
  95   %V16F32 = fadd <16 x float> undef, undef
  96
  97   %F64 = fadd double undef, undef
  98   %V2F64 = fadd <2 x double> undef, undef
  99   %V4F64 = fadd <4 x double> undef, undef
 100   %V8F64 = fadd <8 x double> undef, undef
 101
 102   ret i32 undef
 103 }
 104
 105 define i32 @fsub(i32 %arg) {
 106 ; SSE1-LABEL: 'fsub'
 107 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float undef, undef
 108 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> undef, undef
 109 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> undef, undef
 110 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> undef, undef
 111 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double undef, undef
 112 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> undef, undef
 113 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> undef, undef
 114 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> undef, undef
 115 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 116 ;
 117 ; SSE2-LABEL: 'fsub'
 118 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float undef, undef
 119 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> undef, undef
 120 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> undef, undef
 121 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> undef, undef
 122 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double undef, undef
 123 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> undef, undef
 124 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> undef, undef
 125 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> undef, undef
 126 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 127 ;
 128 ; SSE42-LABEL: 'fsub'
 129 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float undef, undef
 130 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> undef, undef
 131 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> undef, undef
 132 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> undef, undef
 133 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double undef, undef
 134 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> undef, undef
 135 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> undef, undef
 136 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> undef, undef
 137 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 138 ;
 139 ; AVX-LABEL: 'fsub'
 140 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float undef, undef
 141 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> undef, undef
 142 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> undef, undef
 143 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> undef, undef
 144 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double undef, undef
 145 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> undef, undef
 146 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> undef, undef
 147 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> undef, undef
 148 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 149 ;
 150 ; AVX512-LABEL: 'fsub'
 151 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float undef, undef
 152 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> undef, undef
 153 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> undef, undef
 154 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> undef, undef
 155 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double undef, undef
 156 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> undef, undef
 157 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> undef, undef
 158 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> undef, undef
 159 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 160 ;
 161 ; SLM-LABEL: 'fsub'
 162 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float undef, undef
 163 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> undef, undef
 164 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> undef, undef
 165 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> undef, undef
 166 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double undef, undef
 167 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> undef, undef
 168 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> undef, undef
 169 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> undef, undef
 170 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 171 ;
 172 ; GLM-LABEL: 'fsub'
 173 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float undef, undef
 174 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> undef, undef
 175 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> undef, undef
 176 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> undef, undef
 177 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double undef, undef
 178 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> undef, undef
 179 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> undef, undef
 180 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> undef, undef
 181 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 182 ;
 183   %F32 = fsub float undef, undef
 184   %V4F32 = fsub <4 x float> undef, undef
 185   %V8F32 = fsub <8 x float> undef, undef
 186   %V16F32 = fsub <16 x float> undef, undef
 187
 188   %F64 = fsub double undef, undef
 189   %V2F64 = fsub <2 x double> undef, undef
 190   %V4F64 = fsub <4 x double> undef, undef
 191   %V8F64 = fsub <8 x double> undef, undef
 192
 193   ret i32 undef
 194 }
 195
 196 define i32 @fneg_idiom(i32 %arg) {
 197 ; SSE1-LABEL: 'fneg_idiom'
 198 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
 199 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
 200 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
 201 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 202 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
 203 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
 204 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
 205 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 206 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 207 ;
 208 ; SSE2-LABEL: 'fneg_idiom'
 209 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
 210 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
 211 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
 212 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 213 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
 214 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
 215 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
 216 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 217 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 218 ;
 219 ; SSE42-LABEL: 'fneg_idiom'
 220 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
 221 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
 222 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
 223 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 224 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
 225 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
 226 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
 227 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 228 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 229 ;
 230 ; AVX-LABEL: 'fneg_idiom'
 231 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
 232 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
 233 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
 234 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 235 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
 236 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
 237 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
 238 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 239 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 240 ;
 241 ; AVX512-LABEL: 'fneg_idiom'
 242 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
 243 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
 244 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
 245 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 246 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
 247 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
 248 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
 249 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 250 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 251 ;
 252 ; SLM-LABEL: 'fneg_idiom'
 253 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
 254 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
 255 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
 256 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 257 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
 258 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
 259 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
 260 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 261 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 262 ;
 263 ; GLM-LABEL: 'fneg_idiom'
 264 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
 265 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
 266 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
 267 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 268 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
 269 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
 270 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
 271 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 272 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 273 ;
 274   %F32 = fsub float -0.0, undef
 275   %V4F32 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, undef
 276   %V8F32 = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, undef
 277   %V16F32 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, undef
 278
 279   %F64 = fsub double -0.0, undef
 280   %V2F64 = fsub <2 x double> <double -0.0, double -0.0>, undef
 281   %V4F64 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, undef
 282   %V8F64 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, undef
 283
 284   ret i32 undef
 285 }
 286
 287 define i32 @fneg(i32 %arg) {
 288 ; SSE1-LABEL: 'fneg'
 289 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = fneg float undef
 290 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fneg <4 x float> undef
 291 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fneg <8 x float> undef
 292 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fneg <16 x float> undef
 293 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F64 = fneg double undef
 294 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = fneg <2 x double> undef
 295 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = fneg <4 x double> undef
 296 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = fneg <8 x double> undef
 297 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 298 ;
 299 ; SSE2-LABEL: 'fneg'
 300 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef
 301 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef
 302 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> undef
 303 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fneg <16 x float> undef
 304 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef
 305 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef
 306 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <4 x double> undef
 307 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fneg <8 x double> undef
 308 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 309 ;
 310 ; SSE42-LABEL: 'fneg'
 311 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef
 312 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef
 313 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> undef
 314 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fneg <16 x float> undef
 315 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef
 316 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef
 317 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <4 x double> undef
 318 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fneg <8 x double> undef
 319 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 320 ;
 321 ; AVX-LABEL: 'fneg'
 322 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef
 323 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef
 324 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> undef
 325 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fneg <16 x float> undef
 326 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef
 327 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef
 328 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <4 x double> undef
 329 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fneg <8 x double> undef
 330 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 331 ;
 332 ; AVX512-LABEL: 'fneg'
 333 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef
 334 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef
 335 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> undef
 336 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = fneg <16 x float> undef
 337 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef
 338 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef
 339 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <4 x double> undef
 340 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = fneg <8 x double> undef
 341 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 342 ;
 343 ; SLM-LABEL: 'fneg'
 344 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef
 345 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef
 346 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> undef
 347 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fneg <16 x float> undef
 348 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef
 349 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef
 350 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <4 x double> undef
 351 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fneg <8 x double> undef
 352 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 353 ;
 354 ; GLM-LABEL: 'fneg'
 355 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef
 356 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef
 357 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> undef
 358 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fneg <16 x float> undef
 359 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef
 360 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef
 361 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <4 x double> undef
 362 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fneg <8 x double> undef
 363 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 364 ;
 365   %F32 = fneg float undef
 366   %V4F32 = fneg <4 x float> undef
 367   %V8F32 = fneg <8 x float> undef
 368   %V16F32 = fneg <16 x float> undef
 369
 370   %F64 = fneg double undef
 371   %V2F64 = fneg <2 x double> undef
 372   %V4F64 = fneg <4 x double> undef
 373   %V8F64 = fneg <8 x double> undef
 374
 375   ret i32 undef
 376 }
 377
 378 define i32 @fmul(i32 %arg) {
 379 ; SSE1-LABEL: 'fmul'
 380 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef
 381 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef
 382 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fmul <8 x float> undef, undef
 383 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fmul <16 x float> undef, undef
 384 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef
 385 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fmul <2 x double> undef, undef
 386 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fmul <4 x double> undef, undef
 387 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fmul <8 x double> undef, undef
 388 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 389 ;
 390 ; SSE2-LABEL: 'fmul'
 391 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef
 392 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef
 393 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fmul <8 x float> undef, undef
 394 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fmul <16 x float> undef, undef
 395 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef
 396 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef
 397 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fmul <4 x double> undef, undef
 398 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fmul <8 x double> undef, undef
 399 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 400 ;
 401 ; SSE42-LABEL: 'fmul'
 402 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef
 403 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef
 404 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fmul <8 x float> undef, undef
 405 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fmul <16 x float> undef, undef
 406 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef
 407 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef
 408 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fmul <4 x double> undef, undef
 409 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fmul <8 x double> undef, undef
 410 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 411 ;
 412 ; AVX-LABEL: 'fmul'
 413 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef
 414 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef
 415 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fmul <8 x float> undef, undef
 416 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fmul <16 x float> undef, undef
 417 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef
 418 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef
 419 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fmul <4 x double> undef, undef
 420 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fmul <8 x double> undef, undef
 421 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 422 ;
 423 ; AVX512-LABEL: 'fmul'
 424 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef
 425 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef
 426 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fmul <8 x float> undef, undef
 427 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fmul <16 x float> undef, undef
 428 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef
 429 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef
 430 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fmul <4 x double> undef, undef
 431 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fmul <8 x double> undef, undef
 432 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 433 ;
 434 ; SLM-LABEL: 'fmul'
 435 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef
 436 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef
 437 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fmul <8 x float> undef, undef
 438 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fmul <16 x float> undef, undef
 439 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef
 440 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef
 441 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fmul <4 x double> undef, undef
 442 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fmul <8 x double> undef, undef
 443 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 444 ;
 445 ; GLM-LABEL: 'fmul'
 446 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef
 447 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef
 448 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fmul <8 x float> undef, undef
 449 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fmul <16 x float> undef, undef
 450 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef
 451 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef
 452 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fmul <4 x double> undef, undef
 453 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fmul <8 x double> undef, undef
 454 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 455 ;
 456   %F32 = fmul float undef, undef
 457   %V4F32 = fmul <4 x float> undef, undef
 458   %V8F32 = fmul <8 x float> undef, undef
 459   %V16F32 = fmul <16 x float> undef, undef
 460
 461   %F64 = fmul double undef, undef
 462   %V2F64 = fmul <2 x double> undef, undef
 463   %V4F64 = fmul <4 x double> undef, undef
 464   %V8F64 = fmul <8 x double> undef, undef
 465
 466   ret i32 undef
 467 }
 468
 469 define i32 @fdiv(i32 %arg) {
 470 ; SSE1-LABEL: 'fdiv'
 471 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fdiv float undef, undef
 472 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fdiv <4 x float> undef, undef
 473 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fdiv <8 x float> undef, undef
 474 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef
 475 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fdiv double undef, undef
 476 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fdiv <2 x double> undef, undef
 477 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> undef, undef
 478 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fdiv <8 x double> undef, undef
 479 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 480 ;
 481 ; SSE2-LABEL: 'fdiv'
 482 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fdiv float undef, undef
 483 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fdiv <4 x float> undef, undef
 484 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fdiv <8 x float> undef, undef
 485 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef
 486 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fdiv double undef, undef
 487 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fdiv <2 x double> undef, undef
 488 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fdiv <4 x double> undef, undef
 489 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fdiv <8 x double> undef, undef
 490 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 491 ;
 492 ; SSE42-LABEL: 'fdiv'
 493 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fdiv float undef, undef
 494 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fdiv <4 x float> undef, undef
 495 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fdiv <8 x float> undef, undef
 496 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef
 497 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fdiv double undef, undef
 498 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fdiv <2 x double> undef, undef
 499 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fdiv <4 x double> undef, undef
 500 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fdiv <8 x double> undef, undef
 501 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 502 ;
 503 ; AVX-LABEL: 'fdiv'
 504 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fdiv float undef, undef
 505 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fdiv <4 x float> undef, undef
 506 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = fdiv <8 x float> undef, undef
 507 ; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16F32 = fdiv <16 x float> undef, undef
 508 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fdiv double undef, undef
 509 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fdiv <2 x double> undef, undef
 510 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F64 = fdiv <4 x double> undef, undef
 511 ; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F64 = fdiv <8 x double> undef, undef
 512 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 513 ;
 514 ; AVX512-LABEL: 'fdiv'
 515 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fdiv float undef, undef
 516 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fdiv <4 x float> undef, undef
 517 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fdiv <8 x float> undef, undef
 518 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16F32 = fdiv <16 x float> undef, undef
 519 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fdiv double undef, undef
 520 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fdiv <2 x double> undef, undef
 521 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fdiv <4 x double> undef, undef
 522 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8F64 = fdiv <8 x double> undef, undef
 523 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 524 ;
 525 ; SLM-LABEL: 'fdiv'
 526 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fdiv float undef, undef
 527 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = fdiv <4 x float> undef, undef
 528 ; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8F32 = fdiv <8 x float> undef, undef
 529 ; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16F32 = fdiv <16 x float> undef, undef
 530 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fdiv double undef, undef
 531 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = fdiv <2 x double> undef, undef
 532 ; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = fdiv <4 x double> undef, undef
 533 ; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = fdiv <8 x double> undef, undef
 534 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 535 ;
 536 ; GLM-LABEL: 'fdiv'
 537 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fdiv float undef, undef
 538 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fdiv <4 x float> undef, undef
 539 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fdiv <8 x float> undef, undef
 540 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef
 541 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fdiv double undef, undef
 542 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fdiv <2 x double> undef, undef
 543 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fdiv <4 x double> undef, undef
 544 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fdiv <8 x double> undef, undef
 545 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 546 ;
 547   %F32 = fdiv float undef, undef
 548   %V4F32 = fdiv <4 x float> undef, undef
 549   %V8F32 = fdiv <8 x float> undef, undef
 550   %V16F32 = fdiv <16 x float> undef, undef
 551
 552   %F64 = fdiv double undef, undef
 553   %V2F64 = fdiv <2 x double> undef, undef
 554   %V4F64 = fdiv <4 x double> undef, undef
 555   %V8F64 = fdiv <8 x double> undef, undef
 556
 557   ret i32 undef
 558 }
 559
 560 define i32 @frem(i32 %arg) {
 561 ; CHECK-LABEL: 'frem'
 562 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = frem float undef, undef
 563 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = frem <4 x float> undef, undef
 564 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = frem <8 x float> undef, undef
 565 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = frem <16 x float> undef, undef
 566 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = frem double undef, undef
 567 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = frem <2 x double> undef, undef
 568 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = frem <4 x double> undef, undef
 569 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = frem <8 x double> undef, undef
 570 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 571 ;
 572   %F32 = frem float undef, undef
 573   %V4F32 = frem <4 x float> undef, undef
 574   %V8F32 = frem <8 x float> undef, undef
 575   %V16F32 = frem <16 x float> undef, undef
 576
 577   %F64 = frem double undef, undef
 578   %V2F64 = frem <2 x double> undef, undef
 579   %V4F64 = frem <4 x double> undef, undef
 580   %V8F64 = frem <8 x double> undef, undef
 581
 582   ret i32 undef
 583 }
 584
 585 define i32 @fsqrt(i32 %arg) {
 586 ; SSE1-LABEL: 'fsqrt'
 587 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
 588 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
 589 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
 590 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
 591 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
 592 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
 593 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
 594 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 595 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 596 ;
 597 ; SSE2-LABEL: 'fsqrt'
 598 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
 599 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
 600 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
 601 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
 602 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
 603 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
 604 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
 605 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 606 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 607 ;
 608 ; SSE42-LABEL: 'fsqrt'
 609 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
 610 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
 611 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
 612 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
 613 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
 614 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
 615 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
 616 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 617 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 618 ;
 619 ; AVX-LABEL: 'fsqrt'
 620 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
 621 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
 622 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
 623 ; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
 624 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
 625 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
 626 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
 627 ; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 628 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 629 ;
 630 ; AVX512-LABEL: 'fsqrt'
 631 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
 632 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
 633 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
 634 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
 635 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
 636 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
 637 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
 638 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 639 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 640 ;
 641 ; SLM-LABEL: 'fsqrt'
 642 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
 643 ; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
 644 ; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
 645 ; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
 646 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
 647 ; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
 648 ; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
 649 ; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 650 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 651 ;
 652 ; GLM-LABEL: 'fsqrt'
 653 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
 654 ; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
 655 ; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
 656 ; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
 657 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
 658 ; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
 659 ; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
 660 ; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 661 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 662 ;
 663   %F32 = call float @llvm.sqrt.f32(float undef)
 664   %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
 665   %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
 666   %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
 667
 668   %F64 = call double @llvm.sqrt.f64(double undef)
 669   %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
 670   %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
 671   %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 672
 673   ret i32 undef
 674 }
 675
 676 define i32 @fabs(i32 %arg) {
 677 ; SSE1-LABEL: 'fabs'
 678 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef)
 679 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
 680 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
 681 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
 682 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fabs.f64(double undef)
 683 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
 684 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
 685 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
 686 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 687 ;
 688 ; SSE2-LABEL: 'fabs'
 689 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef)
 690 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
 691 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
 692 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
 693 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef)
 694 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
 695 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
 696 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
 697 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 698 ;
 699 ; SSE42-LABEL: 'fabs'
 700 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef)
 701 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
 702 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
 703 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
 704 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef)
 705 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
 706 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
 707 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
 708 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 709 ;
 710 ; AVX-LABEL: 'fabs'
 711 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef)
 712 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
 713 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
 714 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
 715 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef)
 716 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
 717 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
 718 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
 719 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 720 ;
 721 ; AVX512-LABEL: 'fabs'
 722 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef)
 723 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
 724 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
 725 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
 726 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef)
 727 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
 728 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
 729 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
 730 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 731 ;
 732 ; SLM-LABEL: 'fabs'
 733 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef)
 734 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
 735 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
 736 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
 737 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef)
 738 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
 739 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
 740 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
 741 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 742 ;
 743 ; GLM-LABEL: 'fabs'
 744 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef)
 745 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
 746 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
 747 ; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
 748 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef)
 749 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
 750 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
 751 ; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
 752 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 753 ;
 754   %F32 = call float @llvm.fabs.f32(float undef)
 755   %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
 756   %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
 757   %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
 758
 759   %F64 = call double @llvm.fabs.f64(double undef)
 760   %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
 761   %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
 762   %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
 763
 764   ret i32 undef
 765 }
 766
 767 define i32 @fcopysign(i32 %arg) {
 768 ; SSE1-LABEL: 'fcopysign'
 769 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
 770 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
 771 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
 772 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
 773 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
 774 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
 775 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
 776 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
 777 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 778 ;
 779 ; SSE2-LABEL: 'fcopysign'
 780 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
 781 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
 782 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
 783 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
 784 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
 785 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
 786 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
 787 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
 788 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 789 ;
 790 ; SSE42-LABEL: 'fcopysign'
 791 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
 792 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
 793 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
 794 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
 795 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
 796 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
 797 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
 798 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
 799 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 800 ;
 801 ; AVX-LABEL: 'fcopysign'
 802 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
 803 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
 804 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
 805 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
 806 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
 807 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
 808 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
 809 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
 810 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 811 ;
 812 ; AVX512-LABEL: 'fcopysign'
 813 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
 814 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
 815 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
 816 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
 817 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
 818 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
 819 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
 820 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
 821 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 822 ;
 823 ; SLM-LABEL: 'fcopysign'
 824 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
 825 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
 826 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
 827 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
 828 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
 829 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
 830 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
 831 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
 832 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 833 ;
 834 ; GLM-LABEL: 'fcopysign'
 835 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
 836 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
 837 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
 838 ; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
 839 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
 840 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
 841 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
 842 ; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
 843 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 844 ;
 845   %F32 = call float @llvm.copysign.f32(float undef, float undef)
 846   %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
 847   %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
 848   %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
 849
 850   %F64 = call double @llvm.copysign.f64(double undef, double undef)
 851   %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
 852   %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
 853   %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
 854
 855   ret i32 undef
 856 }
 857
 858 define i32 @fma(i32 %arg) {
 859 ; SSE1-LABEL: 'fma'
 860 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
 861 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
 862 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
 863 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
 864 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
 865 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
 866 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
 867 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
 868 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 869 ;
 870 ; SSE2-LABEL: 'fma'
 871 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
 872 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
 873 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
 874 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
 875 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
 876 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
 877 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
 878 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
 879 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 880 ;
 881 ; SSE42-LABEL: 'fma'
 882 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
 883 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
 884 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
 885 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
 886 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
 887 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
 888 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
 889 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
 890 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 891 ;
 892 ; AVX-LABEL: 'fma'
 893 ; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
 894 ; AVX-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
 895 ; AVX-NEXT:  Cost Model: Found an estimated cost of 87 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
 896 ; AVX-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
 897 ; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
 898 ; AVX-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
 899 ; AVX-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
 900 ; AVX-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
 901 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 902 ;
 903 ; AVX512-LABEL: 'fma'
 904 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
 905 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
 906 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
 907 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
 908 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
 909 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
 910 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
 911 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
 912 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 913 ;
 914 ; SLM-LABEL: 'fma'
 915 ; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
 916 ; SLM-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
 917 ; SLM-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
 918 ; SLM-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
 919 ; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
 920 ; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
 921 ; SLM-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
 922 ; SLM-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
 923 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 924 ;
 925 ; GLM-LABEL: 'fma'
 926 ; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
 927 ; GLM-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
 928 ; GLM-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
 929 ; GLM-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
 930 ; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
 931 ; GLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
 932 ; GLM-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
 933 ; GLM-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
 934 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 935 ;
 936   %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
 937   %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
 938   %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
 939   %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
 940
 941   %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
 942   %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
 943   %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
 944   %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
 945
 946   ret i32 undef
 947 }
 948
 949 define i32 @rint(i32 %arg) {
 950 ; SSE1-LABEL: 'rint'
 951 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.rint.f32(float undef)
 952 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
 953 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
 954 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
 955 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.rint.f64(double undef)
 956 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
 957 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
 958 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
 959 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 960 ;
 961 ; SSE2-LABEL: 'rint'
 962 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.rint.f32(float undef)
 963 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
 964 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
 965 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
 966 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.rint.f64(double undef)
 967 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
 968 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
 969 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
 970 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 971 ;
 972 ; SSE42-LABEL: 'rint'
 973 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
 974 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
 975 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
 976 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
 977 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
 978 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
 979 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
 980 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
 981 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 982 ;
 983 ; AVX-LABEL: 'rint'
 984 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
 985 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
 986 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
 987 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
 988 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
 989 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
 990 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
 991 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
 992 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 993 ;
 994 ; AVX512-LABEL: 'rint'
 995 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
 996 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
 997 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
 998 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
 999 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
1000 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
1001 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
1002 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
1003 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1004 ;
1005 ; SLM-LABEL: 'rint'
1006 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
1007 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
1008 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
1009 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
1010 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
1011 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
1012 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
1013 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
1014 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1015 ;
1016 ; GLM-LABEL: 'rint'
1017 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
1018 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
1019 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
1020 ; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
1021 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
1022 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
1023 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
1024 ; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
1025 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1026 ;
1027   %F32 = call float @llvm.rint.f32.f32(float undef)
1028   %V4F32 = call <4 x float> @llvm.rint.v4f32.v4f32(<4 x float> undef)
1029   %V8F32 = call <8 x float> @llvm.rint.v8f32.v8f32(<8 x float> undef)
1030   %V16F32 = call <16 x float> @llvm.rint.v16f32.v16f32(<16 x float> undef)
1031
1032   %F64 = call double @llvm.rint.f64.f64(double undef)
1033   %V2F64 = call <2 x double> @llvm.rint.v2f64.v2f64(<2 x double> undef)
1034   %V4F64 = call <4 x double> @llvm.rint.v4f64.v4f64(<4 x double> undef)
1035   %V8F64 = call <8 x double> @llvm.rint.v8f64.v8f64(<8 x double> undef)
1036
1037   ret i32 undef
1038 }
1039
1040 define i32 @lrint(i32 %arg) {
1041 ; SSE1-LABEL: 'lrint'
1042 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
1043 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
1044 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
1045 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
1046 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
1047 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
1048 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
1049 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
1050 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1051 ;
1052 ; SSE2-LABEL: 'lrint'
1053 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
1054 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
1055 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
1056 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
1057 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
1058 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
1059 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
1060 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
1061 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1062 ;
1063 ; SSE42-LABEL: 'lrint'
1064 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
1065 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
1066 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
1067 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
1068 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
1069 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
1070 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
1071 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
1072 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1073 ;
1074 ; AVX-LABEL: 'lrint'
1075 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
1076 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
1077 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
1078 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
1079 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
1080 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
1081 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
1082 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
1083 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1084 ;
1085 ; AVX512-LABEL: 'lrint'
1086 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
1087 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
1088 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
1089 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
1090 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
1091 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
1092 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
1093 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
1094 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1095 ;
1096 ; SLM-LABEL: 'lrint'
1097 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
1098 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
1099 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
1100 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
1101 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
1102 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
1103 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
1104 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
1105 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1106 ;
1107 ; GLM-LABEL: 'lrint'
1108 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
1109 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
1110 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
1111 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
1112 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
1113 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
1114 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
1115 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
1116 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1117 ;
1118   %F32 = call i32 @llvm.lrint.i32.f32(float undef)
1119   %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
1120   %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
1121   %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
1122
1123   %F64 = call i32 @llvm.lrint.i32.f64(double undef)
1124   %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
1125   %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
1126   %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
1127
1128   ret i32 undef
1129 }
1130
1131 define i32 @llrint(i32 %arg) {
1132 ; CHECK-LABEL: 'llrint'
1133 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
1134 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
1135 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
1136 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
1137 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
1138 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
1139 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
1140 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
1141 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1142 ;
1143   %F32 = call i64 @llvm.llrint.i64.f32(float undef)
1144   %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
1145   %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
1146   %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
1147
1148   %F64 = call i64 @llvm.llrint.i64.f64(double undef)
1149   %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
1150   %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
1151   %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
1152
1153   ret i32 undef
1154 }
1155
1156 declare float @llvm.sqrt.f32(float)
1157 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
1158 declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)
1159 declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
1160
1161 declare double @llvm.sqrt.f64(double)
1162 declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
1163 declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
1164 declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
1165
1166 declare float @llvm.fabs.f32(float)
1167 declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
1168 declare <8 x float> @llvm.fabs.v8f32(<8 x float>)
1169 declare <16 x float> @llvm.fabs.v16f32(<16 x float>)
1170
1171 declare double @llvm.fabs.f64(double)
1172 declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
1173 declare <4 x double> @llvm.fabs.v4f64(<4 x double>)
1174 declare <8 x double> @llvm.fabs.v8f64(<8 x double>)
1175
1176 declare float @llvm.copysign.f32(float, float)
1177 declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>)
1178 declare <8 x float> @llvm.copysign.v8f32(<8 x float>, <8 x float>)
1179 declare <16 x float> @llvm.copysign.v16f32(<16 x float>, <16 x float>)
1180
1181 declare double @llvm.copysign.f64(double, double)
1182 declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>)
1183 declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>)
1184 declare <8 x double> @llvm.copysign.v8f64(<8 x double>, <8 x double>)
1185
1186 declare float @llvm.fma.f32(float, float, float)
1187 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
1188 declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
1189 declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>)
1190
1191 declare double @llvm.fma.f64(double, double, double)
1192 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
1193 declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
1194 declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>)