test/Transforms/InstCombine/X86/x86-sse.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt < %s -instcombine -S | FileCheck %s
     ; The functions below feed the scalar SSE (*.ss) intrinsics with vectors whose
     ; lanes 1-3 hold constants. The expected output attached to each function shows
     ; which of those inserts and calls instcombine should remove, keep, or fold.
   3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
   4
   5 define float @test_rcp_ss_0(float %a) {
     ; Builds <%a, 1.0, 2.0, 3.0>, calls rcp.ss and returns only lane 0 of the result.
     ; Expected output: the constant inserts into lanes 1-3 are dropped, while the
     ; rcp.ss call and the lane-0 insert/extract remain.
   6 ; CHECK-LABEL: @test_rcp_ss_0(
   7 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
   8 ; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> [[TMP1]])
   9 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
  10 ; CHECK-NEXT:    ret float [[TMP3]]
  11 ;
  12   %1 = insertelement <4 x float> undef, float %a, i32 0
  13   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  14   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  15   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  16   %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
  17   %6 = extractelement <4 x float> %5, i32 0
  18   ret float %6
  19 }
  20
  21 define float @test_rcp_ss_1(float %a) {
     ; Same <%a, 1.0, 2.0, 3.0> operand as the lane-0 variant, but lane 1 of the
     ; rcp.ss result is returned. Expected output: everything folds to 1.0, the
     ; constant that was inserted into lane 1 of the operand.
  22 ; CHECK-LABEL: @test_rcp_ss_1(
  23 ; CHECK-NEXT:    ret float 1.000000e+00
  24 ;
  25   %1 = insertelement <4 x float> undef, float %a, i32 0
  26   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  27   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  28   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  29   %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
  30   %6 = extractelement <4 x float> %5, i32 1
  31   ret float %6
  32 }
  33
  34 define float @test_sqrt_ss_0(float %a) {
     ; Builds <%a, 1.0, 2.0, 3.0>, calls sqrt.ss and returns lane 0 of the result.
     ; Expected output: no vector operations remain; the x86 intrinsic is replaced
     ; by a scalar call to @llvm.sqrt.f32 on %a.
  35 ; CHECK-LABEL: @test_sqrt_ss_0(
  36 ; CHECK-NEXT:    [[TMP1:%.*]] = call float @llvm.sqrt.f32(float %a)
  37 ; CHECK-NEXT:    ret float [[TMP1]]
  38 ;
  39   %1 = insertelement <4 x float> undef, float %a, i32 0
  40   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  41   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  42   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  43   %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
  44   %6 = extractelement <4 x float> %5, i32 0
  45   ret float %6
  46 }
  47
  48 define float @test_sqrt_ss_2(float %a) {
     ; Returns lane 2 of sqrt.ss applied to <%a, 1.0, 2.0, 3.0>.
     ; Expected output: the function folds to 2.0, the constant inserted into
     ; lane 2 of the operand; the call disappears.
  49 ; CHECK-LABEL: @test_sqrt_ss_2(
  50 ; CHECK-NEXT:    ret float 2.000000e+00
  51 ;
  52   %1 = insertelement <4 x float> undef, float %a, i32 0
  53   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  54   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  55   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  56   %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
  57   %6 = extractelement <4 x float> %5, i32 2
  58   ret float %6
  59 }
  60
  61 define float @test_rsqrt_ss_0(float %a) {
     ; Builds <%a, 1.0, 2.0, 3.0>, calls rsqrt.ss and returns lane 0 of the result.
     ; Expected output: the constant inserts into lanes 1-3 are dropped, while the
     ; rsqrt.ss call and the lane-0 insert/extract remain.
  62 ; CHECK-LABEL: @test_rsqrt_ss_0(
  63 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
  64 ; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> [[TMP1]])
  65 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
  66 ; CHECK-NEXT:    ret float [[TMP3]]
  67 ;
  68   %1 = insertelement <4 x float> undef, float %a, i32 0
  69   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  70   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  71   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  72   %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
  73   %6 = extractelement <4 x float> %5, i32 0
  74   ret float %6
  75 }
  76
  77 define float @test_rsqrt_ss_3(float %a) {
     ; Returns lane 3 of rsqrt.ss applied to <%a, 1.0, 2.0, 3.0>.
     ; Expected output: the function folds to 3.0, the constant inserted into
     ; lane 3 of the operand; the call disappears.
  78 ; CHECK-LABEL: @test_rsqrt_ss_3(
  79 ; CHECK-NEXT:    ret float 3.000000e+00
  80 ;
  81   %1 = insertelement <4 x float> undef, float %a, i32 0
  82   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  83   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  84   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  85   %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
  86   %6 = extractelement <4 x float> %5, i32 3
  87   ret float %6
  88 }
  89
  90 define float @test_add_ss_0(float %a, float %b) {
     ; Operands are <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0>; only lane 0 of the
     ; add.ss result is returned.
     ; Expected output: the vector code collapses to a scalar `fadd float %a, %b`.
  91 ; CHECK-LABEL: @test_add_ss_0(
  92 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd float %a, %b
  93 ; CHECK-NEXT:    ret float [[TMP1]]
  94 ;
  95   %1 = insertelement <4 x float> undef, float %a, i32 0
  96   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  97   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  98   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  99   %5 = insertelement <4 x float> undef, float %b, i32 0
 100   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 101   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 102   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 103   %9 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %8)
 104   %r = extractelement <4 x float> %9, i32 0
 105   ret float %r
 106 }
 107
 108 define float @test_add_ss_1(float %a, float %b) {
     ; First operand is <%a, 1.0, 2.0, 3.0>; the second has only lane 0 set (%b).
     ; Lane 1 of the add.ss result is returned.
     ; Expected output: folds to 1.0, the lane-1 constant of the first operand.
 109 ; CHECK-LABEL: @test_add_ss_1(
 110 ; CHECK-NEXT:    ret float 1.000000e+00
 111 ;
 112   %1 = insertelement <4 x float> undef, float %a, i32 0
 113   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 114   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 115   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 116   %5 = insertelement <4 x float> undef, float %b, i32 0
 117   %6 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %5)
 118   %7 = extractelement <4 x float> %6, i32 1
 119   ret float %7
 120 }
 121
 122 define float @test_sub_ss_0(float %a, float %b) {
     ; Operands are <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0>; only lane 0 of the
     ; sub.ss result is returned.
     ; Expected output: the vector code collapses to a scalar `fsub float %a, %b`.
 123 ; CHECK-LABEL: @test_sub_ss_0(
 124 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub float %a, %b
 125 ; CHECK-NEXT:    ret float [[TMP1]]
 126 ;
 127   %1 = insertelement <4 x float> undef, float %a, i32 0
 128   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 129   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 130   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 131   %5 = insertelement <4 x float> undef, float %b, i32 0
 132   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 133   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 134   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 135   %9 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %8)
 136   %r = extractelement <4 x float> %9, i32 0
 137   ret float %r
 138 }
 139
 140 define float @test_sub_ss_2(float %a, float %b) {
     ; First operand is <%a, 1.0, 2.0, 3.0>; the second has only lane 0 set (%b).
     ; Lane 2 of the sub.ss result is returned.
     ; Expected output: folds to 2.0, the lane-2 constant of the first operand.
 141 ; CHECK-LABEL: @test_sub_ss_2(
 142 ; CHECK-NEXT:    ret float 2.000000e+00
 143 ;
 144   %1 = insertelement <4 x float> undef, float %a, i32 0
 145   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 146   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 147   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 148   %5 = insertelement <4 x float> undef, float %b, i32 0
 149   %6 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %5)
 150   %7 = extractelement <4 x float> %6, i32 2
 151   ret float %7
 152 }
 153
 154 define float @test_mul_ss_0(float %a, float %b) {
     ; Operands are <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0>; only lane 0 of the
     ; mul.ss result is returned.
     ; Expected output: the vector code collapses to a scalar `fmul float %a, %b`.
 155 ; CHECK-LABEL: @test_mul_ss_0(
 156 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul float %a, %b
 157 ; CHECK-NEXT:    ret float [[TMP1]]
 158 ;
 159   %1 = insertelement <4 x float> undef, float %a, i32 0
 160   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 161   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 162   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 163   %5 = insertelement <4 x float> undef, float %b, i32 0
 164   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 165   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 166   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 167   %9 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %8)
 168   %r = extractelement <4 x float> %9, i32 0
 169   ret float %r
 170 }
 171
 172 define float @test_mul_ss_3(float %a, float %b) {
     ; First operand is <%a, 1.0, 2.0, 3.0>; the second has only lane 0 set (%b).
     ; Lane 3 of the mul.ss result is returned.
     ; Expected output: folds to 3.0, the lane-3 constant of the first operand.
 173 ; CHECK-LABEL: @test_mul_ss_3(
 174 ; CHECK-NEXT:    ret float 3.000000e+00
 175 ;
 176   %1 = insertelement <4 x float> undef, float %a, i32 0
 177   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 178   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 179   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 180   %5 = insertelement <4 x float> undef, float %b, i32 0
 181   %6 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %5)
 182   %7 = extractelement <4 x float> %6, i32 3
 183   ret float %7
 184 }
 185
 186 define float @test_div_ss_0(float %a, float %b) {
     ; Operands are <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0>; only lane 0 of the
     ; div.ss result is returned.
     ; Expected output: the vector code collapses to a scalar `fdiv float %a, %b`.
 187 ; CHECK-LABEL: @test_div_ss_0(
 188 ; CHECK-NEXT:    [[TMP1:%.*]] = fdiv float %a, %b
 189 ; CHECK-NEXT:    ret float [[TMP1]]
 190 ;
 191   %1 = insertelement <4 x float> undef, float %a, i32 0
 192   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 193   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 194   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 195   %5 = insertelement <4 x float> undef, float %b, i32 0
 196   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 197   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 198   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 199   %9 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %8)
 200   %r = extractelement <4 x float> %9, i32 0
 201   ret float %r
 202 }
 203
 204 define float @test_div_ss_1(float %a, float %b) {
     ; First operand is <%a, 1.0, 2.0, 3.0>; the second has only lane 0 set (%b).
     ; Lane 1 of the div.ss result is returned.
     ; Expected output: folds to 1.0, the lane-1 constant of the first operand.
 205 ; CHECK-LABEL: @test_div_ss_1(
 206 ; CHECK-NEXT:    ret float 1.000000e+00
 207 ;
 208   %1 = insertelement <4 x float> undef, float %a, i32 0
 209   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 210   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 211   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 212   %5 = insertelement <4 x float> undef, float %b, i32 0
 213   %6 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %5)
 214   %7 = extractelement <4 x float> %6, i32 1
 215   ret float %7
 216 }
 217
 218 define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
     ; Whole-vector variant: lanes 1-3 of the second operand %b are overwritten with
     ; constants before min.ss is called, and the full result vector is returned.
     ; Expected output: the three inserts are removed and %b is passed unchanged.
 219 ; CHECK-LABEL: @test_min_ss(
 220 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %b)
 221 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 222 ;
 223   %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
 224   %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
 225   %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
 226   %4 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %3)
 227   ret <4 x float> %4
 228 }
 229
 230 define float @test_min_ss_0(float %a, float %b) {
     ; Operands are <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0>; only lane 0 of the
     ; min.ss result is returned.
     ; Expected output: the constant inserts are dropped from both operands, but the
     ; min.ss call itself is kept (it is not turned into a scalar operation).
 231 ; CHECK-LABEL: @test_min_ss_0(
 232 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
 233 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
 234 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 235 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
 236 ; CHECK-NEXT:    ret float [[TMP4]]
 237 ;
 238   %1 = insertelement <4 x float> undef, float %a, i32 0
 239   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 240   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 241   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 242   %5 = insertelement <4 x float> undef, float %b, i32 0
 243   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 244   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 245   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 246   %9 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %8)
 247   %10 = extractelement <4 x float> %9, i32 0
 248   ret float %10
 249 }
 250
 251 define float @test_min_ss_2(float %a, float %b) {
     ; First operand is <%a, 1.0, 2.0, 3.0>; the second has only lane 0 set (%b).
     ; Lane 2 of the min.ss result is returned.
     ; Expected output: folds to 2.0, the lane-2 constant of the first operand.
 252 ; CHECK-LABEL: @test_min_ss_2(
 253 ; CHECK-NEXT:    ret float 2.000000e+00
 254 ;
 255   %1 = insertelement <4 x float> undef, float %a, i32 0
 256   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 257   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 258   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 259   %5 = insertelement <4 x float> undef, float %b, i32 0
 260   %6 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %5)
 261   %7 = extractelement <4 x float> %6, i32 2
 262   ret float %7
 263 }
 264
 265 define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
     ; Whole-vector variant: lanes 1-3 of the second operand %b are overwritten with
     ; constants before max.ss is called, and the full result vector is returned.
     ; Expected output: the three inserts are removed and %b is passed unchanged.
 266 ; CHECK-LABEL: @test_max_ss(
 267 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %b)
 268 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 269 ;
 270   %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
 271   %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
 272   %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
 273   %4 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %3)
 274   ret <4 x float> %4
 275 }
 276
 277 define float @test_max_ss_0(float %a, float %b) {
     ; Operands are <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0>; only lane 0 of the
     ; max.ss result is returned.
     ; Expected output: the constant inserts are dropped from both operands, but the
     ; max.ss call itself is kept (it is not turned into a scalar operation).
 278 ; CHECK-LABEL: @test_max_ss_0(
 279 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
 280 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
 281 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 282 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
 283 ; CHECK-NEXT:    ret float [[TMP4]]
 284 ;
 285   %1 = insertelement <4 x float> undef, float %a, i32 0
 286   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 287   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 288   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 289   %5 = insertelement <4 x float> undef, float %b, i32 0
 290   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 291   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 292   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 293   %9 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %8)
 294   %10 = extractelement <4 x float> %9, i32 0
 295   ret float %10
 296 }
 297
 298 define float @test_max_ss_3(float %a, float %b) {
     ; First operand is <%a, 1.0, 2.0, 3.0>; the second has only lane 0 set (%b).
     ; Lane 3 of the max.ss result is returned.
     ; Expected output: folds to 3.0, the lane-3 constant of the first operand.
 299 ; CHECK-LABEL: @test_max_ss_3(
 300 ; CHECK-NEXT:    ret float 3.000000e+00
 301 ;
 302   %1 = insertelement <4 x float> undef, float %a, i32 0
 303   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 304   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 305   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 306   %5 = insertelement <4 x float> undef, float %b, i32 0
 307   %6 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %5)
 308   %7 = extractelement <4 x float> %6, i32 3
 309   ret float %7
 310 }
 311
 312 define <4 x float> @test_cmp_ss(<4 x float> %a, <4 x float> %b) {
     ; Whole-vector variant: lanes 1-3 of the second operand %b are overwritten with
     ; constants before cmp.ss (i8 immediate 0) is called; the full result is returned.
     ; Expected output: the three inserts are removed and %b is passed unchanged.
 313 ; CHECK-LABEL: @test_cmp_ss(
 314 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %b, i8 0)
 315 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 316 ;
 317   %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
 318   %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
 319   %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
 320   %4 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %3, i8 0)
 321   ret <4 x float> %4
 322 }
 323
 324 define float @test_cmp_ss_0(float %a, float %b) {
     ; Operands are <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0>; only lane 0 of the
     ; cmp.ss (i8 immediate 0) result is returned.
     ; Expected output: the constant inserts are dropped from both operands, but the
     ; cmp.ss call itself is kept.
 325 ; CHECK-LABEL: @test_cmp_ss_0(
 326 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
 327 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
 328 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]], i8 0)
 329 ; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
 330 ; CHECK-NEXT:    ret float [[R]]
 331 ;
 332   %1 = insertelement <4 x float> undef, float %a, i32 0
 333   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 334   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 335   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 336   %5 = insertelement <4 x float> undef, float %b, i32 0
 337   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 338   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 339   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 340   %9 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %8, i8 0)
 341   %r = extractelement <4 x float> %9, i32 0
 342   ret float %r
 343 }
 344
 345 define float @test_cmp_ss_1(float %a, float %b) {
     ; First operand is <%a, 1.0, 2.0, 3.0>; the second has only lane 0 set (%b).
     ; Lane 1 of the cmp.ss (i8 immediate 0) result is returned.
     ; Expected output: folds to 1.0, the lane-1 constant of the first operand.
 346 ; CHECK-LABEL: @test_cmp_ss_1(
 347 ; CHECK-NEXT:    ret float 1.000000e+00
 348 ;
 349   %1 = insertelement <4 x float> undef, float %a, i32 0
 350   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 351   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 352   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 353   %5 = insertelement <4 x float> undef, float %b, i32 0
 354   %6 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %5, i8 0)
 355   %7 = extractelement <4 x float> %6, i32 1
 356   ret float %7
 357 }
 358
 359 define i32 @test_comieq_ss_0(float %a, float %b) {
     ; Passes <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0> to comieq.ss and returns
     ; its i32 result directly.
     ; Expected output: the constant inserts into lanes 1-3 are dropped from both
     ; operands; the lane-0 inserts and the call remain.
 360 ; CHECK-LABEL: @test_comieq_ss_0(
 361 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
 362 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
 363 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 364 ; CHECK-NEXT:    ret i32 [[TMP3]]
 365 ;
 366   %1 = insertelement <4 x float> undef, float %a, i32 0
 367   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 368   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 369   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 370   %5 = insertelement <4 x float> undef, float %b, i32 0
 371   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 372   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 373   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 374   %9 = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %4, <4 x float> %8)
 375   ret i32 %9
 376 }
 377
 378 define i32 @test_comige_ss_0(float %a, float %b) {
     ; Passes <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0> to comige.ss and returns
     ; its i32 result directly.
     ; Expected output: the constant inserts into lanes 1-3 are dropped from both
     ; operands; the lane-0 inserts and the call remain.
 379 ; CHECK-LABEL: @test_comige_ss_0(
 380 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
 381 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
 382 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 383 ; CHECK-NEXT:    ret i32 [[TMP3]]
 384 ;
 385   %1 = insertelement <4 x float> undef, float %a, i32 0
 386   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 387   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 388   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 389   %5 = insertelement <4 x float> undef, float %b, i32 0
 390   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 391   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 392   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 393   %9 = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> %4, <4 x float> %8)
 394   ret i32 %9
 395 }
 396
 397 define i32 @test_comigt_ss_0(float %a, float %b) {
     ; Passes <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0> to comigt.ss and returns
     ; its i32 result directly.
     ; Expected output: the constant inserts into lanes 1-3 are dropped from both
     ; operands; the lane-0 inserts and the call remain.
 398 ; CHECK-LABEL: @test_comigt_ss_0(
 399 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
 400 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
 401 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 402 ; CHECK-NEXT:    ret i32 [[TMP3]]
 403 ;
 404   %1 = insertelement <4 x float> undef, float %a, i32 0
 405   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 406   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 407   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 408   %5 = insertelement <4 x float> undef, float %b, i32 0
 409   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 410   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 411   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 412   %9 = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> %4, <4 x float> %8)
 413   ret i32 %9
 414 }
 415
 416 define i32 @test_comile_ss_0(float %a, float %b) {
     ; Passes <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0> to comile.ss and returns
     ; its i32 result directly.
     ; Expected output: the constant inserts into lanes 1-3 are dropped from both
     ; operands; the lane-0 inserts and the call remain.
 417 ; CHECK-LABEL: @test_comile_ss_0(
 418 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
 419 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
 420 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 421 ; CHECK-NEXT:    ret i32 [[TMP3]]
 422 ;
 423   %1 = insertelement <4 x float> undef, float %a, i32 0
 424   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 425   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 426   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 427   %5 = insertelement <4 x float> undef, float %b, i32 0
 428   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 429   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 430   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 431   %9 = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> %4, <4 x float> %8)
 432   ret i32 %9
 433 }
 434
 435 define i32 @test_comilt_ss_0(float %a, float %b) {
     ; Passes <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0> to comilt.ss and returns
     ; its i32 result directly.
     ; Expected output: the constant inserts into lanes 1-3 are dropped from both
     ; operands; the lane-0 inserts and the call remain.
 436 ; CHECK-LABEL: @test_comilt_ss_0(
 437 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
 438 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
 439 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 440 ; CHECK-NEXT:    ret i32 [[TMP3]]
 441 ;
 442   %1 = insertelement <4 x float> undef, float %a, i32 0
 443   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 444   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 445   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 446   %5 = insertelement <4 x float> undef, float %b, i32 0
 447   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 448   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 449   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 450   %9 = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> %4, <4 x float> %8)
 451   ret i32 %9
 452 }
 453
 454 define i32 @test_comineq_ss_0(float %a, float %b) {
     ; Passes <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0> to comineq.ss and returns
     ; its i32 result directly.
     ; Expected output: the constant inserts into lanes 1-3 are dropped from both
     ; operands; the lane-0 inserts and the call remain.
 455 ; CHECK-LABEL: @test_comineq_ss_0(
 456 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
 457 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
 458 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 459 ; CHECK-NEXT:    ret i32 [[TMP3]]
 460 ;
 461   %1 = insertelement <4 x float> undef, float %a, i32 0
 462   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 463   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 464   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 465   %5 = insertelement <4 x float> undef, float %b, i32 0
 466   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 467   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 468   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 469   %9 = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %4, <4 x float> %8)
 470   ret i32 %9
 471 }
 472
 473 define i32 @test_ucomieq_ss_0(float %a, float %b) {
     ; Passes <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0> to ucomieq.ss and returns
     ; its i32 result directly.
     ; Expected output: the constant inserts into lanes 1-3 are dropped from both
     ; operands; the lane-0 inserts and the call remain.
 474 ; CHECK-LABEL: @test_ucomieq_ss_0(
 475 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
 476 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
 477 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 478 ; CHECK-NEXT:    ret i32 [[TMP3]]
 479 ;
 480   %1 = insertelement <4 x float> undef, float %a, i32 0
 481   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 482   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 483   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 484   %5 = insertelement <4 x float> undef, float %b, i32 0
 485   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 486   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 487   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 488   %9 = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %4, <4 x float> %8)
 489   ret i32 %9
 490 }
 491
 492 define i32 @test_ucomige_ss_0(float %a, float %b) {
     ; Passes <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0> to ucomige.ss and returns
     ; its i32 result directly.
     ; Expected output: the constant inserts into lanes 1-3 are dropped from both
     ; operands; the lane-0 inserts and the call remain.
 493 ; CHECK-LABEL: @test_ucomige_ss_0(
 494 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
 495 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
 496 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 497 ; CHECK-NEXT:    ret i32 [[TMP3]]
 498 ;
 499   %1 = insertelement <4 x float> undef, float %a, i32 0
 500   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 501   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 502   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 503   %5 = insertelement <4 x float> undef, float %b, i32 0
 504   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 505   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 506   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 507   %9 = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %4, <4 x float> %8)
 508   ret i32 %9
 509 }
 510
 511 define i32 @test_ucomigt_ss_0(float %a, float %b) {
     ; Passes <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0> to ucomigt.ss and returns
     ; its i32 result directly.
     ; Expected output: the constant inserts into lanes 1-3 are dropped from both
     ; operands; the lane-0 inserts and the call remain.
 512 ; CHECK-LABEL: @test_ucomigt_ss_0(
 513 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
 514 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
 515 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 516 ; CHECK-NEXT:    ret i32 [[TMP3]]
 517 ;
 518   %1 = insertelement <4 x float> undef, float %a, i32 0
 519   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 520   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 521   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 522   %5 = insertelement <4 x float> undef, float %b, i32 0
 523   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 524   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 525   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 526   %9 = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %4, <4 x float> %8)
 527   ret i32 %9
 528 }
 529
 530 define i32 @test_ucomile_ss_0(float %a, float %b) {
     ; Passes <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0> to ucomile.ss and returns
     ; its i32 result directly.
     ; Expected output: the constant inserts into lanes 1-3 are dropped from both
     ; operands; the lane-0 inserts and the call remain.
 531 ; CHECK-LABEL: @test_ucomile_ss_0(
 532 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
 533 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
 534 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 535 ; CHECK-NEXT:    ret i32 [[TMP3]]
 536 ;
 537   %1 = insertelement <4 x float> undef, float %a, i32 0
 538   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 539   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 540   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 541   %5 = insertelement <4 x float> undef, float %b, i32 0
 542   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 543   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 544   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 545   %9 = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %4, <4 x float> %8)
 546   ret i32 %9
 547 }
 548
 549 define i32 @test_ucomilt_ss_0(float %a, float %b) {
     ; Passes <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0> to ucomilt.ss and returns
     ; its i32 result directly.
     ; Expected output: the constant inserts into lanes 1-3 are dropped from both
     ; operands; the lane-0 inserts and the call remain.
 550 ; CHECK-LABEL: @test_ucomilt_ss_0(
 551 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
 552 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
 553 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 554 ; CHECK-NEXT:    ret i32 [[TMP3]]
 555 ;
 556   %1 = insertelement <4 x float> undef, float %a, i32 0
 557   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 558   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 559   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 560   %5 = insertelement <4 x float> undef, float %b, i32 0
 561   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 562   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 563   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 564   %9 = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %4, <4 x float> %8)
 565   ret i32 %9
 566 }
 567
 568 define i32 @test_ucomineq_ss_0(float %a, float %b) {
     ; Passes <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0> to ucomineq.ss and returns
     ; its i32 result directly.
     ; Expected output: the constant inserts into lanes 1-3 are dropped from both
     ; operands; the lane-0 inserts and the call remain.
 569 ; CHECK-LABEL: @test_ucomineq_ss_0(
 570 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
 571 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
 572 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 573 ; CHECK-NEXT:    ret i32 [[TMP3]]
 574 ;
 575   %1 = insertelement <4 x float> undef, float %a, i32 0
 576   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 577   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 578   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 579   %5 = insertelement <4 x float> undef, float %b, i32 0
 580   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 581   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 582   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 583   %9 = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %4, <4 x float> %8)
 584   ret i32 %9
 585 }
 586
 587 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) ; unary group (this and the next two): one vector in, one vector out
 588 declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>)
 589 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>)
 590
 591 declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) ; binary group: two vectors in, one vector out
 592 declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
 593 declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
 594 declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>)
 595 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
 596 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
 597 declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) ; extra i8 operand; every call in this file passes 0
 598
 599 declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) ; comi* group: two vectors in, i32 out
 600 declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>)
 601 declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>)
 602 declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>)
 603 declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>)
 604 declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>)
 605
 606 declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) ; ucomi* group: two vectors in, i32 out
 607 declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>)
 608 declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>)
 609 declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>)
 610 declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>)
 611 declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>)