llvm/test/Transforms/InstCombine/X86/x86-sse.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
     ; The pass is requested with the new pass manager spelling (-passes=...);
     ; the bare legacy "-instcombine" flag is no longer accepted by current opt.
   3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
   4
   5 define float @test_rcp_ss_0(float %a) {
     ; Builds <%a, 1.0, 2.0, 3.0>, calls rcp.ss and reads back only lane 0.
     ; Expected result: the constant inserts into lanes 1-3 are dropped (the
     ; operand becomes an insert into poison) while the intrinsic call is kept.
   6 ; CHECK-LABEL: @test_rcp_ss_0(
   7 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
   8 ; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> [[TMP1]])
   9 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
  10 ; CHECK-NEXT:    ret float [[TMP3]]
  11 ;
  12   %1 = insertelement <4 x float> undef, float %a, i32 0
  13   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  14   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  15   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  16   %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
  17   %6 = extractelement <4 x float> %5, i32 0
  18   ret float %6
  19 }
  20
  21 define float @test_rcp_ss_1(float %a) {
     ; Builds <%a, 1.0, 2.0, 3.0>, calls rcp.ss and reads back lane 1.
     ; Expected result: the whole body folds to 1.0, the constant that was
     ; inserted into lane 1 of the operand; the intrinsic call disappears.
  22 ; CHECK-LABEL: @test_rcp_ss_1(
  23 ; CHECK-NEXT:    ret float 1.000000e+00
  24 ;
  25   %1 = insertelement <4 x float> undef, float %a, i32 0
  26   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  27   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  28   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  29   %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
  30   %6 = extractelement <4 x float> %5, i32 1
  31   ret float %6
  32 }
  33
  34 define float @test_sqrt_ss_0(float %a) {
     ; Builds <%a, 1.0, 2.0, 3.0>, calls sqrt.ss and reads back only lane 0.
     ; Expected result: the vector code and the x86 intrinsic are replaced by
     ; a single scalar llvm.sqrt.f32 call on %a.
  35 ; CHECK-LABEL: @test_sqrt_ss_0(
  36 ; CHECK-NEXT:    [[TMP1:%.*]] = call float @llvm.sqrt.f32(float [[A:%.*]])
  37 ; CHECK-NEXT:    ret float [[TMP1]]
  38 ;
  39   %1 = insertelement <4 x float> undef, float %a, i32 0
  40   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  41   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  42   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  43   %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
  44   %6 = extractelement <4 x float> %5, i32 0
  45   ret float %6
  46 }
  47
  48 define float @test_sqrt_ss_2(float %a) {
     ; Builds <%a, 1.0, 2.0, 3.0>, calls sqrt.ss and reads back lane 2.
     ; Expected result: the whole body folds to 2.0, the constant that was
     ; inserted into lane 2 of the operand; the intrinsic call disappears.
  49 ; CHECK-LABEL: @test_sqrt_ss_2(
  50 ; CHECK-NEXT:    ret float 2.000000e+00
  51 ;
  52   %1 = insertelement <4 x float> undef, float %a, i32 0
  53   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  54   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  55   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  56   %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
  57   %6 = extractelement <4 x float> %5, i32 2
  58   ret float %6
  59 }
  60
  61 define float @test_rsqrt_ss_0(float %a) {
     ; Builds <%a, 1.0, 2.0, 3.0>, calls rsqrt.ss and reads back only lane 0.
     ; Expected result: the constant inserts into lanes 1-3 are dropped (the
     ; operand becomes an insert into poison) while the intrinsic call is kept.
  62 ; CHECK-LABEL: @test_rsqrt_ss_0(
  63 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
  64 ; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> [[TMP1]])
  65 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
  66 ; CHECK-NEXT:    ret float [[TMP3]]
  67 ;
  68   %1 = insertelement <4 x float> undef, float %a, i32 0
  69   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  70   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  71   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  72   %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
  73   %6 = extractelement <4 x float> %5, i32 0
  74   ret float %6
  75 }
  76
  77 define float @test_rsqrt_ss_3(float %a) {
     ; Builds <%a, 1.0, 2.0, 3.0>, calls rsqrt.ss and reads back lane 3.
     ; Expected result: the whole body folds to 3.0, the constant that was
     ; inserted into lane 3 of the operand; the intrinsic call disappears.
  78 ; CHECK-LABEL: @test_rsqrt_ss_3(
  79 ; CHECK-NEXT:    ret float 3.000000e+00
  80 ;
  81   %1 = insertelement <4 x float> undef, float %a, i32 0
  82   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  83   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  84   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  85   %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
  86   %6 = extractelement <4 x float> %5, i32 3
  87   ret float %6
  88 }
  89
  90 define float @test_add_ss_0(float %a, float %b) {
     ; Operands are <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0>; only lane 0
     ; of the add.ss result is read.
     ; Expected result: a single scalar fadd of %a and %b.
  91 ; CHECK-LABEL: @test_add_ss_0(
  92 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd float [[A:%.*]], [[B:%.*]]
  93 ; CHECK-NEXT:    ret float [[TMP1]]
  94 ;
  95   %1 = insertelement <4 x float> undef, float %a, i32 0
  96   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  97   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  98   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  99   %5 = insertelement <4 x float> undef, float %b, i32 0
 100   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 101   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 102   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 103   %9 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %8)
 104   %r = extractelement <4 x float> %9, i32 0
 105   ret float %r
 106 }
 107
 108 define float @test_add_ss_1(float %a, float %b) {
     ; First operand is <%a, 1.0, 2.0, 3.0>; the second has only lane 0 set
     ; (%b inserted into undef). Lane 1 of the add.ss result is read.
     ; Expected result: folds to 1.0, the lane-1 constant of the first operand.
 109 ; CHECK-LABEL: @test_add_ss_1(
 110 ; CHECK-NEXT:    ret float 1.000000e+00
 111 ;
 112   %1 = insertelement <4 x float> undef, float %a, i32 0
 113   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 114   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 115   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 116   %5 = insertelement <4 x float> undef, float %b, i32 0
 117   %6 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %5)
 118   %7 = extractelement <4 x float> %6, i32 1
 119   ret float %7
 120 }
 121
 122 define float @test_add_ss_2(float %a) {
     ; The same vector (%a inserted into lane 0 of zeroinitializer) is passed
     ; as both add.ss operands and lane 0 of the result is read.
     ; Expected result: a single scalar fadd of %a with itself.
 123 ; CHECK-LABEL: @test_add_ss_2(
 124 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd float [[A:%.*]], [[A]]
 125 ; CHECK-NEXT:    ret float [[TMP1]]
 126 ;
 127   %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
 128   %2 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %1, <4 x float> %1)
 129   %3 = extractelement <4 x float> %2, i32 0
 130   ret float %3
 131 }
 132
 133 define float @test_sub_ss_0(float %a, float %b) {
     ; Operands are <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0>; only lane 0
     ; of the sub.ss result is read.
     ; Expected result: a single scalar fsub of %a and %b.
 134 ; CHECK-LABEL: @test_sub_ss_0(
 135 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub float [[A:%.*]], [[B:%.*]]
 136 ; CHECK-NEXT:    ret float [[TMP1]]
 137 ;
 138   %1 = insertelement <4 x float> undef, float %a, i32 0
 139   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 140   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 141   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 142   %5 = insertelement <4 x float> undef, float %b, i32 0
 143   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 144   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 145   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 146   %9 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %8)
 147   %r = extractelement <4 x float> %9, i32 0
 148   ret float %r
 149 }
 150
 151 define float @test_sub_ss_2(float %a, float %b) {
     ; First operand is <%a, 1.0, 2.0, 3.0>; the second has only lane 0 set
     ; (%b inserted into undef). Lane 2 of the sub.ss result is read.
     ; Expected result: folds to 2.0, the lane-2 constant of the first operand.
 152 ; CHECK-LABEL: @test_sub_ss_2(
 153 ; CHECK-NEXT:    ret float 2.000000e+00
 154 ;
 155   %1 = insertelement <4 x float> undef, float %a, i32 0
 156   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 157   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 158   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 159   %5 = insertelement <4 x float> undef, float %b, i32 0
 160   %6 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %5)
 161   %7 = extractelement <4 x float> %6, i32 2
 162   ret float %7
 163 }
 164
 165 define float @test_sub_ss_3(float %a) {
     ; The same vector (%a inserted into lane 0 of zeroinitializer) is passed
     ; as both sub.ss operands and lane 0 of the result is read.
     ; Expected result: a scalar fsub of %a with itself; note the expected
     ; output keeps the fsub rather than folding it to a constant.
 166 ; CHECK-LABEL: @test_sub_ss_3(
 167 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub float [[A:%.*]], [[A]]
 168 ; CHECK-NEXT:    ret float [[TMP1]]
 169 ;
 170   %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
 171   %2 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %1, <4 x float> %1)
 172   %3 = extractelement <4 x float> %2, i32 0
 173   ret float %3
 174 }
 175
 176 define float @test_mul_ss_0(float %a, float %b) {
     ; Operands are <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0>; only lane 0
     ; of the mul.ss result is read.
     ; Expected result: a single scalar fmul of %a and %b.
 177 ; CHECK-LABEL: @test_mul_ss_0(
 178 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul float [[A:%.*]], [[B:%.*]]
 179 ; CHECK-NEXT:    ret float [[TMP1]]
 180 ;
 181   %1 = insertelement <4 x float> undef, float %a, i32 0
 182   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 183   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 184   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 185   %5 = insertelement <4 x float> undef, float %b, i32 0
 186   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 187   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 188   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 189   %9 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %8)
 190   %r = extractelement <4 x float> %9, i32 0
 191   ret float %r
 192 }
 193
 194 define float @test_mul_ss_3(float %a, float %b) {
     ; First operand is <%a, 1.0, 2.0, 3.0>; the second has only lane 0 set
     ; (%b inserted into undef). Lane 3 of the mul.ss result is read.
     ; Expected result: folds to 3.0, the lane-3 constant of the first operand.
 195 ; CHECK-LABEL: @test_mul_ss_3(
 196 ; CHECK-NEXT:    ret float 3.000000e+00
 197 ;
 198   %1 = insertelement <4 x float> undef, float %a, i32 0
 199   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 200   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 201   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 202   %5 = insertelement <4 x float> undef, float %b, i32 0
 203   %6 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %5)
 204   %7 = extractelement <4 x float> %6, i32 3
 205   ret float %7
 206 }
 207
 208 define float @test_mul_ss_4(float %a) {
     ; The same vector (%a inserted into lane 0 of zeroinitializer) is passed
     ; as both mul.ss operands and lane 0 of the result is read.
     ; Expected result: a single scalar fmul of %a with itself.
 209 ; CHECK-LABEL: @test_mul_ss_4(
 210 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul float [[A:%.*]], [[A]]
 211 ; CHECK-NEXT:    ret float [[TMP1]]
 212 ;
 213   %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
 214   %2 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %1, <4 x float> %1)
 215   %3 = extractelement <4 x float> %2, i32 0
 216   ret float %3
 217 }
 218
 219 define float @test_div_ss_0(float %a, float %b) {
     ; Operands are <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0>; only lane 0
     ; of the div.ss result is read.
     ; Expected result: a single scalar fdiv of %a by %b.
 220 ; CHECK-LABEL: @test_div_ss_0(
 221 ; CHECK-NEXT:    [[TMP1:%.*]] = fdiv float [[A:%.*]], [[B:%.*]]
 222 ; CHECK-NEXT:    ret float [[TMP1]]
 223 ;
 224   %1 = insertelement <4 x float> undef, float %a, i32 0
 225   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 226   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 227   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 228   %5 = insertelement <4 x float> undef, float %b, i32 0
 229   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 230   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 231   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 232   %9 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %8)
 233   %r = extractelement <4 x float> %9, i32 0
 234   ret float %r
 235 }
 236
 237 define float @test_div_ss_1(float %a, float %b) {
     ; First operand is <%a, 1.0, 2.0, 3.0>; the second has only lane 0 set
     ; (%b inserted into undef). Lane 1 of the div.ss result is read.
     ; Expected result: folds to 1.0, the lane-1 constant of the first operand.
 238 ; CHECK-LABEL: @test_div_ss_1(
 239 ; CHECK-NEXT:    ret float 1.000000e+00
 240 ;
 241   %1 = insertelement <4 x float> undef, float %a, i32 0
 242   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 243   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 244   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 245   %5 = insertelement <4 x float> undef, float %b, i32 0
 246   %6 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %5)
 247   %7 = extractelement <4 x float> %6, i32 1
 248   ret float %7
 249 }
 250
 251 define float @test_div_ss_2(float %a) {
     ; The same vector (%a inserted into lane 0 of zeroinitializer) is passed
     ; as both div.ss operands and lane 0 of the result is read.
     ; Expected result: a scalar fdiv of %a by itself; note the expected
     ; output keeps the fdiv rather than folding it to a constant.
 252 ; CHECK-LABEL: @test_div_ss_2(
 253 ; CHECK-NEXT:    [[TMP1:%.*]] = fdiv float [[A:%.*]], [[A]]
 254 ; CHECK-NEXT:    ret float [[TMP1]]
 255 ;
 256   %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
 257   %2 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %1, <4 x float> %1)
 258   %3 = extractelement <4 x float> %2, i32 0
 259   ret float %3
 260 }
 261
 262 define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
     ; Overwrites lanes 1-3 of the second operand %b with constants and
     ; returns the full min.ss result vector.
     ; Expected result: the three inserts are removed and the call takes %b
     ; directly as its second operand.
 263 ; CHECK-LABEL: @test_min_ss(
 264 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]])
 265 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 266 ;
 267   %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
 268   %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
 269   %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
 270   %4 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %3)
 271   ret <4 x float> %4
 272 }
 273
 274 define float @test_min_ss_0(float %a, float %b) {
     ; Operands are <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0>; only lane 0
     ; of the min.ss result is read.
     ; Expected result: the constant inserts into lanes 1-3 of both operands
     ; are dropped (each becomes an insert into poison); the min.ss call is
     ; kept rather than being turned into a scalar operation.
 275 ; CHECK-LABEL: @test_min_ss_0(
 276 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
 277 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
 278 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 279 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
 280 ; CHECK-NEXT:    ret float [[TMP4]]
 281 ;
 282   %1 = insertelement <4 x float> undef, float %a, i32 0
 283   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 284   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 285   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 286   %5 = insertelement <4 x float> undef, float %b, i32 0
 287   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 288   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 289   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 290   %9 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %8)
 291   %10 = extractelement <4 x float> %9, i32 0
 292   ret float %10
 293 }
 294
 295 define float @test_min_ss_2(float %a, float %b) {
     ; First operand is <%a, 1.0, 2.0, 3.0>; the second has only lane 0 set
     ; (%b inserted into undef). Lane 2 of the min.ss result is read.
     ; Expected result: folds to 2.0, the lane-2 constant of the first operand.
 296 ; CHECK-LABEL: @test_min_ss_2(
 297 ; CHECK-NEXT:    ret float 2.000000e+00
 298 ;
 299   %1 = insertelement <4 x float> undef, float %a, i32 0
 300   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 301   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 302   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 303   %5 = insertelement <4 x float> undef, float %b, i32 0
 304   %6 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %5)
 305   %7 = extractelement <4 x float> %6, i32 2
 306   ret float %7
 307 }
 308
 309 define float @test_min_ss_3(float %a) {
     ; The same vector (%a inserted into lane 0 of zeroinitializer) is passed
     ; as both min.ss operands and lane 0 of the result is read.
     ; Expected result: the min.ss call is kept; only the base vector of the
     ; insert changes (lane 0 becomes poison, lanes 1-3 stay 0.0).
 310 ; CHECK-LABEL: @test_min_ss_3(
 311 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[A:%.*]], i32 0
 312 ; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP1]], <4 x float> [[TMP1]])
 313 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
 314 ; CHECK-NEXT:    ret float [[TMP3]]
 315 ;
 316   %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
 317   %2 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %1, <4 x float> %1)
 318   %3 = extractelement <4 x float> %2, i32 0
 319   ret float %3
 320 }
 321
 322 define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
     ; Overwrites lanes 1-3 of the second operand %b with constants and
     ; returns the full max.ss result vector.
     ; Expected result: the three inserts are removed and the call takes %b
     ; directly as its second operand.
 323 ; CHECK-LABEL: @test_max_ss(
 324 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]])
 325 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 326 ;
 327   %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
 328   %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
 329   %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
 330   %4 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %3)
 331   ret <4 x float> %4
 332 }
 333
 334 define float @test_max_ss_0(float %a, float %b) {
     ; Operands are <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0>; only lane 0
     ; of the max.ss result is read.
     ; Expected result: the constant inserts into lanes 1-3 of both operands
     ; are dropped (each becomes an insert into poison); the max.ss call is
     ; kept rather than being turned into a scalar operation.
 335 ; CHECK-LABEL: @test_max_ss_0(
 336 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
 337 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
 338 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 339 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
 340 ; CHECK-NEXT:    ret float [[TMP4]]
 341 ;
 342   %1 = insertelement <4 x float> undef, float %a, i32 0
 343   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 344   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 345   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 346   %5 = insertelement <4 x float> undef, float %b, i32 0
 347   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 348   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 349   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 350   %9 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %8)
 351   %10 = extractelement <4 x float> %9, i32 0
 352   ret float %10
 353 }
 354
 355 define float @test_max_ss_3(float %a, float %b) {
     ; First operand is <%a, 1.0, 2.0, 3.0>; the second has only lane 0 set
     ; (%b inserted into undef). Lane 3 of the max.ss result is read.
     ; Expected result: folds to 3.0, the lane-3 constant of the first operand.
 356 ; CHECK-LABEL: @test_max_ss_3(
 357 ; CHECK-NEXT:    ret float 3.000000e+00
 358 ;
 359   %1 = insertelement <4 x float> undef, float %a, i32 0
 360   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 361   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 362   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 363   %5 = insertelement <4 x float> undef, float %b, i32 0
 364   %6 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %5)
 365   %7 = extractelement <4 x float> %6, i32 3
 366   ret float %7
 367 }
 368
 369 define float @test_max_ss_4(float %a) {
     ; The same vector (%a inserted into lane 0 of zeroinitializer) is passed
     ; as both max.ss operands and lane 0 of the result is read.
     ; Expected result: the max.ss call is kept; only the base vector of the
     ; insert changes (lane 0 becomes poison, lanes 1-3 stay 0.0).
 370 ; CHECK-LABEL: @test_max_ss_4(
 371 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[A:%.*]], i32 0
 372 ; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP1]], <4 x float> [[TMP1]])
 373 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
 374 ; CHECK-NEXT:    ret float [[TMP3]]
 375 ;
 376   %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
 377   %2 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %1, <4 x float> %1)
 378   %3 = extractelement <4 x float> %2, i32 0
 379   ret float %3
 380 }
 381
 382 define <4 x float> @test_cmp_ss(<4 x float> %a, <4 x float> %b) {
     ; Overwrites lanes 1-3 of the second operand %b with constants and
     ; returns the full cmp.ss result vector (immediate operand i8 0).
     ; Expected result: the three inserts are removed and the call takes %b
     ; directly as its second operand.
 383 ; CHECK-LABEL: @test_cmp_ss(
 384 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i8 0)
 385 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 386 ;
 387   %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
 388   %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
 389   %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
 390   %4 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %3, i8 0)
 391   ret <4 x float> %4
 392 }
 393
 394 define float @test_cmp_ss_0(float %a, float %b) {
     ; Operands are <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0>; only lane 0
     ; of the cmp.ss result (immediate operand i8 0) is read.
     ; Expected result: the constant inserts into lanes 1-3 of both operands
     ; are dropped (each becomes an insert into poison); the cmp.ss call and
     ; its immediate are kept.
 395 ; CHECK-LABEL: @test_cmp_ss_0(
 396 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
 397 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
 398 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]], i8 0)
 399 ; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
 400 ; CHECK-NEXT:    ret float [[R]]
 401 ;
 402   %1 = insertelement <4 x float> undef, float %a, i32 0
 403   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 404   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 405   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 406   %5 = insertelement <4 x float> undef, float %b, i32 0
 407   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 408   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 409   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 410   %9 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %8, i8 0)
 411   %r = extractelement <4 x float> %9, i32 0
 412   ret float %r
 413 }
 414
 415 define float @test_cmp_ss_1(float %a, float %b) {
     ; First operand is <%a, 1.0, 2.0, 3.0>; the second has only lane 0 set
     ; (%b inserted into undef). Lane 1 of the cmp.ss result is read.
     ; Expected result: folds to 1.0, the lane-1 constant of the first operand.
 416 ; CHECK-LABEL: @test_cmp_ss_1(
 417 ; CHECK-NEXT:    ret float 1.000000e+00
 418 ;
 419   %1 = insertelement <4 x float> undef, float %a, i32 0
 420   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 421   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 422   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 423   %5 = insertelement <4 x float> undef, float %b, i32 0
 424   %6 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %5, i8 0)
 425   %7 = extractelement <4 x float> %6, i32 1
 426   ret float %7
 427 }
 428
 429 define float @test_cmp_ss_2(float %a) {
     ; The same vector (%a inserted into lane 0 of zeroinitializer) is passed
     ; as both cmp.ss operands (immediate operand i8 3); lane 0 is read.
     ; Expected result: the cmp.ss call and its immediate are kept; only the
     ; base vector of the insert changes (lane 0 becomes poison).
 430 ; CHECK-LABEL: @test_cmp_ss_2(
 431 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[A:%.*]], i32 0
 432 ; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP1]], <4 x float> [[TMP1]], i8 3)
 433 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
 434 ; CHECK-NEXT:    ret float [[TMP3]]
 435 ;
 436   %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
 437   %2 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %1, <4 x float> %1, i8 3)
 438   %3 = extractelement <4 x float> %2, i32 0
 439   ret float %3
 440 }
 441
 442 define i32 @test_comieq_ss_0(float %a, float %b) {
     ; Operands are <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0>; the i32
     ; result of comieq.ss is returned directly.
     ; Expected result: the constant inserts into lanes 1-3 of both operands
     ; are dropped (each becomes an insert into poison); the call is kept.
 443 ; CHECK-LABEL: @test_comieq_ss_0(
 444 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
 445 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
 446 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 447 ; CHECK-NEXT:    ret i32 [[TMP3]]
 448 ;
 449   %1 = insertelement <4 x float> undef, float %a, i32 0
 450   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 451   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 452   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 453   %5 = insertelement <4 x float> undef, float %b, i32 0
 454   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 455   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 456   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 457   %9 = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %4, <4 x float> %8)
 458   ret i32 %9
 459 }
 460
 461 define i32 @test_comige_ss_0(float %a, float %b) {
     ; Operands are <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0>; the i32
     ; result of comige.ss is returned directly.
     ; Expected result: the constant inserts into lanes 1-3 of both operands
     ; are dropped (each becomes an insert into poison); the call is kept.
 462 ; CHECK-LABEL: @test_comige_ss_0(
 463 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
 464 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
 465 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 466 ; CHECK-NEXT:    ret i32 [[TMP3]]
 467 ;
 468   %1 = insertelement <4 x float> undef, float %a, i32 0
 469   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 470   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 471   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 472   %5 = insertelement <4 x float> undef, float %b, i32 0
 473   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 474   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 475   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 476   %9 = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> %4, <4 x float> %8)
 477   ret i32 %9
 478 }
 479
 480 define i32 @test_comigt_ss_0(float %a, float %b) {
     ; Operands are <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0>; the i32
     ; result of comigt.ss is returned directly.
     ; Expected result: the constant inserts into lanes 1-3 of both operands
     ; are dropped (each becomes an insert into poison); the call is kept.
 481 ; CHECK-LABEL: @test_comigt_ss_0(
 482 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
 483 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
 484 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 485 ; CHECK-NEXT:    ret i32 [[TMP3]]
 486 ;
 487   %1 = insertelement <4 x float> undef, float %a, i32 0
 488   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 489   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 490   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 491   %5 = insertelement <4 x float> undef, float %b, i32 0
 492   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 493   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 494   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 495   %9 = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> %4, <4 x float> %8)
 496   ret i32 %9
 497 }
 498
 499 define i32 @test_comile_ss_0(float %a, float %b) {
     ; Operands are <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0>; the i32
     ; result of comile.ss is returned directly.
     ; Expected result: the constant inserts into lanes 1-3 of both operands
     ; are dropped (each becomes an insert into poison); the call is kept.
 500 ; CHECK-LABEL: @test_comile_ss_0(
 501 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
 502 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
 503 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 504 ; CHECK-NEXT:    ret i32 [[TMP3]]
 505 ;
 506   %1 = insertelement <4 x float> undef, float %a, i32 0
 507   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 508   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 509   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 510   %5 = insertelement <4 x float> undef, float %b, i32 0
 511   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 512   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 513   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 514   %9 = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> %4, <4 x float> %8)
 515   ret i32 %9
 516 }
 517
 518 define i32 @test_comilt_ss_0(float %a, float %b) {
     ; Operands are <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0>; the i32
     ; result of comilt.ss is returned directly.
     ; Expected result: the constant inserts into lanes 1-3 of both operands
     ; are dropped (each becomes an insert into poison); the call is kept.
 519 ; CHECK-LABEL: @test_comilt_ss_0(
 520 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
 521 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
 522 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 523 ; CHECK-NEXT:    ret i32 [[TMP3]]
 524 ;
 525   %1 = insertelement <4 x float> undef, float %a, i32 0
 526   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 527   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 528   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 529   %5 = insertelement <4 x float> undef, float %b, i32 0
 530   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 531   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 532   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 533   %9 = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> %4, <4 x float> %8)
 534   ret i32 %9
 535 }
 536
 537 define i32 @test_comineq_ss_0(float %a, float %b) {
     ; Operands are <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0>; the i32
     ; result of comineq.ss is returned directly.
     ; Expected result: the constant inserts into lanes 1-3 of both operands
     ; are dropped (each becomes an insert into poison); the call is kept.
 538 ; CHECK-LABEL: @test_comineq_ss_0(
 539 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
 540 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
 541 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 542 ; CHECK-NEXT:    ret i32 [[TMP3]]
 543 ;
 544   %1 = insertelement <4 x float> undef, float %a, i32 0
 545   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 546   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 547   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 548   %5 = insertelement <4 x float> undef, float %b, i32 0
 549   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 550   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 551   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 552   %9 = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %4, <4 x float> %8)
 553   ret i32 %9
 554 }
 555
 556 define i32 @test_ucomieq_ss_0(float %a, float %b) {
     ; Operands are <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0>; the i32
     ; result of ucomieq.ss is returned directly.
     ; Expected result: the constant inserts into lanes 1-3 of both operands
     ; are dropped (each becomes an insert into poison); the call is kept.
 557 ; CHECK-LABEL: @test_ucomieq_ss_0(
 558 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
 559 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
 560 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 561 ; CHECK-NEXT:    ret i32 [[TMP3]]
 562 ;
 563   %1 = insertelement <4 x float> undef, float %a, i32 0
 564   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 565   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 566   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 567   %5 = insertelement <4 x float> undef, float %b, i32 0
 568   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 569   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 570   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 571   %9 = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %4, <4 x float> %8)
 572   ret i32 %9
 573 }
 574
 575 define i32 @test_ucomige_ss_0(float %a, float %b) {
     ; Operands are <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0>; the i32
     ; result of ucomige.ss is returned directly.
     ; Expected result: the constant inserts into lanes 1-3 of both operands
     ; are dropped (each becomes an insert into poison); the call is kept.
 576 ; CHECK-LABEL: @test_ucomige_ss_0(
 577 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
 578 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
 579 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 580 ; CHECK-NEXT:    ret i32 [[TMP3]]
 581 ;
 582   %1 = insertelement <4 x float> undef, float %a, i32 0
 583   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 584   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 585   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 586   %5 = insertelement <4 x float> undef, float %b, i32 0
 587   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 588   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 589   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 590   %9 = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %4, <4 x float> %8)
 591   ret i32 %9
 592 }
 593
 594 define i32 @test_ucomigt_ss_0(float %a, float %b) {
     ; Operands are <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0>; the i32
     ; result of ucomigt.ss is returned directly.
     ; Expected result: the constant inserts into lanes 1-3 of both operands
     ; are dropped (each becomes an insert into poison); the call is kept.
 595 ; CHECK-LABEL: @test_ucomigt_ss_0(
 596 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
 597 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
 598 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 599 ; CHECK-NEXT:    ret i32 [[TMP3]]
 600 ;
 601   %1 = insertelement <4 x float> undef, float %a, i32 0
 602   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 603   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 604   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 605   %5 = insertelement <4 x float> undef, float %b, i32 0
 606   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 607   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 608   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 609   %9 = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %4, <4 x float> %8)
 610   ret i32 %9
 611 }
 612
 613 define i32 @test_ucomile_ss_0(float %a, float %b) {
     ; Operands are <%a, 1.0, 2.0, 3.0> and <%b, 4.0, 5.0, 6.0>; the i32
     ; result of ucomile.ss is returned directly.
     ; Expected result: the constant inserts into lanes 1-3 of both operands
     ; are dropped (each becomes an insert into poison); the call is kept.
 614 ; CHECK-LABEL: @test_ucomile_ss_0(
 615 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
 616 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
 617 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 618 ; CHECK-NEXT:    ret i32 [[TMP3]]
 619 ;
 620   %1 = insertelement <4 x float> undef, float %a, i32 0
 621   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 622   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 623   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
 624   %5 = insertelement <4 x float> undef, float %b, i32 0
 625   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 626   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 627   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
 628   %9 = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %4, <4 x float> %8)
 629   ret i32 %9
 630 }
 631
 632 define i32 @test_ucomilt_ss_0(float %a, float %b) {
 633 ; CHECK-LABEL: @test_ucomilt_ss_0(
 634 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
 635 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
 636 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 637 ; CHECK-NEXT:    ret i32 [[TMP3]]
 638 ;
       ; Test input for the ucomilt.ss intrinsic. The input deliberately fills
       ; lanes 1-3 of both operands with constants; the expected output above
       ; shows those inserts gone, leaving only %a and %b inserted at index 0
       ; of a poison vector ahead of the intrinsic call.
       ; NOTE(review): the simplification presumably depends on the intrinsic
       ; consuming only element 0 of each operand -- confirm.
       ;
       ; Left-hand vector: lane 0 = %a, lanes 1-3 = 1.0, 2.0, 3.0.
 639   %1 = insertelement <4 x float> undef, float %a, i32 0
 640   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 641   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 642   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
       ; Right-hand vector: lane 0 = %b, lanes 1-3 = 4.0, 5.0, 6.0.
 643   %5 = insertelement <4 x float> undef, float %b, i32 0
 644   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 645   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 646   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
       ; The intrinsic's i32 result is the function's return value.
 647   %9 = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %4, <4 x float> %8)
 648   ret i32 %9
 649 }
 650
 651 define i32 @test_ucomineq_ss_0(float %a, float %b) {
 652 ; CHECK-LABEL: @test_ucomineq_ss_0(
 653 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
 654 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
 655 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
 656 ; CHECK-NEXT:    ret i32 [[TMP3]]
 657 ;
       ; Test input for the ucomineq.ss intrinsic. Each operand is assembled
       ; lane by lane: the float argument at index 0, then three constants.
       ; The expected output above retains only the index-0 insert of each
       ; operand (with a poison base vector) plus the call, i.e. the six
       ; constant inserts are expected to disappear.
       ; NOTE(review): assumes the intrinsic ignores lanes 1-3 of its
       ; operands -- confirm against the intrinsic's definition.
       ;
       ; Vector built from %a and the constants 1.0, 2.0, 3.0.
 658   %1 = insertelement <4 x float> undef, float %a, i32 0
 659   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
 660   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
 661   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
       ; Vector built from %b and the constants 4.0, 5.0, 6.0.
 662   %5 = insertelement <4 x float> undef, float %b, i32 0
 663   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
 664   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
 665   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
       ; Pass both vectors to the intrinsic and return the i32 it produces.
 666   %9 = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %4, <4 x float> %8)
 667   ret i32 %9
 668 }
 669
     ; External declarations of the @llvm.x86.sse.* scalar intrinsics. Test
     ; functions in this file call into them (for example rcp.ss and the
     ; ucomi*.ss family).
     ;
     ; One-operand forms: take a <4 x float> and return a <4 x float>.
 670 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>)
 671 declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>)
 672 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>)
 673
     ; Two-operand forms: take two <4 x float> values and return a <4 x float>.
     ; cmp.ss additionally takes an i8 as its third operand.
 674 declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>)
 675 declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
 676 declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
 677 declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>)
 678 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
 679 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
 680 declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8)
 681
     ; comi*.ss family: take two <4 x float> values and return an i32.
 682 declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>)
 683 declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>)
 684 declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>)
 685 declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>)
 686 declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>)
 687 declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>)
 688
     ; ucomi*.ss family: take two <4 x float> values and return an i32.
 689 declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>)
 690 declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>)
 691 declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>)
 692 declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>)
 693 declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>)
 694 declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>)