test/CodeGen/X86/scalarize-fp.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=ALL,SSE
   3 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx  | FileCheck %s --check-prefixes=ALL,AVX
   4
   5 define <4 x float> @fadd_op1_constant_v4f32(float %x) nounwind {
     ; %x is inserted into lane 0 of an undef vector and added to a constant
     ; vector whose only defined lane is lane 0 (42.0). The checks expect one
     ; scalar add (addss / vaddss) with a RIP-relative memory operand.
   6 ; SSE-LABEL: fadd_op1_constant_v4f32:
   7 ; SSE:       # %bb.0:
   8 ; SSE-NEXT:    addss {{.*}}(%rip), %xmm0
   9 ; SSE-NEXT:    retq
  10 ;
  11 ; AVX-LABEL: fadd_op1_constant_v4f32:
  12 ; AVX:       # %bb.0:
  13 ; AVX-NEXT:    vaddss {{.*}}(%rip), %xmm0, %xmm0
  14 ; AVX-NEXT:    retq
  15   %v = insertelement <4 x float> undef, float %x, i32 0
  16   %b = fadd <4 x float> %v, <float 42.0, float undef, float undef, float undef>
  17   ret <4 x float> %b
  18 }
  19
  20 define <4 x float> @load_fadd_op1_constant_v4f32(float* %p) nounwind {
     ; The scalar is loaded from %p, inserted into lane 0 of an undef vector and
     ; added to a constant with only lane 0 defined (42.0). The checks expect a
     ; scalar load (movss / vmovss) followed by a scalar add with a RIP-relative
     ; memory operand.
  21 ; SSE-LABEL: load_fadd_op1_constant_v4f32:
  22 ; SSE:       # %bb.0:
  23 ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
  24 ; SSE-NEXT:    addss {{.*}}(%rip), %xmm0
  25 ; SSE-NEXT:    retq
  26 ;
  27 ; AVX-LABEL: load_fadd_op1_constant_v4f32:
  28 ; AVX:       # %bb.0:
  29 ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
  30 ; AVX-NEXT:    vaddss {{.*}}(%rip), %xmm0, %xmm0
  31 ; AVX-NEXT:    retq
  32   %x = load float, float* %p
  33   %v = insertelement <4 x float> undef, float %x, i32 0
  34   %b = fadd <4 x float> %v, <float 42.0, float undef, float undef, float undef>
  35   ret <4 x float> %b
  36 }
  37
  38 define <4 x float> @fsub_op0_constant_v4f32(float %x) nounwind {
     ; The constant (42.0 in lane 0, other lanes undef) is operand 0 of the fsub
     ; and the inserted %x is operand 1. The checks expect a scalar load into
     ; xmm1 and a scalar subtract (subss / vsubss); the two-operand SSE form
     ; needs an extra movaps to move the result into xmm0.
  39 ; SSE-LABEL: fsub_op0_constant_v4f32:
  40 ; SSE:       # %bb.0:
  41 ; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
  42 ; SSE-NEXT:    subss %xmm0, %xmm1
  43 ; SSE-NEXT:    movaps %xmm1, %xmm0
  44 ; SSE-NEXT:    retq
  45 ;
  46 ; AVX-LABEL: fsub_op0_constant_v4f32:
  47 ; AVX:       # %bb.0:
  48 ; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
  49 ; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
  50 ; AVX-NEXT:    retq
  51   %v = insertelement <4 x float> undef, float %x, i32 0
  52   %b = fsub <4 x float> <float 42.0, float undef, float undef, float undef>, %v
  53   ret <4 x float> %b
  54 }
  55
  56 define <4 x float> @load_fsub_op0_constant_v4f32(float* %p) nounwind {
     ; Constant (42.0 in lane 0) minus a scalar loaded from %p and inserted into
     ; lane 0. The checks expect a scalar load into xmm0 (presumably the
     ; constant) followed by a scalar subtract whose memory operand is (%rdi),
     ; i.e. the load of %x is folded into the subtract.
  57 ; SSE-LABEL: load_fsub_op0_constant_v4f32:
  58 ; SSE:       # %bb.0:
  59 ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
  60 ; SSE-NEXT:    subss (%rdi), %xmm0
  61 ; SSE-NEXT:    retq
  62 ;
  63 ; AVX-LABEL: load_fsub_op0_constant_v4f32:
  64 ; AVX:       # %bb.0:
  65 ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
  66 ; AVX-NEXT:    vsubss (%rdi), %xmm0, %xmm0
  67 ; AVX-NEXT:    retq
  68   %x = load float, float* %p
  69   %v = insertelement <4 x float> undef, float %x, i32 0
  70   %b = fsub <4 x float> <float 42.0, float undef, float undef, float undef>, %v
  71   ret <4 x float> %b
  72 }
  73
  74 define <4 x float> @fmul_op1_constant_v4f32(float %x) nounwind {
     ; %x is inserted into lane 0 of an undef vector and multiplied by a constant
     ; vector whose only defined lane is lane 0 (42.0). The checks expect one
     ; scalar multiply (mulss / vmulss) with a RIP-relative memory operand.
  75 ; SSE-LABEL: fmul_op1_constant_v4f32:
  76 ; SSE:       # %bb.0:
  77 ; SSE-NEXT:    mulss {{.*}}(%rip), %xmm0
  78 ; SSE-NEXT:    retq
  79 ;
  80 ; AVX-LABEL: fmul_op1_constant_v4f32:
  81 ; AVX:       # %bb.0:
  82 ; AVX-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
  83 ; AVX-NEXT:    retq
  84   %v = insertelement <4 x float> undef, float %x, i32 0
  85   %b = fmul <4 x float> %v, <float 42.0, float undef, float undef, float undef>
  86   ret <4 x float> %b
  87 }
  88
  89 define <4 x float> @load_fmul_op1_constant_v4f32(float* %p) nounwind {
     ; The scalar is loaded from %p, inserted into lane 0 of an undef vector and
     ; multiplied by a constant with only lane 0 defined (42.0). The checks
     ; expect a scalar load (movss / vmovss) followed by a scalar multiply with
     ; a RIP-relative memory operand.
  90 ; SSE-LABEL: load_fmul_op1_constant_v4f32:
  91 ; SSE:       # %bb.0:
  92 ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
  93 ; SSE-NEXT:    mulss {{.*}}(%rip), %xmm0
  94 ; SSE-NEXT:    retq
  95 ;
  96 ; AVX-LABEL: load_fmul_op1_constant_v4f32:
  97 ; AVX:       # %bb.0:
  98 ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
  99 ; AVX-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
 100 ; AVX-NEXT:    retq
 101   %x = load float, float* %p
 102   %v = insertelement <4 x float> undef, float %x, i32 0
 103   %b = fmul <4 x float> %v, <float 42.0, float undef, float undef, float undef>
 104   ret <4 x float> %b
 105 }
 106
 107 define <4 x float> @fdiv_op1_constant_v4f32(float %x) nounwind {
     ; %x is inserted into lane 0 of an undef vector and divided by a constant
     ; vector whose only defined lane is lane 0 (42.0). The checks expect one
     ; scalar divide (divss / vdivss) with a RIP-relative memory operand.
 108 ; SSE-LABEL: fdiv_op1_constant_v4f32:
 109 ; SSE:       # %bb.0:
 110 ; SSE-NEXT:    divss {{.*}}(%rip), %xmm0
 111 ; SSE-NEXT:    retq
 112 ;
 113 ; AVX-LABEL: fdiv_op1_constant_v4f32:
 114 ; AVX:       # %bb.0:
 115 ; AVX-NEXT:    vdivss {{.*}}(%rip), %xmm0, %xmm0
 116 ; AVX-NEXT:    retq
 117   %v = insertelement <4 x float> undef, float %x, i32 0
 118   %b = fdiv <4 x float> %v, <float 42.0, float undef, float undef, float undef>
 119   ret <4 x float> %b
 120 }
 121
 122 define <4 x float> @load_fdiv_op1_constant_v4f32(float* %p) nounwind {
     ; The scalar is loaded from %p, inserted into lane 0 of an undef vector and
     ; divided by a constant with only lane 0 defined (42.0). The checks expect
     ; a scalar load (movss / vmovss) followed by a scalar divide with a
     ; RIP-relative memory operand.
 123 ; SSE-LABEL: load_fdiv_op1_constant_v4f32:
 124 ; SSE:       # %bb.0:
 125 ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 126 ; SSE-NEXT:    divss {{.*}}(%rip), %xmm0
 127 ; SSE-NEXT:    retq
 128 ;
 129 ; AVX-LABEL: load_fdiv_op1_constant_v4f32:
 130 ; AVX:       # %bb.0:
 131 ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 132 ; AVX-NEXT:    vdivss {{.*}}(%rip), %xmm0, %xmm0
 133 ; AVX-NEXT:    retq
 134   %x = load float, float* %p
 135   %v = insertelement <4 x float> undef, float %x, i32 0
 136   %b = fdiv <4 x float> %v, <float 42.0, float undef, float undef, float undef>
 137   ret <4 x float> %b
 138 }
 139
 140 define <4 x float> @fdiv_op0_constant_v4f32(float %x) nounwind {
     ; The constant (42.0 in lane 0, other lanes undef) is the dividend and the
     ; inserted %x is the divisor. The checks expect a scalar load into xmm1 and
     ; a scalar divide (divss / vdivss); the two-operand SSE form needs an extra
     ; movaps to move the result into xmm0.
 141 ; SSE-LABEL: fdiv_op0_constant_v4f32:
 142 ; SSE:       # %bb.0:
 143 ; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 144 ; SSE-NEXT:    divss %xmm0, %xmm1
 145 ; SSE-NEXT:    movaps %xmm1, %xmm0
 146 ; SSE-NEXT:    retq
 147 ;
 148 ; AVX-LABEL: fdiv_op0_constant_v4f32:
 149 ; AVX:       # %bb.0:
 150 ; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 151 ; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
 152 ; AVX-NEXT:    retq
 153   %v = insertelement <4 x float> undef, float %x, i32 0
 154   %b = fdiv <4 x float> <float 42.0, float undef, float undef, float undef>, %v
 155   ret <4 x float> %b
 156 }
 157
 158 define <4 x float> @load_fdiv_op0_constant_v4f32(float* %p) nounwind {
     ; Constant (42.0 in lane 0) divided by a scalar loaded from %p and inserted
     ; into lane 0. The checks expect a scalar load into xmm0 (presumably the
     ; constant) followed by a scalar divide whose memory operand is (%rdi),
     ; i.e. the load of %x is folded into the divide.
 159 ; SSE-LABEL: load_fdiv_op0_constant_v4f32:
 160 ; SSE:       # %bb.0:
 161 ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 162 ; SSE-NEXT:    divss (%rdi), %xmm0
 163 ; SSE-NEXT:    retq
 164 ;
 165 ; AVX-LABEL: load_fdiv_op0_constant_v4f32:
 166 ; AVX:       # %bb.0:
 167 ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 168 ; AVX-NEXT:    vdivss (%rdi), %xmm0, %xmm0
 169 ; AVX-NEXT:    retq
 170   %x = load float, float* %p
 171   %v = insertelement <4 x float> undef, float %x, i32 0
 172   %b = fdiv <4 x float> <float 42.0, float undef, float undef, float undef>, %v
 173   ret <4 x float> %b
 174 }
 175
 176 define <4 x double> @fadd_op1_constant_v4f64(double %x) nounwind {
     ; Double-precision, 4-lane case: %x is inserted into lane 0 of an undef
     ; vector and added to a constant whose only defined lane is lane 0 (42.0).
     ; The checks expect one scalar add (addsd / vaddsd) with a RIP-relative
     ; memory operand and no other instructions for the remaining lanes.
 177 ; SSE-LABEL: fadd_op1_constant_v4f64:
 178 ; SSE:       # %bb.0:
 179 ; SSE-NEXT:    addsd {{.*}}(%rip), %xmm0
 180 ; SSE-NEXT:    retq
 181 ;
 182 ; AVX-LABEL: fadd_op1_constant_v4f64:
 183 ; AVX:       # %bb.0:
 184 ; AVX-NEXT:    vaddsd {{.*}}(%rip), %xmm0, %xmm0
 185 ; AVX-NEXT:    retq
 186   %v = insertelement <4 x double> undef, double %x, i32 0
 187   %b = fadd <4 x double> %v, <double 42.0, double undef, double undef, double undef>
 188   ret <4 x double> %b
 189 }
 190
 191 define <4 x double> @load_fadd_op1_constant_v4f64(double* %p) nounwind {
     ; The double is loaded from %p, inserted into lane 0 of an undef 4-lane
     ; vector and added to a constant with only lane 0 defined (42.0). The
     ; checks expect a scalar load (movsd / vmovsd) followed by a scalar add
     ; with a RIP-relative memory operand.
 192 ; SSE-LABEL: load_fadd_op1_constant_v4f64:
 193 ; SSE:       # %bb.0:
 194 ; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 195 ; SSE-NEXT:    addsd {{.*}}(%rip), %xmm0
 196 ; SSE-NEXT:    retq
 197 ;
 198 ; AVX-LABEL: load_fadd_op1_constant_v4f64:
 199 ; AVX:       # %bb.0:
 200 ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 201 ; AVX-NEXT:    vaddsd {{.*}}(%rip), %xmm0, %xmm0
 202 ; AVX-NEXT:    retq
 203   %x = load double, double* %p
 204   %v = insertelement <4 x double> undef, double %x, i32 0
 205   %b = fadd <4 x double> %v, <double 42.0, double undef, double undef, double undef>
 206   ret <4 x double> %b
 207 }
 208
 209 define <4 x double> @fsub_op0_constant_v4f64(double %x) nounwind {
     ; The constant (42.0 in lane 0, other lanes undef) is operand 0 of the fsub
     ; and the inserted %x is operand 1. The checks expect a scalar load into
     ; xmm1 and a scalar subtract (subsd / vsubsd); the two-operand SSE form
     ; needs an extra movapd to move the result into xmm0.
 210 ; SSE-LABEL: fsub_op0_constant_v4f64:
 211 ; SSE:       # %bb.0:
 212 ; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
 213 ; SSE-NEXT:    subsd %xmm0, %xmm1
 214 ; SSE-NEXT:    movapd %xmm1, %xmm0
 215 ; SSE-NEXT:    retq
 216 ;
 217 ; AVX-LABEL: fsub_op0_constant_v4f64:
 218 ; AVX:       # %bb.0:
 219 ; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
 220 ; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
 221 ; AVX-NEXT:    retq
 222   %v = insertelement <4 x double> undef, double %x, i32 0
 223   %b = fsub <4 x double> <double 42.0, double undef, double undef, double undef>, %v
 224   ret <4 x double> %b
 225 }
 226
 227 define <4 x double> @load_fsub_op0_constant_v4f64(double* %p) nounwind {
     ; Constant (42.0 in lane 0) minus a double loaded from %p and inserted into
     ; lane 0. The checks expect a scalar load into xmm0 (presumably the
     ; constant) followed by a scalar subtract whose memory operand is (%rdi),
     ; i.e. the load of %x is folded into the subtract.
 228 ; SSE-LABEL: load_fsub_op0_constant_v4f64:
 229 ; SSE:       # %bb.0:
 230 ; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 231 ; SSE-NEXT:    subsd (%rdi), %xmm0
 232 ; SSE-NEXT:    retq
 233 ;
 234 ; AVX-LABEL: load_fsub_op0_constant_v4f64:
 235 ; AVX:       # %bb.0:
 236 ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 237 ; AVX-NEXT:    vsubsd (%rdi), %xmm0, %xmm0
 238 ; AVX-NEXT:    retq
 239   %x = load double, double* %p
 240   %v = insertelement <4 x double> undef, double %x, i32 0
 241   %b = fsub <4 x double> <double 42.0, double undef, double undef, double undef>, %v
 242   ret <4 x double> %b
 243 }
 244
 245 define <4 x double> @fmul_op1_constant_v4f64(double %x) nounwind {
     ; %x is inserted into lane 0 of an undef 4-lane vector and multiplied by a
     ; constant whose only defined lane is lane 0 (42.0). The checks expect one
     ; scalar multiply (mulsd / vmulsd) with a RIP-relative memory operand.
 246 ; SSE-LABEL: fmul_op1_constant_v4f64:
 247 ; SSE:       # %bb.0:
 248 ; SSE-NEXT:    mulsd {{.*}}(%rip), %xmm0
 249 ; SSE-NEXT:    retq
 250 ;
 251 ; AVX-LABEL: fmul_op1_constant_v4f64:
 252 ; AVX:       # %bb.0:
 253 ; AVX-NEXT:    vmulsd {{.*}}(%rip), %xmm0, %xmm0
 254 ; AVX-NEXT:    retq
 255   %v = insertelement <4 x double> undef, double %x, i32 0
 256   %b = fmul <4 x double> %v, <double 42.0, double undef, double undef, double undef>
 257   ret <4 x double> %b
 258 }
 259
 260 define <4 x double> @load_fmul_op1_constant_v4f64(double* %p) nounwind {
     ; The double is loaded from %p, inserted into lane 0 of an undef 4-lane
     ; vector and multiplied by a constant with only lane 0 defined (42.0). The
     ; checks expect a scalar load (movsd / vmovsd) followed by a scalar
     ; multiply with a RIP-relative memory operand.
 261 ; SSE-LABEL: load_fmul_op1_constant_v4f64:
 262 ; SSE:       # %bb.0:
 263 ; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 264 ; SSE-NEXT:    mulsd {{.*}}(%rip), %xmm0
 265 ; SSE-NEXT:    retq
 266 ;
 267 ; AVX-LABEL: load_fmul_op1_constant_v4f64:
 268 ; AVX:       # %bb.0:
 269 ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 270 ; AVX-NEXT:    vmulsd {{.*}}(%rip), %xmm0, %xmm0
 271 ; AVX-NEXT:    retq
 272   %x = load double, double* %p
 273   %v = insertelement <4 x double> undef, double %x, i32 0
 274   %b = fmul <4 x double> %v, <double 42.0, double undef, double undef, double undef>
 275   ret <4 x double> %b
 276 }
 277
 278 define <4 x double> @fdiv_op1_constant_v4f64(double %x) nounwind {
     ; %x is inserted into lane 0 of an undef 4-lane vector and divided by a
     ; constant whose only defined lane is lane 0 (42.0). The checks expect one
     ; scalar divide (divsd / vdivsd) with a RIP-relative memory operand.
 279 ; SSE-LABEL: fdiv_op1_constant_v4f64:
 280 ; SSE:       # %bb.0:
 281 ; SSE-NEXT:    divsd {{.*}}(%rip), %xmm0
 282 ; SSE-NEXT:    retq
 283 ;
 284 ; AVX-LABEL: fdiv_op1_constant_v4f64:
 285 ; AVX:       # %bb.0:
 286 ; AVX-NEXT:    vdivsd {{.*}}(%rip), %xmm0, %xmm0
 287 ; AVX-NEXT:    retq
 288   %v = insertelement <4 x double> undef, double %x, i32 0
 289   %b = fdiv <4 x double> %v, <double 42.0, double undef, double undef, double undef>
 290   ret <4 x double> %b
 291 }
 292
 293 define <4 x double> @load_fdiv_op1_constant_v4f64(double* %p) nounwind {
     ; The double is loaded from %p, inserted into lane 0 of an undef 4-lane
     ; vector and divided by a constant with only lane 0 defined (42.0). The
     ; checks expect a scalar load (movsd / vmovsd) followed by a scalar divide
     ; with a RIP-relative memory operand.
 294 ; SSE-LABEL: load_fdiv_op1_constant_v4f64:
 295 ; SSE:       # %bb.0:
 296 ; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 297 ; SSE-NEXT:    divsd {{.*}}(%rip), %xmm0
 298 ; SSE-NEXT:    retq
 299 ;
 300 ; AVX-LABEL: load_fdiv_op1_constant_v4f64:
 301 ; AVX:       # %bb.0:
 302 ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 303 ; AVX-NEXT:    vdivsd {{.*}}(%rip), %xmm0, %xmm0
 304 ; AVX-NEXT:    retq
 305   %x = load double, double* %p
 306   %v = insertelement <4 x double> undef, double %x, i32 0
 307   %b = fdiv <4 x double> %v, <double 42.0, double undef, double undef, double undef>
 308   ret <4 x double> %b
 309 }
 310
 311 define <4 x double> @fdiv_op0_constant_v4f64(double %x) nounwind {
     ; The constant (42.0 in lane 0, other lanes undef) is the dividend and the
     ; inserted %x is the divisor. The checks expect a scalar load into xmm1 and
     ; a scalar divide (divsd / vdivsd); the two-operand SSE form needs an extra
     ; movapd to move the result into xmm0.
 312 ; SSE-LABEL: fdiv_op0_constant_v4f64:
 313 ; SSE:       # %bb.0:
 314 ; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
 315 ; SSE-NEXT:    divsd %xmm0, %xmm1
 316 ; SSE-NEXT:    movapd %xmm1, %xmm0
 317 ; SSE-NEXT:    retq
 318 ;
 319 ; AVX-LABEL: fdiv_op0_constant_v4f64:
 320 ; AVX:       # %bb.0:
 321 ; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
 322 ; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
 323 ; AVX-NEXT:    retq
 324   %v = insertelement <4 x double> undef, double %x, i32 0
 325   %b = fdiv <4 x double> <double 42.0, double undef, double undef, double undef>, %v
 326   ret <4 x double> %b
 327 }
 328
 329 define <4 x double> @load_fdiv_op0_constant_v4f64(double* %p) nounwind {
     ; Constant (42.0 in lane 0) divided by a double loaded from %p and inserted
     ; into lane 0. The checks expect a scalar load into xmm0 (presumably the
     ; constant) followed by a scalar divide whose memory operand is (%rdi),
     ; i.e. the load of %x is folded into the divide.
 330 ; SSE-LABEL: load_fdiv_op0_constant_v4f64:
 331 ; SSE:       # %bb.0:
 332 ; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 333 ; SSE-NEXT:    divsd (%rdi), %xmm0
 334 ; SSE-NEXT:    retq
 335 ;
 336 ; AVX-LABEL: load_fdiv_op0_constant_v4f64:
 337 ; AVX:       # %bb.0:
 338 ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 339 ; AVX-NEXT:    vdivsd (%rdi), %xmm0, %xmm0
 340 ; AVX-NEXT:    retq
 341   %x = load double, double* %p
 342   %v = insertelement <4 x double> undef, double %x, i32 0
 343   %b = fdiv <4 x double> <double 42.0, double undef, double undef, double undef>, %v
 344   ret <4 x double> %b
 345 }
 346
 347 define <2 x double> @fadd_splat_splat_v2f64(<2 x double> %vx, <2 x double> %vy) {
     ; Both fadd operands are lane-0 splats (zeroinitializer shuffle masks). The
     ; checks expect a scalar add (addsd / vaddsd) of the two inputs followed by
     ; a single splat of the result (unpcklpd / vmovddup).
 348 ; SSE-LABEL: fadd_splat_splat_v2f64:
 349 ; SSE:       # %bb.0:
 350 ; SSE-NEXT:    addsd %xmm1, %xmm0
 351 ; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
 352 ; SSE-NEXT:    retq
 353 ;
 354 ; AVX-LABEL: fadd_splat_splat_v2f64:
 355 ; AVX:       # %bb.0:
 356 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 357 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
 358 ; AVX-NEXT:    retq
 359   %splatx = shufflevector <2 x double> %vx, <2 x double> undef, <2 x i32> zeroinitializer
 360   %splaty = shufflevector <2 x double> %vy, <2 x double> undef, <2 x i32> zeroinitializer
 361   %r = fadd <2 x double> %splatx, %splaty
 362   ret <2 x double> %r
 363 }
 364
 365 define <4 x double> @fsub_splat_splat_v4f64(double %x, double %y) {
     ; Scalars %x and %y are each inserted into lane 0 and splatted to all four
     ; lanes before the fsub. The checks expect a scalar subtract (subsd /
     ; vsubsd), a 128-bit splat of the result, and then a copy of that splat
     ; into the upper half (movapd into xmm1 / vinsertf128 into ymm0).
 366 ; SSE-LABEL: fsub_splat_splat_v4f64:
 367 ; SSE:       # %bb.0:
 368 ; SSE-NEXT:    subsd %xmm1, %xmm0
 369 ; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
 370 ; SSE-NEXT:    movapd %xmm0, %xmm1
 371 ; SSE-NEXT:    retq
 372 ;
 373 ; AVX-LABEL: fsub_splat_splat_v4f64:
 374 ; AVX:       # %bb.0:
 375 ; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
 376 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
 377 ; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 378 ; AVX-NEXT:    retq
 379   %vx = insertelement <4 x double> undef, double %x, i32 0
 380   %vy = insertelement <4 x double> undef, double %y, i32 0
 381   %splatx = shufflevector <4 x double> %vx, <4 x double> undef, <4 x i32> zeroinitializer
 382   %splaty = shufflevector <4 x double> %vy, <4 x double> undef, <4 x i32> zeroinitializer
 383   %r = fsub <4 x double> %splatx, %splaty
 384   ret <4 x double> %r
 385 }
 386
 387 define <4 x float> @fmul_splat_splat_v4f32(<4 x float> %vx, <4 x float> %vy) {
     ; Both operands of the `fmul fast` are lane-0 splats. The checks expect a
     ; scalar multiply (mulss / vmulss) followed by one [0,0,0,0] splat of the
     ; result (shufps / vpermilps).
 388 ; SSE-LABEL: fmul_splat_splat_v4f32:
 389 ; SSE:       # %bb.0:
 390 ; SSE-NEXT:    mulss %xmm1, %xmm0
 391 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 392 ; SSE-NEXT:    retq
 393 ;
 394 ; AVX-LABEL: fmul_splat_splat_v4f32:
 395 ; AVX:       # %bb.0:
 396 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 397 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 398 ; AVX-NEXT:    retq
 399   %splatx = shufflevector <4 x float> %vx, <4 x float> undef, <4 x i32> zeroinitializer
 400   %splaty = shufflevector <4 x float> %vy, <4 x float> undef, <4 x i32> zeroinitializer
 401   %r = fmul fast <4 x float> %splatx, %splaty
 402   ret <4 x float> %r
 403 }
 404
 405 define <8 x float> @fdiv_splat_splat_v8f32(<8 x float> %vx, <8 x float> %vy) {
     ; Both operands of the `fdiv fast` are lane-0 splats of 8-lane vectors. The
     ; checks expect a scalar divide, a 128-bit [0,0,0,0] splat, and a copy of
     ; that splat into the upper half. Without AVX the divisor is read from
     ; xmm2 (presumably because each 256-bit argument spans two xmm registers);
     ; with AVX it is read from xmm1.
 406 ; SSE-LABEL: fdiv_splat_splat_v8f32:
 407 ; SSE:       # %bb.0:
 408 ; SSE-NEXT:    divss %xmm2, %xmm0
 409 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 410 ; SSE-NEXT:    movaps %xmm0, %xmm1
 411 ; SSE-NEXT:    retq
 412 ;
 413 ; AVX-LABEL: fdiv_splat_splat_v8f32:
 414 ; AVX:       # %bb.0:
 415 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 416 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 417 ; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 418 ; AVX-NEXT:    retq
 419   %splatx = shufflevector <8 x float> %vx, <8 x float> undef, <8 x i32> zeroinitializer
 420   %splaty = shufflevector <8 x float> %vy, <8 x float> undef, <8 x i32> zeroinitializer
 421   %r = fdiv fast <8 x float> %splatx, %splaty
 422   ret <8 x float> %r
 423 }
 424
 425 ; Negative test - both operands splat a non-zero index, so the op stays packed (the splat is still sunk below it).
 426
 427 define <2 x double> @fadd_splat_splat_nonzero_v2f64(<2 x double> %vx, <2 x double> %vy) {
     ; Both operands splat lane 1 (mask <1, 1>) rather than lane 0. The checks
     ; expect the add to stay packed (addpd / vaddpd) with a single lane-1
     ; splat applied to the result (unpckhpd / vpermilpd).
 428 ; SSE-LABEL: fadd_splat_splat_nonzero_v2f64:
 429 ; SSE:       # %bb.0:
 430 ; SSE-NEXT:    addpd %xmm1, %xmm0
 431 ; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 432 ; SSE-NEXT:    retq
 433 ;
 434 ; AVX-LABEL: fadd_splat_splat_nonzero_v2f64:
 435 ; AVX:       # %bb.0:
 436 ; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
 437 ; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
 438 ; AVX-NEXT:    retq
 439   %splatx = shufflevector <2 x double> %vx, <2 x double> undef, <2 x i32> <i32 1, i32 1>
 440   %splaty = shufflevector <2 x double> %vy, <2 x double> undef, <2 x i32> <i32 1, i32 1>
 441   %r = fadd <2 x double> %splatx, %splaty
 442   ret <2 x double> %r
 443 }
 444
 445 ; Negative test - the operands splat different indexes (0 and 1), one of them non-zero.
 446
 447 define <2 x double> @fadd_splat_splat_mismatch_v2f64(<2 x double> %vx, <2 x double> %vy) {
     ; %vx is splatted from lane 0 and %vy from lane 1, so the two splat masks
     ; differ. The checks expect both shuffles to remain ahead of a packed add
     ; (addpd / vaddpd).
 448 ; SSE-LABEL: fadd_splat_splat_mismatch_v2f64:
 449 ; SSE:       # %bb.0:
 450 ; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
 451 ; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 452 ; SSE-NEXT:    addpd %xmm1, %xmm0
 453 ; SSE-NEXT:    retq
 454 ;
 455 ; AVX-LABEL: fadd_splat_splat_mismatch_v2f64:
 456 ; AVX:       # %bb.0:
 457 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
 458 ; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,1]
 459 ; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
 460 ; AVX-NEXT:    retq
 461   %splatx = shufflevector <2 x double> %vx, <2 x double> undef, <2 x i32> <i32 0, i32 0>
 462   %splaty = shufflevector <2 x double> %vy, <2 x double> undef, <2 x i32> <i32 1, i32 1>
 463   %r = fadd <2 x double> %splatx, %splaty
 464   ret <2 x double> %r
 465 }
 466
 467 ; Negative test - non-splat.
 468
 469 define <2 x double> @fadd_splat_nonsplat_v2f64(<2 x double> %vx, <2 x double> %vy) {
     ; Only %vx is splatted; the shuffle of %vy uses the identity mask <0, 1>,
     ; which is not a splat. The checks expect the lane-0 splat of xmm0 to stay
     ; in front of a packed add (addpd / vaddpd) with xmm1 used unshuffled.
 470 ; SSE-LABEL: fadd_splat_nonsplat_v2f64:
 471 ; SSE:       # %bb.0:
 472 ; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
 473 ; SSE-NEXT:    addpd %xmm1, %xmm0
 474 ; SSE-NEXT:    retq
 475 ;
 476 ; AVX-LABEL: fadd_splat_nonsplat_v2f64:
 477 ; AVX:       # %bb.0:
 478 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
 479 ; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
 480 ; AVX-NEXT:    retq
 481   %splatx = shufflevector <2 x double> %vx, <2 x double> undef, <2 x i32> <i32 0, i32 0>
 482   %splaty = shufflevector <2 x double> %vy, <2 x double> undef, <2 x i32> <i32 0, i32 1>
 483   %r = fadd <2 x double> %splatx, %splaty
 484   ret <2 x double> %r
 485 }
 486
 487 ; Negative test - non-FP.
 488
 489 define <2 x i64> @add_splat_splat_v2i64(<2 x i64> %vx, <2 x i64> %vy) {
     ; Integer counterpart of the splat+splat pattern: both i64 operands are
     ; lane-0 splats feeding an integer add. The checks expect a packed add
     ; (paddq / vpaddq) followed by one splat of the result (pshufd [0,1,0,1]),
     ; with no scalar add instruction.
 490 ; SSE-LABEL: add_splat_splat_v2i64:
 491 ; SSE:       # %bb.0:
 492 ; SSE-NEXT:    paddq %xmm1, %xmm0
 493 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 494 ; SSE-NEXT:    retq
 495 ;
 496 ; AVX-LABEL: add_splat_splat_v2i64:
 497 ; AVX:       # %bb.0:
 498 ; AVX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
 499 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 500 ; AVX-NEXT:    retq
 501   %splatx = shufflevector <2 x i64> %vx, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
 502   %splaty = shufflevector <2 x i64> %vy, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
 503   %r = add <2 x i64> %splatx, %splaty
 504   ret <2 x i64> %r
 505 }
 506
 507 define <2 x double> @fadd_splat_const_op1_v2f64(<2 x double> %vx) {
     ; A lane-0 splat of %vx is added to a uniform constant vector <42.0, 42.0>.
     ; The checks expect a scalar add (addsd / vaddsd) with a RIP-relative
     ; memory operand followed by one splat of the result.
 508 ; SSE-LABEL: fadd_splat_const_op1_v2f64:
 509 ; SSE:       # %bb.0:
 510 ; SSE-NEXT:    addsd {{.*}}(%rip), %xmm0
 511 ; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
 512 ; SSE-NEXT:    retq
 513 ;
 514 ; AVX-LABEL: fadd_splat_const_op1_v2f64:
 515 ; AVX:       # %bb.0:
 516 ; AVX-NEXT:    vaddsd {{.*}}(%rip), %xmm0, %xmm0
 517 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
 518 ; AVX-NEXT:    retq
 519   %splatx = shufflevector <2 x double> %vx, <2 x double> undef, <2 x i32> zeroinitializer
 520   %r = fadd <2 x double> %splatx, <double 42.0, double 42.0>
 521   ret <2 x double> %r
 522 }
 523
 524 define <4 x double> @fsub_const_op0_splat_v4f64(double %x) {
     ; Operand 0 is a splat built from the constant 8.0 and operand 1 is a splat
     ; built from %x; both go through insertelement + zero-mask shufflevector.
     ; The checks expect a scalar load into xmm1, a scalar subtract (subsd /
     ; vsubsd), a 128-bit splat, and a copy of the splat into the upper half.
 525 ; SSE-LABEL: fsub_const_op0_splat_v4f64:
 526 ; SSE:       # %bb.0:
 527 ; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
 528 ; SSE-NEXT:    subsd %xmm0, %xmm1
 529 ; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
 530 ; SSE-NEXT:    movapd %xmm1, %xmm0
 531 ; SSE-NEXT:    retq
 532 ;
 533 ; AVX-LABEL: fsub_const_op0_splat_v4f64:
 534 ; AVX:       # %bb.0:
 535 ; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
 536 ; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
 537 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
 538 ; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 539 ; AVX-NEXT:    retq
 540   %vx = insertelement <4 x double> undef, double 8.0, i32 0
 541   %vy = insertelement <4 x double> undef, double %x, i32 0
 542   %splatx = shufflevector <4 x double> %vx, <4 x double> undef, <4 x i32> zeroinitializer
 543   %splaty = shufflevector <4 x double> %vy, <4 x double> undef, <4 x i32> zeroinitializer
 544   %r = fsub <4 x double> %splatx, %splaty
 545   ret <4 x double> %r
 546 }
 547
 548 define <4 x float> @fmul_splat_const_op1_v4f32(<4 x float> %vx, <4 x float> %vy) {
     ; A lane-0 splat of %vx is multiplied (`fmul fast`) by a uniform constant
     ; vector of 17.0. The checks expect a scalar multiply (mulss / vmulss) with
     ; a RIP-relative memory operand followed by one [0,0,0,0] splat.
     ; NOTE(review): the %vy parameter is not referenced in the body.
 549 ; SSE-LABEL: fmul_splat_const_op1_v4f32:
 550 ; SSE:       # %bb.0:
 551 ; SSE-NEXT:    mulss {{.*}}(%rip), %xmm0
 552 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 553 ; SSE-NEXT:    retq
 554 ;
 555 ; AVX-LABEL: fmul_splat_const_op1_v4f32:
 556 ; AVX:       # %bb.0:
 557 ; AVX-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
 558 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 559 ; AVX-NEXT:    retq
 560   %splatx = shufflevector <4 x float> %vx, <4 x float> undef, <4 x i32> zeroinitializer
 561   %r = fmul fast <4 x float> %splatx, <float 17.0, float 17.0, float 17.0, float 17.0>
 562   ret <4 x float> %r
 563 }
 564
 565 define <8 x float> @fdiv_splat_const_op0_v8f32(<8 x float> %vy) {
     ; The dividend is a lane-0 splat of a non-uniform constant vector (lane 0
     ; is 4.5) and the divisor is a lane-0 splat of %vy, combined with
     ; `fdiv fast`. The checks expect a scalar load into xmm1, a scalar divide
     ; (divss / vdivss), a 128-bit splat, and a copy into the upper half.
 566 ; SSE-LABEL: fdiv_splat_const_op0_v8f32:
 567 ; SSE:       # %bb.0:
 568 ; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 569 ; SSE-NEXT:    divss %xmm0, %xmm1
 570 ; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
 571 ; SSE-NEXT:    movaps %xmm1, %xmm0
 572 ; SSE-NEXT:    retq
 573 ;
 574 ; AVX-LABEL: fdiv_splat_const_op0_v8f32:
 575 ; AVX:       # %bb.0:
 576 ; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 577 ; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
 578 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 579 ; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 580 ; AVX-NEXT:    retq
 581   %splatx = shufflevector <8 x float> <float 4.5, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, <8 x float> undef, <8 x i32> zeroinitializer
 582   %splaty = shufflevector <8 x float> %vy, <8 x float> undef, <8 x i32> zeroinitializer
 583   %r = fdiv fast <8 x float> %splatx, %splaty
 584   ret <8 x float> %r
 585 }
 586
 587 define <8 x float> @fdiv_const_op1_splat_v8f32(<8 x float> %vx) {
     ; The divisor is a lane-0 splat of a constant vector whose lane 0 is 0.0,
     ; so every lane divides by zero under `fdiv fast`. The checks expect the
     ; zero to be materialized with xorps / vxorps, then a scalar divide, a
     ; 128-bit splat, and a copy of the splat into the upper half.
 588 ; SSE-LABEL: fdiv_const_op1_splat_v8f32:
 589 ; SSE:       # %bb.0:
 590 ; SSE-NEXT:    xorps %xmm1, %xmm1
 591 ; SSE-NEXT:    divss %xmm1, %xmm0
 592 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 593 ; SSE-NEXT:    movaps %xmm0, %xmm1
 594 ; SSE-NEXT:    retq
 595 ;
 596 ; AVX-LABEL: fdiv_const_op1_splat_v8f32:
 597 ; AVX:       # %bb.0:
 598 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 599 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 600 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 601 ; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 602 ; AVX-NEXT:    retq
 603   %splatx = shufflevector <8 x float> %vx, <8 x float> undef, <8 x i32> zeroinitializer
 604   %splaty = shufflevector <8 x float> <float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, <8 x float> undef, <8 x i32> zeroinitializer
 605   %r = fdiv fast <8 x float> %splatx, %splaty
 606   ret <8 x float> %r
 607 }
 608
 609 define <2 x double> @splat0_fadd_v2f64(<2 x double> %vx, <2 x double> %vy) {
     ; The full-vector fadd result is only consumed by a lane-0 splat. The
     ; checks expect the add to be done as a scalar (addsd / vaddsd) followed
     ; by the splat (unpcklpd / vmovddup).
 610 ; SSE-LABEL: splat0_fadd_v2f64:
 611 ; SSE:       # %bb.0:
 612 ; SSE-NEXT:    addsd %xmm1, %xmm0
 613 ; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
 614 ; SSE-NEXT:    retq
 615 ;
 616 ; AVX-LABEL: splat0_fadd_v2f64:
 617 ; AVX:       # %bb.0:
 618 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 619 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
 620 ; AVX-NEXT:    retq
 621   %b = fadd <2 x double> %vx, %vy
 622   %r = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer
 623   ret <2 x double> %r
 624 }
 625
 626 define <4 x double> @splat0_fsub_v4f64(double %x, double %y) {
     ; %x and %y are inserted into lane 0 of undef vectors, subtracted as
     ; vectors, and the result is splatted from lane 0. The checks expect a
     ; scalar subtract (subsd / vsubsd), a 128-bit splat, and a copy of the
     ; splat into the upper half (movapd into xmm1 / vinsertf128 into ymm0).
 627 ; SSE-LABEL: splat0_fsub_v4f64:
 628 ; SSE:       # %bb.0:
 629 ; SSE-NEXT:    subsd %xmm1, %xmm0
 630 ; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
 631 ; SSE-NEXT:    movapd %xmm0, %xmm1
 632 ; SSE-NEXT:    retq
 633 ;
 634 ; AVX-LABEL: splat0_fsub_v4f64:
 635 ; AVX:       # %bb.0:
 636 ; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
 637 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
 638 ; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 639 ; AVX-NEXT:    retq
 640   %vx = insertelement <4 x double> undef, double %x, i32 0
 641   %vy = insertelement <4 x double> undef, double %y, i32 0
 642   %b = fsub <4 x double> %vx, %vy
 643   %r = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
 644   ret <4 x double> %r
 645 }
 646
 647 define <4 x float> @splat0_fmul_v4f32(<4 x float> %vx, <4 x float> %vy) {
     ; The full-vector `fmul fast` result is only consumed by a lane-0 splat.
     ; The checks expect a scalar multiply (mulss / vmulss) followed by a
     ; [0,0,0,0] splat (shufps / vpermilps).
 648 ; SSE-LABEL: splat0_fmul_v4f32:
 649 ; SSE:       # %bb.0:
 650 ; SSE-NEXT:    mulss %xmm1, %xmm0
 651 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 652 ; SSE-NEXT:    retq
 653 ;
 654 ; AVX-LABEL: splat0_fmul_v4f32:
 655 ; AVX:       # %bb.0:
 656 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 657 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 658 ; AVX-NEXT:    retq
 659   %b = fmul fast <4 x float> %vx, %vy
 660   %r = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
 661   ret <4 x float> %r
 662 }
 663
 664 define <8 x float> @splat0_fdiv_v8f32(<8 x float> %vx, <8 x float> %vy) {
     ; The 8-lane `fdiv fast` result is only consumed by a lane-0 splat. The
     ; checks expect a scalar divide, a 128-bit [0,0,0,0] splat, and a copy of
     ; the splat into the upper half. Without AVX the divisor is read from xmm2;
     ; with AVX it is read from xmm1.
 665 ; SSE-LABEL: splat0_fdiv_v8f32:
 666 ; SSE:       # %bb.0:
 667 ; SSE-NEXT:    divss %xmm2, %xmm0
 668 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 669 ; SSE-NEXT:    movaps %xmm0, %xmm1
 670 ; SSE-NEXT:    retq
 671 ;
 672 ; AVX-LABEL: splat0_fdiv_v8f32:
 673 ; AVX:       # %bb.0:
 674 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 675 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 676 ; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 677 ; AVX-NEXT:    retq
 678   %b = fdiv fast <8 x float> %vx, %vy
 679   %r = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer
 680   ret <8 x float> %r
 681 }
 682
 683 define <2 x double> @splat0_fadd_const_op1_v2f64(<2 x double> %vx) {
     ; %vx is added to the non-uniform constant <42.0, 12.0> and only lane 0 of
     ; the sum is splatted. The checks expect a scalar add (addsd / vaddsd)
     ; with a RIP-relative memory operand followed by the splat.
 684 ; SSE-LABEL: splat0_fadd_const_op1_v2f64:
 685 ; SSE:       # %bb.0:
 686 ; SSE-NEXT:    addsd {{.*}}(%rip), %xmm0
 687 ; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
 688 ; SSE-NEXT:    retq
 689 ;
 690 ; AVX-LABEL: splat0_fadd_const_op1_v2f64:
 691 ; AVX:       # %bb.0:
 692 ; AVX-NEXT:    vaddsd {{.*}}(%rip), %xmm0, %xmm0
 693 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
 694 ; AVX-NEXT:    retq
 695   %b = fadd <2 x double> %vx, <double 42.0, double 12.0>
 696   %r = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer
 697   ret <2 x double> %r
 698 }
 699
 700 define <4 x double> @splat0_fsub_const_op0_v4f64(double %x) {
     ; A non-uniform constant vector (lane 0 is -42.0) minus a vector holding %x
     ; in lane 0; only lane 0 of the difference is splatted. The checks expect
     ; a scalar load into xmm1, a scalar subtract (subsd / vsubsd), a 128-bit
     ; splat, and a copy of the splat into the upper half.
 701 ; SSE-LABEL: splat0_fsub_const_op0_v4f64:
 702 ; SSE:       # %bb.0:
 703 ; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
 704 ; SSE-NEXT:    subsd %xmm0, %xmm1
 705 ; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
 706 ; SSE-NEXT:    movapd %xmm1, %xmm0
 707 ; SSE-NEXT:    retq
 708 ;
 709 ; AVX-LABEL: splat0_fsub_const_op0_v4f64:
 710 ; AVX:       # %bb.0:
 711 ; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
 712 ; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
 713 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
 714 ; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 715 ; AVX-NEXT:    retq
 716   %vx = insertelement <4 x double> undef, double %x, i32 0
 717   %b = fsub <4 x double> <double -42.0, double 42.0, double 0.0, double 1.0>, %vx
 718   %r = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
 719   ret <4 x double> %r
 720 }
 721
 722 define <4 x float> @splat0_fmul_const_op1_v4f32(<4 x float> %vx) {
     ; %vx is multiplied (`fmul fast`) by a non-uniform constant vector (lane 0
     ; is 6.0) and only lane 0 of the product is splatted. The checks expect a
     ; scalar multiply (mulss / vmulss) with a RIP-relative memory operand
     ; followed by a [0,0,0,0] splat.
 723 ; SSE-LABEL: splat0_fmul_const_op1_v4f32:
 724 ; SSE:       # %bb.0:
 725 ; SSE-NEXT:    mulss {{.*}}(%rip), %xmm0
 726 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 727 ; SSE-NEXT:    retq
 728 ;
 729 ; AVX-LABEL: splat0_fmul_const_op1_v4f32:
 730 ; AVX:       # %bb.0:
 731 ; AVX-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
 732 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 733 ; AVX-NEXT:    retq
 734   %b = fmul fast <4 x float> %vx, <float 6.0, float -1.0, float 1.0, float 7.0>
 735   %r = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
 736   ret <4 x float> %r
 737 }
 738
 739 define <8 x float> @splat0_fdiv_const_op1_v8f32(<8 x float> %vx) {
     ; %vx is divided (`fdiv fast`) by a constant vector whose lane 0 is 1.0,
     ; and only lane 0 of the quotient is splatted. The checks contain no divide
     ; instruction at all: just the 128-bit splat of xmm0 and the copy into the
     ; upper half, i.e. the division by 1.0 in the used lane is folded away.
 740 ; SSE-LABEL: splat0_fdiv_const_op1_v8f32:
 741 ; SSE:       # %bb.0:
 742 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 743 ; SSE-NEXT:    movaps %xmm0, %xmm1
 744 ; SSE-NEXT:    retq
 745 ;
 746 ; AVX-LABEL: splat0_fdiv_const_op1_v8f32:
 747 ; AVX:       # %bb.0:
 748 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 749 ; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 750 ; AVX-NEXT:    retq
 751   %b = fdiv fast <8 x float> %vx, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>
 752   %r = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer
 753   ret <8 x float> %r
 754 }
 755
 756 define <8 x float> @splat0_fdiv_const_op0_v8f32(<8 x float> %vx) {
     ; A constant vector (lane 0 is 1.0) is divided (`fdiv fast`) by %vx and
     ; only lane 0 of the quotient is splatted. Here the constant is the
     ; dividend, so the checks still expect a scalar load into xmm1, a scalar
     ; divide (divss / vdivss), a 128-bit splat, and a copy into the upper half.
 757 ; SSE-LABEL: splat0_fdiv_const_op0_v8f32:
 758 ; SSE:       # %bb.0:
 759 ; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 760 ; SSE-NEXT:    divss %xmm0, %xmm1
 761 ; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
 762 ; SSE-NEXT:    movaps %xmm1, %xmm0
 763 ; SSE-NEXT:    retq
 764 ;
 765 ; AVX-LABEL: splat0_fdiv_const_op0_v8f32:
 766 ; AVX:       # %bb.0:
 767 ; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 768 ; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
 769 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 770 ; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 771 ; AVX-NEXT:    retq
 772   %b = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %vx
 773   %r = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer
 774   ret <8 x float> %r
 775 }
 776
 777 define <4 x float> @multi_use_binop(<4 x float> %x, <4 x float> %y) {
     ; The fmul result has two users: %mul0 moves lane 0 of the product into
     ; lane 3 and %mul1 moves lane 1 into lane 3 (all other mask lanes are
     ; undef); the two shuffles are then added. The checks expect the multiply
     ; to stay packed (mulps / vmulps), followed by two shuffles and a packed
     ; add (addps / vaddps).
 778 ; SSE-LABEL: multi_use_binop:
 779 ; SSE:       # %bb.0:
 780 ; SSE-NEXT:    mulps %xmm1, %xmm0
 781 ; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 782 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1,2,0]
 783 ; SSE-NEXT:    addps %xmm1, %xmm0
 784 ; SSE-NEXT:    retq
 785 ;
 786 ; AVX-LABEL: multi_use_binop:
 787 ; AVX:       # %bb.0:
 788 ; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
 789 ; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,1,2,0]
 790 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
 791 ; AVX-NEXT:    vaddps %xmm0, %xmm1, %xmm0
 792 ; AVX-NEXT:    retq
 793   %mul = fmul <4 x float> %x, %y
 794   %mul0 = shufflevector <4 x float> %mul, <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 0>
 795   %mul1 = shufflevector <4 x float> %mul, <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 1>
 796   %r = fadd <4 x float> %mul0, %mul1
 797   ret <4 x float> %r
 798 }