; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512

; Verify that we're folding the load into the math instruction.
; This pattern is generated from the simplest intrinsics usage:
;  _mm_add_ss(a, _mm_load_ss(b));
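;
; For reference, a minimal C sketch of that usage (the wrapper name
; `add_low` and the <xmmintrin.h> include are illustrative assumptions,
; not part of the original test):
;
;   #include <xmmintrin.h>
;
;   __m128 add_low(__m128 a, const float *b) {
;     // _mm_load_ss loads one float into the low lane (upper lanes zeroed);
;     // _mm_add_ss adds the low lanes and passes the upper lanes of `a`
;     // through unchanged, yielding the extract/load/fadd/insert IR below.
;     return _mm_add_ss(a, _mm_load_ss(b));
;   }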
define <4 x float> @addss(<4 x float> %va, float* %pb) {
; SSE-LABEL: addss:
; SSE:       # %bb.0:
; SSE-NEXT:    addss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: addss:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fadd float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}

define <2 x double> @addsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: addsd:
; SSE:       # %bb.0:
; SSE-NEXT:    addsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: addsd:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fadd double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}

define <4 x float> @subss(<4 x float> %va, float* %pb) {
; SSE-LABEL: subss:
; SSE:       # %bb.0:
; SSE-NEXT:    subss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: subss:
; AVX:       # %bb.0:
; AVX-NEXT:    vsubss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fsub float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}

define <2 x double> @subsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: subsd:
; SSE:       # %bb.0:
; SSE-NEXT:    subsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: subsd:
; AVX:       # %bb.0:
; AVX-NEXT:    vsubsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fsub double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}

define <4 x float> @mulss(<4 x float> %va, float* %pb) {
; SSE-LABEL: mulss:
; SSE:       # %bb.0:
; SSE-NEXT:    mulss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mulss:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fmul float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}

define <2 x double> @mulsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: mulsd:
; SSE:       # %bb.0:
; SSE-NEXT:    mulsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mulsd:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fmul double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}

define <4 x float> @divss(<4 x float> %va, float* %pb) {
; SSE-LABEL: divss:
; SSE:       # %bb.0:
; SSE-NEXT:    divss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: divss:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fdiv float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}

define <2 x double> @divsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: divsd:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: divsd:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fdiv double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}