; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
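
; f1/f2: scalar double. Without a fence, the fast-math flags let the backend fold
; (a + a) + (a + a) into a single multiply by 4.0; with llvm.arithmetic.fence.f64
; in between, the two adds stay separate and the fence shows up as #ARITH_FENCE.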
define double @f1(double %a) {
; X86-LABEL: f1:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %ebp, -8
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    .cfi_def_cfa_register %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT:    movsd %xmm0, (%esp)
; X86-NEXT:    fldl (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    .cfi_def_cfa %esp, 4
; X86-NEXT:    retl
;
; X64-LABEL: f1:
; X64:       # %bb.0:
; X64-NEXT:    mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    retq
  %1 = fadd fast double %a, %a
  %2 = fadd fast double %a, %a
  %3 = fadd fast double %1, %2
  ret double %3
}

define double @f2(double %a) {
; X86-LABEL: f2:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %ebp, -8
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    .cfi_def_cfa_register %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    addsd %xmm0, %xmm0
; X86-NEXT:    movapd %xmm0, %xmm1
; X86-NEXT:    #ARITH_FENCE
; X86-NEXT:    addsd %xmm0, %xmm1
; X86-NEXT:    movsd %xmm1, (%esp)
; X86-NEXT:    fldl (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    .cfi_def_cfa %esp, 4
; X86-NEXT:    retl
;
; X64-LABEL: f2:
; X64:       # %bb.0:
; X64-NEXT:    addsd %xmm0, %xmm0
; X64-NEXT:    movapd %xmm0, %xmm1
; X64-NEXT:    #ARITH_FENCE
; X64-NEXT:    addsd %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = fadd fast double %a, %a
  %t = call double @llvm.arithmetic.fence.f64(double %1)
  %2 = fadd fast double %a, %a
  %3 = fadd fast double %t, %2
  ret double %3
}
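
; f3/f4: the same pattern on <2 x float>, using llvm.arithmetic.fence.v2f32.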
define <2 x float> @f3(<2 x float> %a) {
; X86-LABEL: f3:
; X86:       # %bb.0:
; X86-NEXT:    mulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: f3:
; X64:       # %bb.0:
; X64-NEXT:    mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    retq
  %1 = fadd fast <2 x float> %a, %a
  %2 = fadd fast <2 x float> %a, %a
  %3 = fadd fast <2 x float> %1, %2
  ret <2 x float> %3
}

define <2 x float> @f4(<2 x float> %a) {
; X86-LABEL: f4:
; X86:       # %bb.0:
; X86-NEXT:    addps %xmm0, %xmm0
; X86-NEXT:    movaps %xmm0, %xmm1
; X86-NEXT:    #ARITH_FENCE
; X86-NEXT:    addps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: f4:
; X64:       # %bb.0:
; X64-NEXT:    addps %xmm0, %xmm0
; X64-NEXT:    movaps %xmm0, %xmm1
; X64-NEXT:    #ARITH_FENCE
; X64-NEXT:    addps %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = fadd fast <2 x float> %a, %a
  %t = call <2 x float> @llvm.arithmetic.fence.v2f32(<2 x float> %1)
  %2 = fadd fast <2 x float> %a, %a
  %3 = fadd fast <2 x float> %t, %2
  ret <2 x float> %3
}
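
; f5/f6: <8 x float> is split across two XMM registers under SSE2, so the fenced
; version emits two #ARITH_FENCE markers.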
define <8 x float> @f5(<8 x float> %a) {
; X86-LABEL: f5:
; X86:       # %bb.0:
; X86-NEXT:    movaps {{.*#+}} xmm2 = [4.0E+0,4.0E+0,4.0E+0,4.0E+0]
; X86-NEXT:    mulps %xmm2, %xmm0
; X86-NEXT:    mulps %xmm2, %xmm1
; X86-NEXT:    retl
;
; X64-LABEL: f5:
; X64:       # %bb.0:
; X64-NEXT:    movaps {{.*#+}} xmm2 = [4.0E+0,4.0E+0,4.0E+0,4.0E+0]
; X64-NEXT:    mulps %xmm2, %xmm0
; X64-NEXT:    mulps %xmm2, %xmm1
; X64-NEXT:    retq
  %1 = fadd fast <8 x float> %a, %a
  %2 = fadd fast <8 x float> %a, %a
  %3 = fadd fast <8 x float> %1, %2
  ret <8 x float> %3
}

define <8 x float> @f6(<8 x float> %a) {
; X86-LABEL: f6:
; X86:       # %bb.0:
; X86-NEXT:    addps %xmm0, %xmm0
; X86-NEXT:    addps %xmm1, %xmm1
; X86-NEXT:    movaps %xmm1, %xmm2
; X86-NEXT:    #ARITH_FENCE
; X86-NEXT:    movaps %xmm0, %xmm3
; X86-NEXT:    #ARITH_FENCE
; X86-NEXT:    addps %xmm3, %xmm0
; X86-NEXT:    addps %xmm2, %xmm1
; X86-NEXT:    retl
;
; X64-LABEL: f6:
; X64:       # %bb.0:
; X64-NEXT:    addps %xmm0, %xmm0
; X64-NEXT:    addps %xmm1, %xmm1
; X64-NEXT:    movaps %xmm1, %xmm2
; X64-NEXT:    #ARITH_FENCE
; X64-NEXT:    movaps %xmm0, %xmm3
; X64-NEXT:    #ARITH_FENCE
; X64-NEXT:    addps %xmm3, %xmm0
; X64-NEXT:    addps %xmm2, %xmm1
; X64-NEXT:    retq
  %1 = fadd fast <8 x float> %a, %a
  %t = call <8 x float> @llvm.arithmetic.fence.v8f32(<8 x float> %1)
  %2 = fadd fast <8 x float> %a, %a
  %3 = fadd fast <8 x float> %t, %2
  ret <8 x float> %3
}

declare float @llvm.arithmetic.fence.f32(float)
declare double @llvm.arithmetic.fence.f64(double)
declare <2 x float> @llvm.arithmetic.fence.v2f32(<2 x float>)
declare <8 x float> @llvm.arithmetic.fence.v8f32(<8 x float>)