1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+sse | FileCheck --check-prefix=WIN32 %s
3 ; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse | FileCheck --check-prefix=WIN64 %s
4 ; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+sse | FileCheck --check-prefix=LINUXOSX %s
6 ; Test regcall when receiving/returning i1
; The autogenerated checks below expect an `incb %al` for every tested
; configuration (32-bit Windows, 64-bit Windows, 64-bit Linux), i.e. the i1
; value is operated on in %al.  The "kill" annotation that follows each incb
; is a register-liveness comment emitted by llc relating $al to $eax.
7 define x86_regcallcc i1 @test_argReti1(i1 %a) {
8 ; WIN32-LABEL: test_argReti1:
10 ; WIN32-NEXT: incb %al
11 ; WIN32-NEXT: # kill: def $al killed $al killed $eax
14 ; WIN64-LABEL: test_argReti1:
16 ; WIN64-NEXT: incb %al
17 ; WIN64-NEXT: # kill: def $al killed $al killed $eax
20 ; LINUXOSX-LABEL: test_argReti1:
22 ; LINUXOSX-NEXT: incb %al
23 ; LINUXOSX-NEXT: # kill: def $al killed $al killed $eax
29 ; Test regcall when passing/retrieving i1
; Expected sequence on every configuration: increment %al, zero-extend it
; into %eax with movzbl, call test_argReti1, then increment the %al that
; comes back.  Differences between the configurations, as pinned by the
; checks below:
;   - 32-bit Windows calls the underscore-prefixed symbol _test_argReti1.
;   - 64-bit Windows wraps the call in pushq %rax / popq %rcx and emits
;     .seh_* unwind directives.
;   - 64-bit Linux wraps the call in pushq %rax / popq %rcx with CFA offset
;     updates and calls indirectly through the GOT
;     (test_argReti1@GOTPCREL(%rip)).
30 define x86_regcallcc i1 @test_CallargReti1(i1 %a) {
31 ; WIN32-LABEL: test_CallargReti1:
33 ; WIN32-NEXT: incb %al
34 ; WIN32-NEXT: movzbl %al, %eax
35 ; WIN32-NEXT: calll _test_argReti1
36 ; WIN32-NEXT: incb %al
39 ; WIN64-LABEL: test_CallargReti1:
41 ; WIN64-NEXT: pushq %rax
42 ; WIN64-NEXT: .seh_stackalloc 8
43 ; WIN64-NEXT: .seh_endprologue
44 ; WIN64-NEXT: incb %al
45 ; WIN64-NEXT: movzbl %al, %eax
46 ; WIN64-NEXT: callq test_argReti1
47 ; WIN64-NEXT: incb %al
48 ; WIN64-NEXT: popq %rcx
50 ; WIN64-NEXT: .seh_endproc
52 ; LINUXOSX-LABEL: test_CallargReti1:
54 ; LINUXOSX-NEXT: pushq %rax
55 ; LINUXOSX-NEXT: .cfi_def_cfa_offset 16
56 ; LINUXOSX-NEXT: incb %al
57 ; LINUXOSX-NEXT: movzbl %al, %eax
58 ; LINUXOSX-NEXT: callq *test_argReti1@GOTPCREL(%rip)
59 ; LINUXOSX-NEXT: incb %al
60 ; LINUXOSX-NEXT: popq %rcx
61 ; LINUXOSX-NEXT: .cfi_def_cfa_offset 8
; Call test_argReti1 with the regcall convention; %c receives its i1 result.
64 %c = call x86_regcallcc i1 @test_argReti1(i1 %b)
69 ;test calling conventions - input parameters, callee saved xmms
; Element-wise on <16 x float>: %x4 = (%a + %b) - (%a * %b) + %c.
; Each <16 x float> value occupies four 128-bit XMM registers in the expected
; code, so every IR operation shows up as four addps/mulps/subps.
; What the checks below pin down per configuration:
;   - 32-bit Windows: a 16-byte-aligned frame (andl $-16, %esp), one 16-byte
;     spill of %xmm7, and the final addend of each quarter loaded from the
;     stack with movups at 8, 24, 40 and 56 off %ebp.
;   - 64-bit Windows and 64-bit Linux: %xmm12-%xmm15 are spilled before being
;     used as temporaries and reloaded before returning; the final addends
;     are taken from %xmm8-%xmm11.  The 64-bit Windows variant additionally
;     adjusts %rsp by 72 bytes around the body.
70 define x86_regcallcc <16 x float> @testf32_inp(<16 x float> %a, <16 x float> %b, <16 x float> %c) nounwind {
71 ; WIN32-LABEL: testf32_inp:
73 ; WIN32-NEXT: pushl %ebp
74 ; WIN32-NEXT: movl %esp, %ebp
75 ; WIN32-NEXT: andl $-16, %esp
76 ; WIN32-NEXT: subl $32, %esp
77 ; WIN32-NEXT: movaps %xmm7, (%esp) # 16-byte Spill
78 ; WIN32-NEXT: movaps %xmm6, %xmm7
79 ; WIN32-NEXT: movaps %xmm5, %xmm6
80 ; WIN32-NEXT: movaps %xmm4, %xmm5
81 ; WIN32-NEXT: movaps %xmm1, %xmm4
82 ; WIN32-NEXT: movaps %xmm0, %xmm1
83 ; WIN32-NEXT: addps %xmm5, %xmm0
84 ; WIN32-NEXT: mulps %xmm5, %xmm1
85 ; WIN32-NEXT: subps %xmm1, %xmm0
86 ; WIN32-NEXT: movups 8(%ebp), %xmm1
87 ; WIN32-NEXT: addps %xmm1, %xmm0
88 ; WIN32-NEXT: movaps %xmm4, %xmm1
89 ; WIN32-NEXT: addps %xmm6, %xmm1
90 ; WIN32-NEXT: mulps %xmm6, %xmm4
91 ; WIN32-NEXT: subps %xmm4, %xmm1
92 ; WIN32-NEXT: movups 24(%ebp), %xmm4
93 ; WIN32-NEXT: addps %xmm4, %xmm1
94 ; WIN32-NEXT: movaps %xmm2, %xmm4
95 ; WIN32-NEXT: addps %xmm7, %xmm4
96 ; WIN32-NEXT: mulps %xmm7, %xmm2
97 ; WIN32-NEXT: subps %xmm2, %xmm4
98 ; WIN32-NEXT: movups 40(%ebp), %xmm2
99 ; WIN32-NEXT: addps %xmm2, %xmm4
100 ; WIN32-NEXT: movaps %xmm3, %xmm5
101 ; WIN32-NEXT: movaps (%esp), %xmm2 # 16-byte Reload
102 ; WIN32-NEXT: addps %xmm2, %xmm5
103 ; WIN32-NEXT: mulps %xmm2, %xmm3
104 ; WIN32-NEXT: subps %xmm3, %xmm5
105 ; WIN32-NEXT: movups 56(%ebp), %xmm2
106 ; WIN32-NEXT: addps %xmm2, %xmm5
107 ; WIN32-NEXT: movaps %xmm4, %xmm2
108 ; WIN32-NEXT: movaps %xmm5, %xmm3
109 ; WIN32-NEXT: movl %ebp, %esp
110 ; WIN32-NEXT: popl %ebp
113 ; WIN64-LABEL: testf32_inp:
115 ; WIN64-NEXT: subq $72, %rsp
116 ; WIN64-NEXT: movaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
117 ; WIN64-NEXT: movaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
118 ; WIN64-NEXT: movaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
119 ; WIN64-NEXT: movaps %xmm12, (%rsp) # 16-byte Spill
120 ; WIN64-NEXT: movaps %xmm0, %xmm12
121 ; WIN64-NEXT: addps %xmm4, %xmm12
122 ; WIN64-NEXT: movaps %xmm1, %xmm13
123 ; WIN64-NEXT: addps %xmm5, %xmm13
124 ; WIN64-NEXT: movaps %xmm2, %xmm14
125 ; WIN64-NEXT: addps %xmm6, %xmm14
126 ; WIN64-NEXT: movaps %xmm3, %xmm15
127 ; WIN64-NEXT: addps %xmm7, %xmm15
128 ; WIN64-NEXT: mulps %xmm4, %xmm0
129 ; WIN64-NEXT: subps %xmm0, %xmm12
130 ; WIN64-NEXT: mulps %xmm5, %xmm1
131 ; WIN64-NEXT: subps %xmm1, %xmm13
132 ; WIN64-NEXT: mulps %xmm6, %xmm2
133 ; WIN64-NEXT: subps %xmm2, %xmm14
134 ; WIN64-NEXT: mulps %xmm7, %xmm3
135 ; WIN64-NEXT: subps %xmm3, %xmm15
136 ; WIN64-NEXT: addps %xmm8, %xmm12
137 ; WIN64-NEXT: addps %xmm9, %xmm13
138 ; WIN64-NEXT: addps %xmm10, %xmm14
139 ; WIN64-NEXT: addps %xmm11, %xmm15
140 ; WIN64-NEXT: movaps %xmm12, %xmm0
141 ; WIN64-NEXT: movaps %xmm13, %xmm1
142 ; WIN64-NEXT: movaps %xmm14, %xmm2
143 ; WIN64-NEXT: movaps %xmm15, %xmm3
144 ; WIN64-NEXT: movaps (%rsp), %xmm12 # 16-byte Reload
145 ; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
146 ; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
147 ; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
148 ; WIN64-NEXT: addq $72, %rsp
151 ; LINUXOSX-LABEL: testf32_inp:
153 ; LINUXOSX-NEXT: movaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
154 ; LINUXOSX-NEXT: movaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
155 ; LINUXOSX-NEXT: movaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
156 ; LINUXOSX-NEXT: movaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
157 ; LINUXOSX-NEXT: movaps %xmm0, %xmm12
158 ; LINUXOSX-NEXT: addps %xmm4, %xmm12
159 ; LINUXOSX-NEXT: movaps %xmm1, %xmm13
160 ; LINUXOSX-NEXT: addps %xmm5, %xmm13
161 ; LINUXOSX-NEXT: movaps %xmm2, %xmm14
162 ; LINUXOSX-NEXT: addps %xmm6, %xmm14
163 ; LINUXOSX-NEXT: movaps %xmm3, %xmm15
164 ; LINUXOSX-NEXT: addps %xmm7, %xmm15
165 ; LINUXOSX-NEXT: mulps %xmm4, %xmm0
166 ; LINUXOSX-NEXT: subps %xmm0, %xmm12
167 ; LINUXOSX-NEXT: mulps %xmm5, %xmm1
168 ; LINUXOSX-NEXT: subps %xmm1, %xmm13
169 ; LINUXOSX-NEXT: mulps %xmm6, %xmm2
170 ; LINUXOSX-NEXT: subps %xmm2, %xmm14
171 ; LINUXOSX-NEXT: mulps %xmm7, %xmm3
172 ; LINUXOSX-NEXT: subps %xmm3, %xmm15
173 ; LINUXOSX-NEXT: addps %xmm8, %xmm12
174 ; LINUXOSX-NEXT: addps %xmm9, %xmm13
175 ; LINUXOSX-NEXT: addps %xmm10, %xmm14
176 ; LINUXOSX-NEXT: addps %xmm11, %xmm15
177 ; LINUXOSX-NEXT: movaps %xmm12, %xmm0
178 ; LINUXOSX-NEXT: movaps %xmm13, %xmm1
179 ; LINUXOSX-NEXT: movaps %xmm14, %xmm2
180 ; LINUXOSX-NEXT: movaps %xmm15, %xmm3
181 ; LINUXOSX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
182 ; LINUXOSX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
183 ; LINUXOSX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
184 ; LINUXOSX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
185 ; LINUXOSX-NEXT: retq
; %x1 = a + b, %x2 = a * b; both operands are reused so that the expected code
; has to keep copies of the inputs alive (the source of the register pressure).
186 %x1 = fadd <16 x float> %a, %b
187 %x2 = fmul <16 x float> %a, %b
; %x3 = (a + b) - (a * b)
188 %x3 = fsub <16 x float> %x1, %x2
; %x4 = %x3 + c, the third vector argument.
189 %x4 = fadd <16 x float> %x3, %c
193 ;test calling conventions - input parameters, callee saved GPRs
; Takes twelve i32 arguments (%a1-%a6 and %b1-%b6).  The IR below computes
;   %r1 = (a1-a2)*(b1-b2) + (a3-a4)*(b3-b4) + (a5-a6)*(b5-b6)
;       + (a1+a2)*(b1+b2) + (a3+a4)*(b3+b4) + (a5+a6)*(b5+b6)
; so all twelve inputs stay live across the whole body.
; NOTE: the `define` is written across two source lines; the autogenerated
; check lines sit between its first half (the %a parameters) and its second
; half (the %b parameters followed by `nounwind {`).
; What the checks below pin down per configuration:
;   - 32-bit Windows: %ebp and %ebx are pushed/popped, 12 bytes of spill
;     space are used, and several operands are read from %esp-relative stack
;     slots.
;   - 64-bit Windows: %rbp and %rbx are pushed/popped; no stack loads appear
;     in the body.
;   - 64-bit Linux: no pushes appear; exactly one operand is loaded from an
;     %rsp-relative slot.
194 define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6,
195 ; WIN32-LABEL: testi32_inp:
197 ; WIN32-NEXT: pushl %ebp
198 ; WIN32-NEXT: pushl %ebx
199 ; WIN32-NEXT: subl $12, %esp
200 ; WIN32-NEXT: movl %esi, (%esp) # 4-byte Spill
201 ; WIN32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
202 ; WIN32-NEXT: movl %eax, %ebp
203 ; WIN32-NEXT: leal (%edx,%edi), %eax
204 ; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
205 ; WIN32-NEXT: movl %edx, %eax
206 ; WIN32-NEXT: subl %edi, %eax
207 ; WIN32-NEXT: movl %ebp, %edx
208 ; WIN32-NEXT: subl %ecx, %edx
209 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
210 ; WIN32-NEXT: subl {{[0-9]+}}(%esp), %ebx
211 ; WIN32-NEXT: imull %edx, %ebx
212 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
213 ; WIN32-NEXT: movl %esi, %edx
214 ; WIN32-NEXT: subl {{[0-9]+}}(%esp), %edx
215 ; WIN32-NEXT: imull %eax, %edx
216 ; WIN32-NEXT: addl %ebx, %edx
217 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
218 ; WIN32-NEXT: movl (%esp), %edi # 4-byte Reload
219 ; WIN32-NEXT: subl %ebx, %edi
220 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
221 ; WIN32-NEXT: movl %ecx, %eax
222 ; WIN32-NEXT: subl {{[0-9]+}}(%esp), %eax
223 ; WIN32-NEXT: imull %edi, %eax
224 ; WIN32-NEXT: addl %edx, %eax
225 ; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
226 ; WIN32-NEXT: addl (%esp), %ebx # 4-byte Folded Reload
227 ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
228 ; WIN32-NEXT: addl {{[0-9]+}}(%esp), %edx
229 ; WIN32-NEXT: imull %edx, %ebp
230 ; WIN32-NEXT: addl {{[0-9]+}}(%esp), %esi
231 ; WIN32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
232 ; WIN32-NEXT: addl %esi, %ebp
233 ; WIN32-NEXT: addl {{[0-9]+}}(%esp), %ecx
234 ; WIN32-NEXT: imull %ebx, %ecx
235 ; WIN32-NEXT: addl %ecx, %ebp
236 ; WIN32-NEXT: addl %eax, %ebp
237 ; WIN32-NEXT: movl %ebp, %eax
238 ; WIN32-NEXT: addl $12, %esp
239 ; WIN32-NEXT: popl %ebx
240 ; WIN32-NEXT: popl %ebp
243 ; WIN64-LABEL: testi32_inp:
245 ; WIN64-NEXT: pushq %rbp
246 ; WIN64-NEXT: pushq %rbx
247 ; WIN64-NEXT: # kill: def $edx killed $edx def $rdx
248 ; WIN64-NEXT: # kill: def $esi killed $esi def $rsi
249 ; WIN64-NEXT: # kill: def $r15d killed $r15d def $r15
250 ; WIN64-NEXT: # kill: def $r14d killed $r14d def $r14
251 ; WIN64-NEXT: # kill: def $r12d killed $r12d def $r12
252 ; WIN64-NEXT: # kill: def $r11d killed $r11d def $r11
253 ; WIN64-NEXT: # kill: def $r10d killed $r10d def $r10
254 ; WIN64-NEXT: # kill: def $r9d killed $r9d def $r9
255 ; WIN64-NEXT: # kill: def $r8d killed $r8d def $r8
256 ; WIN64-NEXT: # kill: def $edi killed $edi def $rdi
257 ; WIN64-NEXT: leal (%rdx,%rdi), %ebx
258 ; WIN64-NEXT: movl %edx, %ebp
259 ; WIN64-NEXT: subl %edi, %ebp
260 ; WIN64-NEXT: leal (%rsi,%r8), %edx
261 ; WIN64-NEXT: # kill: def $esi killed $esi killed $rsi
262 ; WIN64-NEXT: subl %r8d, %esi
263 ; WIN64-NEXT: leal (%r9,%r10), %edi
264 ; WIN64-NEXT: movl %r9d, %r8d
265 ; WIN64-NEXT: subl %r10d, %r8d
266 ; WIN64-NEXT: movl %eax, %r9d
267 ; WIN64-NEXT: subl %ecx, %r9d
268 ; WIN64-NEXT: imull %r9d, %r8d
269 ; WIN64-NEXT: leal (%r11,%r12), %r9d
270 ; WIN64-NEXT: movl %r11d, %r10d
271 ; WIN64-NEXT: subl %r12d, %r10d
272 ; WIN64-NEXT: imull %ebp, %r10d
273 ; WIN64-NEXT: addl %r8d, %r10d
274 ; WIN64-NEXT: leal (%r14,%r15), %r8d
275 ; WIN64-NEXT: movl %r14d, %r11d
276 ; WIN64-NEXT: subl %r15d, %r11d
277 ; WIN64-NEXT: imull %esi, %r11d
278 ; WIN64-NEXT: addl %r10d, %r11d
279 ; WIN64-NEXT: addl %ecx, %eax
280 ; WIN64-NEXT: imull %edi, %eax
281 ; WIN64-NEXT: imull %ebx, %r9d
282 ; WIN64-NEXT: addl %r9d, %eax
283 ; WIN64-NEXT: imull %edx, %r8d
284 ; WIN64-NEXT: addl %r8d, %eax
285 ; WIN64-NEXT: addl %r11d, %eax
286 ; WIN64-NEXT: popq %rbx
287 ; WIN64-NEXT: popq %rbp
290 ; LINUXOSX-LABEL: testi32_inp:
292 ; LINUXOSX-NEXT: # kill: def $edx killed $edx def $rdx
293 ; LINUXOSX-NEXT: # kill: def $esi killed $esi def $rsi
294 ; LINUXOSX-NEXT: # kill: def $r14d killed $r14d def $r14
295 ; LINUXOSX-NEXT: # kill: def $r13d killed $r13d def $r13
296 ; LINUXOSX-NEXT: # kill: def $r12d killed $r12d def $r12
297 ; LINUXOSX-NEXT: # kill: def $r9d killed $r9d def $r9
298 ; LINUXOSX-NEXT: # kill: def $r8d killed $r8d def $r8
299 ; LINUXOSX-NEXT: # kill: def $edi killed $edi def $rdi
300 ; LINUXOSX-NEXT: leal (%rdx,%rdi), %r10d
301 ; LINUXOSX-NEXT: movl %edx, %r11d
302 ; LINUXOSX-NEXT: subl %edi, %r11d
303 ; LINUXOSX-NEXT: leal (%rsi,%r8), %edx
304 ; LINUXOSX-NEXT: # kill: def $esi killed $esi killed $rsi
305 ; LINUXOSX-NEXT: subl %r8d, %esi
306 ; LINUXOSX-NEXT: leal (%r9,%r12), %edi
307 ; LINUXOSX-NEXT: movl %r9d, %r8d
308 ; LINUXOSX-NEXT: subl %r12d, %r8d
309 ; LINUXOSX-NEXT: movl %eax, %r9d
310 ; LINUXOSX-NEXT: subl %ecx, %r9d
311 ; LINUXOSX-NEXT: imull %r9d, %r8d
312 ; LINUXOSX-NEXT: leal (%r13,%r14), %r9d
313 ; LINUXOSX-NEXT: movl %r13d, %r12d
314 ; LINUXOSX-NEXT: subl %r14d, %r12d
315 ; LINUXOSX-NEXT: imull %r11d, %r12d
316 ; LINUXOSX-NEXT: movl {{[0-9]+}}(%rsp), %r11d
317 ; LINUXOSX-NEXT: addl %r8d, %r12d
318 ; LINUXOSX-NEXT: movl %r15d, %r8d
319 ; LINUXOSX-NEXT: subl %r11d, %r8d
320 ; LINUXOSX-NEXT: imull %esi, %r8d
321 ; LINUXOSX-NEXT: addl %r12d, %r8d
322 ; LINUXOSX-NEXT: addl %ecx, %eax
323 ; LINUXOSX-NEXT: imull %edi, %eax
324 ; LINUXOSX-NEXT: imull %r10d, %r9d
325 ; LINUXOSX-NEXT: addl %r9d, %eax
326 ; LINUXOSX-NEXT: addl %r15d, %r11d
327 ; LINUXOSX-NEXT: imull %edx, %r11d
328 ; LINUXOSX-NEXT: addl %r11d, %eax
329 ; LINUXOSX-NEXT: retq
; Second half of the `define` line started above the check lines.
331 i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6) nounwind {
; Pairwise differences of the %a and %b arguments.
332 %x1 = sub i32 %a1, %a2
333 %x2 = sub i32 %a3, %a4
334 %x3 = sub i32 %a5, %a6
335 %y1 = sub i32 %b1, %b2
336 %y2 = sub i32 %b3, %b4
337 %y3 = sub i32 %b5, %b6
; Pairwise sums of the same argument pairs.
338 %v1 = add i32 %a1, %a2
339 %v2 = add i32 %a3, %a4
340 %v3 = add i32 %a5, %a6
341 %w1 = add i32 %b1, %b2
342 %w2 = add i32 %b3, %b4
343 %w3 = add i32 %b5, %b6
; Products of matching differences (%s*) and of matching sums (%t*).
344 %s1 = mul i32 %x1, %y1
345 %s2 = mul i32 %x2, %y2
346 %s3 = mul i32 %x3, %y3
347 %t1 = mul i32 %v1, %w1
348 %t2 = mul i32 %v2, %w2
349 %t3 = mul i32 %v3, %w3
; %m2 = s1 + s2 + s3, %n2 = t1 + t2 + t3, %r1 = %m2 + %n2.
350 %m1 = add i32 %s1, %s2
351 %m2 = add i32 %m1, %s3
352 %n1 = add i32 %t1, %t2
353 %n2 = add i32 %n1, %t3
354 %r1 = add i32 %m2, %n2
358 ; Test that parameters, overflowing register capacity, are passed through the stack
; Element-wise on <32 x float>: %x2 = (%a + %b) + %c.  Each <32 x float>
; value corresponds to eight 128-bit XMM-sized pieces in the expected code.
; What the checks below pin down per configuration:
;   - 32-bit Windows: every addps is fed by a movups load from an
;     %ebp-relative offset (8 through 248), i.e. both addends of each piece
;     come from memory; %xmm6 and %xmm7 are spilled so %xmm7/%xmm0 can serve
;     as temporaries, and the frame is 16-byte aligned (andl $-16, %esp).
;   - 64-bit Windows and 64-bit Linux: the first sum is register-only
;     (%xmm8-%xmm15 added into %xmm0-%xmm7) and the second addend of each
;     piece is an %rsp-relative memory operand.  The 64-bit Windows variant
;     additionally brackets the body with pushq %rax / popq %rax.
359 define x86_regcallcc <32 x float> @testf32_stack(<32 x float> %a, <32 x float> %b, <32 x float> %c) nounwind {
360 ; WIN32-LABEL: testf32_stack:
362 ; WIN32-NEXT: pushl %ebp
363 ; WIN32-NEXT: movl %esp, %ebp
364 ; WIN32-NEXT: andl $-16, %esp
365 ; WIN32-NEXT: subl $48, %esp
366 ; WIN32-NEXT: movaps %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
367 ; WIN32-NEXT: movaps %xmm6, (%esp) # 16-byte Spill
368 ; WIN32-NEXT: movaps %xmm5, %xmm6
369 ; WIN32-NEXT: movaps %xmm4, %xmm5
370 ; WIN32-NEXT: movaps %xmm3, %xmm4
371 ; WIN32-NEXT: movaps %xmm2, %xmm3
372 ; WIN32-NEXT: movaps %xmm1, %xmm2
373 ; WIN32-NEXT: movaps %xmm0, %xmm1
374 ; WIN32-NEXT: movups 120(%ebp), %xmm7
375 ; WIN32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
376 ; WIN32-NEXT: addps %xmm7, %xmm0
377 ; WIN32-NEXT: movups 248(%ebp), %xmm7
378 ; WIN32-NEXT: addps %xmm7, %xmm0
379 ; WIN32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
380 ; WIN32-NEXT: movups 104(%ebp), %xmm7
381 ; WIN32-NEXT: movaps (%esp), %xmm0 # 16-byte Reload
382 ; WIN32-NEXT: addps %xmm7, %xmm0
383 ; WIN32-NEXT: movups 232(%ebp), %xmm7
384 ; WIN32-NEXT: addps %xmm7, %xmm0
385 ; WIN32-NEXT: movaps %xmm0, (%esp) # 16-byte Spill
386 ; WIN32-NEXT: movups 88(%ebp), %xmm7
387 ; WIN32-NEXT: addps %xmm7, %xmm6
388 ; WIN32-NEXT: movups 216(%ebp), %xmm7
389 ; WIN32-NEXT: addps %xmm7, %xmm6
390 ; WIN32-NEXT: movups 72(%ebp), %xmm7
391 ; WIN32-NEXT: addps %xmm7, %xmm5
392 ; WIN32-NEXT: movups 200(%ebp), %xmm7
393 ; WIN32-NEXT: addps %xmm7, %xmm5
394 ; WIN32-NEXT: movups 56(%ebp), %xmm7
395 ; WIN32-NEXT: addps %xmm7, %xmm4
396 ; WIN32-NEXT: movups 184(%ebp), %xmm7
397 ; WIN32-NEXT: addps %xmm7, %xmm4
398 ; WIN32-NEXT: movups 40(%ebp), %xmm7
399 ; WIN32-NEXT: addps %xmm7, %xmm3
400 ; WIN32-NEXT: movups 168(%ebp), %xmm7
401 ; WIN32-NEXT: addps %xmm7, %xmm3
402 ; WIN32-NEXT: movups 24(%ebp), %xmm7
403 ; WIN32-NEXT: addps %xmm7, %xmm2
404 ; WIN32-NEXT: movups 152(%ebp), %xmm7
405 ; WIN32-NEXT: addps %xmm7, %xmm2
406 ; WIN32-NEXT: movups 8(%ebp), %xmm7
407 ; WIN32-NEXT: addps %xmm7, %xmm1
408 ; WIN32-NEXT: movups 136(%ebp), %xmm7
409 ; WIN32-NEXT: addps %xmm7, %xmm1
410 ; WIN32-NEXT: movaps %xmm1, %xmm0
411 ; WIN32-NEXT: movaps %xmm2, %xmm1
412 ; WIN32-NEXT: movaps %xmm3, %xmm2
413 ; WIN32-NEXT: movaps %xmm4, %xmm3
414 ; WIN32-NEXT: movaps %xmm5, %xmm4
415 ; WIN32-NEXT: movaps %xmm6, %xmm5
416 ; WIN32-NEXT: movaps (%esp), %xmm6 # 16-byte Reload
417 ; WIN32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
418 ; WIN32-NEXT: movl %ebp, %esp
419 ; WIN32-NEXT: popl %ebp
422 ; WIN64-LABEL: testf32_stack:
424 ; WIN64-NEXT: pushq %rax
425 ; WIN64-NEXT: addps %xmm15, %xmm7
426 ; WIN64-NEXT: addps %xmm14, %xmm6
427 ; WIN64-NEXT: addps %xmm13, %xmm5
428 ; WIN64-NEXT: addps %xmm12, %xmm4
429 ; WIN64-NEXT: addps %xmm11, %xmm3
430 ; WIN64-NEXT: addps %xmm10, %xmm2
431 ; WIN64-NEXT: addps %xmm9, %xmm1
432 ; WIN64-NEXT: addps %xmm8, %xmm0
433 ; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm0
434 ; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm1
435 ; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm2
436 ; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm3
437 ; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm4
438 ; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm5
439 ; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm6
440 ; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm7
441 ; WIN64-NEXT: popq %rax
444 ; LINUXOSX-LABEL: testf32_stack:
446 ; LINUXOSX-NEXT: addps %xmm15, %xmm7
447 ; LINUXOSX-NEXT: addps %xmm14, %xmm6
448 ; LINUXOSX-NEXT: addps %xmm13, %xmm5
449 ; LINUXOSX-NEXT: addps %xmm12, %xmm4
450 ; LINUXOSX-NEXT: addps %xmm11, %xmm3
451 ; LINUXOSX-NEXT: addps %xmm10, %xmm2
452 ; LINUXOSX-NEXT: addps %xmm9, %xmm1
453 ; LINUXOSX-NEXT: addps %xmm8, %xmm0
454 ; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm0
455 ; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm1
456 ; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm2
457 ; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm3
458 ; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm4
459 ; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm5
460 ; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm6
461 ; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm7
462 ; LINUXOSX-NEXT: retq
; %x1 = a + b, then %x2 = %x1 + c; three 32-float vectors are more than the
; XMM registers the expected code uses for arguments, hence the memory operands.
463 %x1 = fadd <32 x float> %a, %b
464 %x2 = fadd <32 x float> %x1, %c