1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -O3 -verify-machineinstrs -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 < %s | FileCheck %s
; Module-level target configuration: 64-bit x86 datalayout and the same
; x86_64-unknown-unknown triple passed to llc in the RUN line above.
4 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
5 target triple = "x86_64-unknown-unknown"
7 ; Stack reload folding tests.
9 ; By including a nop call with sideeffects we can force a partial register spill of the
10 ; relevant registers and check that the reload is correctly folded into the instruction.
12 define <2 x double> @stack_fold_addpd(<2 x double> %a0, <2 x double> %a1) {
13 ; CHECK-LABEL: stack_fold_addpd:
15 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
19 ; CHECK-NEXT: addpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
21 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
22 %2 = fadd <2 x double> %a0, %a1
26 define <4 x float> @stack_fold_addps(<4 x float> %a0, <4 x float> %a1) {
27 ; CHECK-LABEL: stack_fold_addps:
29 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
33 ; CHECK-NEXT: addps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
35 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
36 %2 = fadd <4 x float> %a0, %a1
40 define double @stack_fold_addsd(double %a0, double %a1) {
41 ; CHECK-LABEL: stack_fold_addsd:
43 ; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
47 ; CHECK-NEXT: addsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
49 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
50 %2 = fadd double %a0, %a1
54 define <2 x double> @stack_fold_addsd_int(<2 x double> %a0, <2 x double> %a1) {
55 ; CHECK-LABEL: stack_fold_addsd_int:
57 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
61 ; CHECK-NEXT: addsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
63 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
64 %2 = extractelement <2 x double> %a0, i32 0
65 %3 = extractelement <2 x double> %a1, i32 0
66 %4 = fadd double %2, %3
67 %5 = insertelement <2 x double> %a0, double %4, i32 0
71 define float @stack_fold_addss(float %a0, float %a1) {
72 ; CHECK-LABEL: stack_fold_addss:
74 ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
78 ; CHECK-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
80 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
81 %2 = fadd float %a0, %a1
85 define <4 x float> @stack_fold_addss_int(<4 x float> %a0, <4 x float> %a1) {
86 ; CHECK-LABEL: stack_fold_addss_int:
88 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
92 ; CHECK-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
94 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
95 %2 = extractelement <4 x float> %a0, i32 0
96 %3 = extractelement <4 x float> %a1, i32 0
97 %4 = fadd float %2, %3
98 %5 = insertelement <4 x float> %a0, float %4, i32 0
102 define <2 x double> @stack_fold_addsubpd(<2 x double> %a0, <2 x double> %a1) {
103 ; CHECK-LABEL: stack_fold_addsubpd:
105 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
108 ; CHECK-NEXT: #NO_APP
109 ; CHECK-NEXT: addsubpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
111 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
112 %2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
; SSE3 intrinsic checked above as a folded 'addsubpd' reload.
115 declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
117 define <4 x float> @stack_fold_addsubps(<4 x float> %a0, <4 x float> %a1) {
118 ; CHECK-LABEL: stack_fold_addsubps:
120 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
123 ; CHECK-NEXT: #NO_APP
124 ; CHECK-NEXT: addsubps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
126 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
127 %2 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
; SSE3 intrinsic checked above as a folded 'addsubps' reload.
130 declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
132 define <2 x double> @stack_fold_andnpd(<2 x double> %a0, <2 x double> %a1) {
133 ; CHECK-LABEL: stack_fold_andnpd:
135 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
138 ; CHECK-NEXT: #NO_APP
139 ; CHECK-NEXT: andnpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
140 ; CHECK-NEXT: xorpd %xmm1, %xmm1
141 ; CHECK-NEXT: addpd %xmm1, %xmm0
143 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
144 %2 = bitcast <2 x double> %a0 to <2 x i64>
145 %3 = bitcast <2 x double> %a1 to <2 x i64>
146 %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
147 %5 = and <2 x i64> %4, %3
148 %6 = bitcast <2 x i64> %5 to <2 x double>
149 ; fadd forces execution domain
150 %7 = fadd <2 x double> %6, <double 0x0, double 0x0>
154 define <4 x float> @stack_fold_andnps(<4 x float> %a0, <4 x float> %a1) {
155 ; CHECK-LABEL: stack_fold_andnps:
157 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
160 ; CHECK-NEXT: #NO_APP
161 ; CHECK-NEXT: andnps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
162 ; CHECK-NEXT: xorps %xmm1, %xmm1
163 ; CHECK-NEXT: addps %xmm1, %xmm0
165 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
166 %2 = bitcast <4 x float> %a0 to <2 x i64>
167 %3 = bitcast <4 x float> %a1 to <2 x i64>
168 %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
169 %5 = and <2 x i64> %4, %3
170 %6 = bitcast <2 x i64> %5 to <4 x float>
171 ; fadd forces execution domain
172 %7 = fadd <4 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0>
176 define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) {
177 ; CHECK-LABEL: stack_fold_andpd:
179 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
182 ; CHECK-NEXT: #NO_APP
183 ; CHECK-NEXT: andpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
184 ; CHECK-NEXT: xorpd %xmm1, %xmm1
185 ; CHECK-NEXT: addpd %xmm1, %xmm0
187 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
188 %2 = bitcast <2 x double> %a0 to <2 x i64>
189 %3 = bitcast <2 x double> %a1 to <2 x i64>
190 %4 = and <2 x i64> %2, %3
191 %5 = bitcast <2 x i64> %4 to <2 x double>
192 ; fadd forces execution domain
193 %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
197 define <4 x float> @stack_fold_andps(<4 x float> %a0, <4 x float> %a1) {
198 ; CHECK-LABEL: stack_fold_andps:
200 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
203 ; CHECK-NEXT: #NO_APP
204 ; CHECK-NEXT: andps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
205 ; CHECK-NEXT: xorps %xmm1, %xmm1
206 ; CHECK-NEXT: addps %xmm1, %xmm0
208 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
209 %2 = bitcast <4 x float> %a0 to <2 x i64>
210 %3 = bitcast <4 x float> %a1 to <2 x i64>
211 %4 = and <2 x i64> %2, %3
212 %5 = bitcast <2 x i64> %4 to <4 x float>
213 ; fadd forces execution domain
214 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
218 define <2 x double> @stack_fold_blendpd(<2 x double> %a0, <2 x double> %a1) {
219 ; CHECK-LABEL: stack_fold_blendpd:
221 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
224 ; CHECK-NEXT: #NO_APP
225 ; CHECK-NEXT: blendpd $2, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
226 ; CHECK-NEXT: # xmm0 = xmm0[0],mem[1]
227 ; CHECK-NEXT: xorpd %xmm1, %xmm1
228 ; CHECK-NEXT: addpd %xmm1, %xmm0
230 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
231 %2 = select <2 x i1> <i1 1, i1 0>, <2 x double> %a0, <2 x double> %a1
232 ; fadd forces execution domain
233 %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
237 define <4 x float> @stack_fold_blendps(<4 x float> %a0, <4 x float> %a1) {
238 ; CHECK-LABEL: stack_fold_blendps:
240 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
243 ; CHECK-NEXT: #NO_APP
244 ; CHECK-NEXT: blendps $6, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
245 ; CHECK-NEXT: # xmm0 = xmm0[0],mem[1,2],xmm0[3]
246 ; CHECK-NEXT: xorps %xmm1, %xmm1
247 ; CHECK-NEXT: addps %xmm1, %xmm0
249 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
250 %2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x float> %a0, <4 x float> %a1
251 ; fadd forces execution domain
252 %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
256 define <2 x double> @stack_fold_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %c) {
257 ; CHECK-LABEL: stack_fold_blendvpd:
259 ; CHECK-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
260 ; CHECK-NEXT: movapd %xmm1, %xmm2
263 ; CHECK-NEXT: #NO_APP
264 ; CHECK-NEXT: blendvpd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
265 ; CHECK-NEXT: movapd %xmm2, %xmm0
267 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
268 %2 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a1, <2 x double> %c, <2 x double> %a0)
271 declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
273 define <4 x float> @stack_fold_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %c) {
274 ; CHECK-LABEL: stack_fold_blendvps:
276 ; CHECK-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
277 ; CHECK-NEXT: movaps %xmm1, %xmm2
280 ; CHECK-NEXT: #NO_APP
281 ; CHECK-NEXT: blendvps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
282 ; CHECK-NEXT: movaps %xmm2, %xmm0
284 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
285 %2 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a1, <4 x float> %c, <4 x float> %a0)
288 declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
290 define <2 x double> @stack_fold_cmppd(<2 x double> %a0, <2 x double> %a1) {
291 ; CHECK-LABEL: stack_fold_cmppd:
293 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
296 ; CHECK-NEXT: #NO_APP
297 ; CHECK-NEXT: cmpeqpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
299 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
300 %2 = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 0)
; SSE2 packed-double compare; with imm8 0 it is checked above as 'cmpeqpd'.
303 declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
305 define <4 x float> @stack_fold_cmpps(<4 x float> %a0, <4 x float> %a1) {
306 ; CHECK-LABEL: stack_fold_cmpps:
308 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
311 ; CHECK-NEXT: #NO_APP
312 ; CHECK-NEXT: cmpeqps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
314 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
315 %2 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 0)
; SSE packed-single compare; with imm8 0 it is checked above as 'cmpeqps'.
318 declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
320 define i32 @stack_fold_cmpsd(double %a0, double %a1) {
321 ; CHECK-LABEL: stack_fold_cmpsd:
323 ; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
326 ; CHECK-NEXT: #NO_APP
327 ; CHECK-NEXT: cmpeqsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
328 ; CHECK-NEXT: movq %xmm0, %rax
329 ; CHECK-NEXT: andl $1, %eax
330 ; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
332 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
333 %2 = fcmp oeq double %a0, %a1
334 %3 = zext i1 %2 to i32
338 define <2 x double> @stack_fold_cmpsd_int(<2 x double> %a0, <2 x double> %a1) {
339 ; CHECK-LABEL: stack_fold_cmpsd_int:
341 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
344 ; CHECK-NEXT: #NO_APP
345 ; CHECK-NEXT: cmpeqsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
347 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
348 %2 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
; SSE2 scalar-double compare; with imm8 0 it is checked above as 'cmpeqsd'.
351 declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
353 define i32 @stack_fold_cmpss(float %a0, float %a1) {
354 ; CHECK-LABEL: stack_fold_cmpss:
356 ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
359 ; CHECK-NEXT: #NO_APP
360 ; CHECK-NEXT: cmpeqss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
361 ; CHECK-NEXT: movd %xmm0, %eax
362 ; CHECK-NEXT: andl $1, %eax
364 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
365 %2 = fcmp oeq float %a0, %a1
366 %3 = zext i1 %2 to i32
370 define <4 x float> @stack_fold_cmpss_int(<4 x float> %a0, <4 x float> %a1) {
371 ; CHECK-LABEL: stack_fold_cmpss_int:
373 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
376 ; CHECK-NEXT: #NO_APP
377 ; CHECK-NEXT: cmpeqss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
379 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
380 %2 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0)
; SSE scalar-single compare; with imm8 0 it is checked above as 'cmpeqss'.
383 declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
385 ; TODO stack_fold_comisd
387 define i32 @stack_fold_comisd_int(<2 x double> %a0, <2 x double> %a1) {
388 ; CHECK-LABEL: stack_fold_comisd_int:
390 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
393 ; CHECK-NEXT: #NO_APP
394 ; CHECK-NEXT: comisd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
395 ; CHECK-NEXT: setnp %al
396 ; CHECK-NEXT: sete %cl
397 ; CHECK-NEXT: andb %al, %cl
398 ; CHECK-NEXT: movzbl %cl, %eax
400 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
401 %2 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
; SSE2 ordered-compare-equal intrinsic; checked above as a folded 'comisd'.
404 declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
406 ; TODO stack_fold_comiss
408 define i32 @stack_fold_comiss_int(<4 x float> %a0, <4 x float> %a1) {
409 ; CHECK-LABEL: stack_fold_comiss_int:
411 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
414 ; CHECK-NEXT: #NO_APP
415 ; CHECK-NEXT: comiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
416 ; CHECK-NEXT: setnp %al
417 ; CHECK-NEXT: sete %cl
418 ; CHECK-NEXT: andb %al, %cl
419 ; CHECK-NEXT: movzbl %cl, %eax
421 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
422 %2 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
; SSE ordered-compare-equal intrinsic; checked above as a folded 'comiss'.
425 declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
427 define <2 x double> @stack_fold_cvtdq2pd(<4 x i32> %a0) {
428 ; CHECK-LABEL: stack_fold_cvtdq2pd:
430 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
433 ; CHECK-NEXT: #NO_APP
434 ; CHECK-NEXT: cvtdq2pd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
436 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
437 %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
438 %3 = sitofp <2 x i32> %2 to <2 x double>
442 define <2 x double> @stack_fold_cvtdq2pd_int(<4 x i32> %a0) {
443 ; CHECK-LABEL: stack_fold_cvtdq2pd_int:
445 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
448 ; CHECK-NEXT: #NO_APP
449 ; CHECK-NEXT: cvtdq2pd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
451 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
452 %2 = shufflevector <4 x i32> %a0, <4 x i32> %a0, <2 x i32> <i32 0, i32 1>
453 %cvt = sitofp <2 x i32> %2 to <2 x double>
454 ret <2 x double> %cvt
457 define <4 x float> @stack_fold_cvtdq2ps(<4 x i32> %a0) {
458 ; CHECK-LABEL: stack_fold_cvtdq2ps:
460 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
463 ; CHECK-NEXT: #NO_APP
464 ; CHECK-NEXT: cvtdq2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
466 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
467 %2 = sitofp <4 x i32> %a0 to <4 x float>
471 define <4 x i32> @stack_fold_cvtpd2dq(<2 x double> %a0) {
472 ; CHECK-LABEL: stack_fold_cvtpd2dq:
474 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
477 ; CHECK-NEXT: #NO_APP
478 ; CHECK-NEXT: cvtpd2dq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
480 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
481 %2 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
; SSE2 double->i32 conversion intrinsic; checked above as a folded 'cvtpd2dq'.
484 declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
486 define <2 x float> @stack_fold_cvtpd2ps(<2 x double> %a0) {
487 ; CHECK-LABEL: stack_fold_cvtpd2ps:
489 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
492 ; CHECK-NEXT: #NO_APP
493 ; CHECK-NEXT: cvtpd2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
495 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
496 %2 = fptrunc <2 x double> %a0 to <2 x float>
500 define <4 x i32> @stack_fold_cvtps2dq(<4 x float> %a0) {
501 ; CHECK-LABEL: stack_fold_cvtps2dq:
503 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
506 ; CHECK-NEXT: #NO_APP
507 ; CHECK-NEXT: cvtps2dq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
509 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
510 %2 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
; SSE2 float->i32 conversion intrinsic; checked above as a folded 'cvtps2dq'.
513 declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
515 define <2 x double> @stack_fold_cvtps2pd(<4 x float> %a0) {
516 ; CHECK-LABEL: stack_fold_cvtps2pd:
518 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
521 ; CHECK-NEXT: #NO_APP
522 ; CHECK-NEXT: cvtps2pd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
524 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
525 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <2 x i32> <i32 0, i32 1>
526 %3 = fpext <2 x float> %2 to <2 x double>
530 define <2 x double> @stack_fold_cvtps2pd_int(<4 x float> %a0) {
531 ; CHECK-LABEL: stack_fold_cvtps2pd_int:
533 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
536 ; CHECK-NEXT: #NO_APP
537 ; CHECK-NEXT: cvtps2pd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
539 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
540 %2 = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1>
541 %cvtps2pd = fpext <2 x float> %2 to <2 x double>
542 ret <2 x double> %cvtps2pd
545 ; TODO stack_fold_cvtsd2si
547 define i32 @stack_fold_cvtsd2si_int(<2 x double> %a0) {
548 ; CHECK-LABEL: stack_fold_cvtsd2si_int:
550 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
553 ; CHECK-NEXT: #NO_APP
554 ; CHECK-NEXT: cvtsd2si {{[-0-9]+}}(%r{{[sb]}}p), %eax # 16-byte Folded Reload
556 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
557 %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
; SSE2 scalar double->i32 intrinsic; checked above as 'cvtsd2si ..., %eax'.
560 declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
562 ; TODO stack_fold_cvtsd2si64
564 define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) {
565 ; CHECK-LABEL: stack_fold_cvtsd2si64_int:
567 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
570 ; CHECK-NEXT: #NO_APP
571 ; CHECK-NEXT: cvtsd2si {{[-0-9]+}}(%r{{[sb]}}p), %rax # 16-byte Folded Reload
573 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
574 %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0)
; SSE2 scalar double->i64 intrinsic; checked above as 'cvtsd2si ..., %rax'.
577 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
579 define float @stack_fold_cvtsd2ss(double %a0) minsize {
580 ; CHECK-LABEL: stack_fold_cvtsd2ss:
582 ; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
585 ; CHECK-NEXT: #NO_APP
586 ; CHECK-NEXT: cvtsd2ss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
588 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
589 %2 = fptrunc double %a0 to float
593 define <4 x float> @stack_fold_cvtsd2ss_int(<2 x double> %a0) optsize {
594 ; CHECK-LABEL: stack_fold_cvtsd2ss_int:
596 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
599 ; CHECK-NEXT: #NO_APP
600 ; CHECK-NEXT: xorps %xmm1, %xmm1
601 ; CHECK-NEXT: cvtsd2ss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
602 ; CHECK-NEXT: movaps %xmm1, %xmm0
604 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
605 %2 = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, <2 x double> %a0)
; SSE2 scalar double->float intrinsic; checked above as a folded 'cvtsd2ss'.
608 declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
610 define double @stack_fold_cvtsi2sd(i32 %a0) {
611 ; CHECK-LABEL: stack_fold_cvtsi2sd:
613 ; CHECK-NEXT: pushq %rbp
614 ; CHECK-NEXT: .cfi_def_cfa_offset 16
615 ; CHECK-NEXT: pushq %r15
616 ; CHECK-NEXT: .cfi_def_cfa_offset 24
617 ; CHECK-NEXT: pushq %r14
618 ; CHECK-NEXT: .cfi_def_cfa_offset 32
619 ; CHECK-NEXT: pushq %r13
620 ; CHECK-NEXT: .cfi_def_cfa_offset 40
621 ; CHECK-NEXT: pushq %r12
622 ; CHECK-NEXT: .cfi_def_cfa_offset 48
623 ; CHECK-NEXT: pushq %rbx
624 ; CHECK-NEXT: .cfi_def_cfa_offset 56
625 ; CHECK-NEXT: .cfi_offset %rbx, -56
626 ; CHECK-NEXT: .cfi_offset %r12, -48
627 ; CHECK-NEXT: .cfi_offset %r13, -40
628 ; CHECK-NEXT: .cfi_offset %r14, -32
629 ; CHECK-NEXT: .cfi_offset %r15, -24
630 ; CHECK-NEXT: .cfi_offset %rbp, -16
631 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
634 ; CHECK-NEXT: #NO_APP
635 ; CHECK-NEXT: xorps %xmm0, %xmm0
636 ; CHECK-NEXT: cvtsi2sdl {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
637 ; CHECK-NEXT: popq %rbx
638 ; CHECK-NEXT: .cfi_def_cfa_offset 48
639 ; CHECK-NEXT: popq %r12
640 ; CHECK-NEXT: .cfi_def_cfa_offset 40
641 ; CHECK-NEXT: popq %r13
642 ; CHECK-NEXT: .cfi_def_cfa_offset 32
643 ; CHECK-NEXT: popq %r14
644 ; CHECK-NEXT: .cfi_def_cfa_offset 24
645 ; CHECK-NEXT: popq %r15
646 ; CHECK-NEXT: .cfi_def_cfa_offset 16
647 ; CHECK-NEXT: popq %rbp
648 ; CHECK-NEXT: .cfi_def_cfa_offset 8
650 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
651 %2 = sitofp i32 %a0 to double
655 define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0, <2 x double> %b0) {
656 ; CHECK-LABEL: stack_fold_cvtsi2sd_int:
658 ; CHECK-NEXT: pushq %rbp
659 ; CHECK-NEXT: .cfi_def_cfa_offset 16
660 ; CHECK-NEXT: pushq %r15
661 ; CHECK-NEXT: .cfi_def_cfa_offset 24
662 ; CHECK-NEXT: pushq %r14
663 ; CHECK-NEXT: .cfi_def_cfa_offset 32
664 ; CHECK-NEXT: pushq %r13
665 ; CHECK-NEXT: .cfi_def_cfa_offset 40
666 ; CHECK-NEXT: pushq %r12
667 ; CHECK-NEXT: .cfi_def_cfa_offset 48
668 ; CHECK-NEXT: pushq %rbx
669 ; CHECK-NEXT: .cfi_def_cfa_offset 56
670 ; CHECK-NEXT: .cfi_offset %rbx, -56
671 ; CHECK-NEXT: .cfi_offset %r12, -48
672 ; CHECK-NEXT: .cfi_offset %r13, -40
673 ; CHECK-NEXT: .cfi_offset %r14, -32
674 ; CHECK-NEXT: .cfi_offset %r15, -24
675 ; CHECK-NEXT: .cfi_offset %rbp, -16
676 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
679 ; CHECK-NEXT: #NO_APP
680 ; CHECK-NEXT: cvtsi2sdl {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
681 ; CHECK-NEXT: popq %rbx
682 ; CHECK-NEXT: .cfi_def_cfa_offset 48
683 ; CHECK-NEXT: popq %r12
684 ; CHECK-NEXT: .cfi_def_cfa_offset 40
685 ; CHECK-NEXT: popq %r13
686 ; CHECK-NEXT: .cfi_def_cfa_offset 32
687 ; CHECK-NEXT: popq %r14
688 ; CHECK-NEXT: .cfi_def_cfa_offset 24
689 ; CHECK-NEXT: popq %r15
690 ; CHECK-NEXT: .cfi_def_cfa_offset 16
691 ; CHECK-NEXT: popq %rbp
692 ; CHECK-NEXT: .cfi_def_cfa_offset 8
694 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
695 %2 = sitofp i32 %a0 to double
696 %3 = insertelement <2 x double> %b0, double %2, i64 0
700 define double @stack_fold_cvtsi642sd(i64 %a0) {
701 ; CHECK-LABEL: stack_fold_cvtsi642sd:
703 ; CHECK-NEXT: pushq %rbp
704 ; CHECK-NEXT: .cfi_def_cfa_offset 16
705 ; CHECK-NEXT: pushq %r15
706 ; CHECK-NEXT: .cfi_def_cfa_offset 24
707 ; CHECK-NEXT: pushq %r14
708 ; CHECK-NEXT: .cfi_def_cfa_offset 32
709 ; CHECK-NEXT: pushq %r13
710 ; CHECK-NEXT: .cfi_def_cfa_offset 40
711 ; CHECK-NEXT: pushq %r12
712 ; CHECK-NEXT: .cfi_def_cfa_offset 48
713 ; CHECK-NEXT: pushq %rbx
714 ; CHECK-NEXT: .cfi_def_cfa_offset 56
715 ; CHECK-NEXT: .cfi_offset %rbx, -56
716 ; CHECK-NEXT: .cfi_offset %r12, -48
717 ; CHECK-NEXT: .cfi_offset %r13, -40
718 ; CHECK-NEXT: .cfi_offset %r14, -32
719 ; CHECK-NEXT: .cfi_offset %r15, -24
720 ; CHECK-NEXT: .cfi_offset %rbp, -16
721 ; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
724 ; CHECK-NEXT: #NO_APP
725 ; CHECK-NEXT: xorps %xmm0, %xmm0
726 ; CHECK-NEXT: cvtsi2sdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
727 ; CHECK-NEXT: popq %rbx
728 ; CHECK-NEXT: .cfi_def_cfa_offset 48
729 ; CHECK-NEXT: popq %r12
730 ; CHECK-NEXT: .cfi_def_cfa_offset 40
731 ; CHECK-NEXT: popq %r13
732 ; CHECK-NEXT: .cfi_def_cfa_offset 32
733 ; CHECK-NEXT: popq %r14
734 ; CHECK-NEXT: .cfi_def_cfa_offset 24
735 ; CHECK-NEXT: popq %r15
736 ; CHECK-NEXT: .cfi_def_cfa_offset 16
737 ; CHECK-NEXT: popq %rbp
738 ; CHECK-NEXT: .cfi_def_cfa_offset 8
740 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
741 %2 = sitofp i64 %a0 to double
745 define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0, <2 x double> %b0) {
746 ; CHECK-LABEL: stack_fold_cvtsi642sd_int:
748 ; CHECK-NEXT: pushq %rbp
749 ; CHECK-NEXT: .cfi_def_cfa_offset 16
750 ; CHECK-NEXT: pushq %r15
751 ; CHECK-NEXT: .cfi_def_cfa_offset 24
752 ; CHECK-NEXT: pushq %r14
753 ; CHECK-NEXT: .cfi_def_cfa_offset 32
754 ; CHECK-NEXT: pushq %r13
755 ; CHECK-NEXT: .cfi_def_cfa_offset 40
756 ; CHECK-NEXT: pushq %r12
757 ; CHECK-NEXT: .cfi_def_cfa_offset 48
758 ; CHECK-NEXT: pushq %rbx
759 ; CHECK-NEXT: .cfi_def_cfa_offset 56
760 ; CHECK-NEXT: .cfi_offset %rbx, -56
761 ; CHECK-NEXT: .cfi_offset %r12, -48
762 ; CHECK-NEXT: .cfi_offset %r13, -40
763 ; CHECK-NEXT: .cfi_offset %r14, -32
764 ; CHECK-NEXT: .cfi_offset %r15, -24
765 ; CHECK-NEXT: .cfi_offset %rbp, -16
766 ; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
769 ; CHECK-NEXT: #NO_APP
770 ; CHECK-NEXT: cvtsi2sdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
771 ; CHECK-NEXT: popq %rbx
772 ; CHECK-NEXT: .cfi_def_cfa_offset 48
773 ; CHECK-NEXT: popq %r12
774 ; CHECK-NEXT: .cfi_def_cfa_offset 40
775 ; CHECK-NEXT: popq %r13
776 ; CHECK-NEXT: .cfi_def_cfa_offset 32
777 ; CHECK-NEXT: popq %r14
778 ; CHECK-NEXT: .cfi_def_cfa_offset 24
779 ; CHECK-NEXT: popq %r15
780 ; CHECK-NEXT: .cfi_def_cfa_offset 16
781 ; CHECK-NEXT: popq %rbp
782 ; CHECK-NEXT: .cfi_def_cfa_offset 8
784 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
785 %2 = sitofp i64 %a0 to double
786 %3 = insertelement <2 x double> %b0, double %2, i64 0
790 define float @stack_fold_cvtsi2ss(i32 %a0) {
791 ; CHECK-LABEL: stack_fold_cvtsi2ss:
793 ; CHECK-NEXT: pushq %rbp
794 ; CHECK-NEXT: .cfi_def_cfa_offset 16
795 ; CHECK-NEXT: pushq %r15
796 ; CHECK-NEXT: .cfi_def_cfa_offset 24
797 ; CHECK-NEXT: pushq %r14
798 ; CHECK-NEXT: .cfi_def_cfa_offset 32
799 ; CHECK-NEXT: pushq %r13
800 ; CHECK-NEXT: .cfi_def_cfa_offset 40
801 ; CHECK-NEXT: pushq %r12
802 ; CHECK-NEXT: .cfi_def_cfa_offset 48
803 ; CHECK-NEXT: pushq %rbx
804 ; CHECK-NEXT: .cfi_def_cfa_offset 56
805 ; CHECK-NEXT: .cfi_offset %rbx, -56
806 ; CHECK-NEXT: .cfi_offset %r12, -48
807 ; CHECK-NEXT: .cfi_offset %r13, -40
808 ; CHECK-NEXT: .cfi_offset %r14, -32
809 ; CHECK-NEXT: .cfi_offset %r15, -24
810 ; CHECK-NEXT: .cfi_offset %rbp, -16
811 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
814 ; CHECK-NEXT: #NO_APP
815 ; CHECK-NEXT: xorps %xmm0, %xmm0
816 ; CHECK-NEXT: cvtsi2ssl {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
817 ; CHECK-NEXT: popq %rbx
818 ; CHECK-NEXT: .cfi_def_cfa_offset 48
819 ; CHECK-NEXT: popq %r12
820 ; CHECK-NEXT: .cfi_def_cfa_offset 40
821 ; CHECK-NEXT: popq %r13
822 ; CHECK-NEXT: .cfi_def_cfa_offset 32
823 ; CHECK-NEXT: popq %r14
824 ; CHECK-NEXT: .cfi_def_cfa_offset 24
825 ; CHECK-NEXT: popq %r15
826 ; CHECK-NEXT: .cfi_def_cfa_offset 16
827 ; CHECK-NEXT: popq %rbp
828 ; CHECK-NEXT: .cfi_def_cfa_offset 8
830 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
831 %2 = sitofp i32 %a0 to float
835 define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0, <4 x float> %b0) {
836 ; CHECK-LABEL: stack_fold_cvtsi2ss_int:
838 ; CHECK-NEXT: pushq %rbp
839 ; CHECK-NEXT: .cfi_def_cfa_offset 16
840 ; CHECK-NEXT: pushq %r15
841 ; CHECK-NEXT: .cfi_def_cfa_offset 24
842 ; CHECK-NEXT: pushq %r14
843 ; CHECK-NEXT: .cfi_def_cfa_offset 32
844 ; CHECK-NEXT: pushq %r13
845 ; CHECK-NEXT: .cfi_def_cfa_offset 40
846 ; CHECK-NEXT: pushq %r12
847 ; CHECK-NEXT: .cfi_def_cfa_offset 48
848 ; CHECK-NEXT: pushq %rbx
849 ; CHECK-NEXT: .cfi_def_cfa_offset 56
850 ; CHECK-NEXT: .cfi_offset %rbx, -56
851 ; CHECK-NEXT: .cfi_offset %r12, -48
852 ; CHECK-NEXT: .cfi_offset %r13, -40
853 ; CHECK-NEXT: .cfi_offset %r14, -32
854 ; CHECK-NEXT: .cfi_offset %r15, -24
855 ; CHECK-NEXT: .cfi_offset %rbp, -16
856 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
859 ; CHECK-NEXT: #NO_APP
860 ; CHECK-NEXT: cvtsi2ssl {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
861 ; CHECK-NEXT: popq %rbx
862 ; CHECK-NEXT: .cfi_def_cfa_offset 48
863 ; CHECK-NEXT: popq %r12
864 ; CHECK-NEXT: .cfi_def_cfa_offset 40
865 ; CHECK-NEXT: popq %r13
866 ; CHECK-NEXT: .cfi_def_cfa_offset 32
867 ; CHECK-NEXT: popq %r14
868 ; CHECK-NEXT: .cfi_def_cfa_offset 24
869 ; CHECK-NEXT: popq %r15
870 ; CHECK-NEXT: .cfi_def_cfa_offset 16
871 ; CHECK-NEXT: popq %rbp
872 ; CHECK-NEXT: .cfi_def_cfa_offset 8
874 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
875 %2 = sitofp i32 %a0 to float
876 %3 = insertelement <4 x float> %b0, float %2, i64 0
880 define float @stack_fold_cvtsi642ss(i64 %a0) {
881 ; CHECK-LABEL: stack_fold_cvtsi642ss:
883 ; CHECK-NEXT: pushq %rbp
884 ; CHECK-NEXT: .cfi_def_cfa_offset 16
885 ; CHECK-NEXT: pushq %r15
886 ; CHECK-NEXT: .cfi_def_cfa_offset 24
887 ; CHECK-NEXT: pushq %r14
888 ; CHECK-NEXT: .cfi_def_cfa_offset 32
889 ; CHECK-NEXT: pushq %r13
890 ; CHECK-NEXT: .cfi_def_cfa_offset 40
891 ; CHECK-NEXT: pushq %r12
892 ; CHECK-NEXT: .cfi_def_cfa_offset 48
893 ; CHECK-NEXT: pushq %rbx
894 ; CHECK-NEXT: .cfi_def_cfa_offset 56
895 ; CHECK-NEXT: .cfi_offset %rbx, -56
896 ; CHECK-NEXT: .cfi_offset %r12, -48
897 ; CHECK-NEXT: .cfi_offset %r13, -40
898 ; CHECK-NEXT: .cfi_offset %r14, -32
899 ; CHECK-NEXT: .cfi_offset %r15, -24
900 ; CHECK-NEXT: .cfi_offset %rbp, -16
901 ; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
904 ; CHECK-NEXT: #NO_APP
905 ; CHECK-NEXT: xorps %xmm0, %xmm0
906 ; CHECK-NEXT: cvtsi2ssq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
907 ; CHECK-NEXT: popq %rbx
908 ; CHECK-NEXT: .cfi_def_cfa_offset 48
909 ; CHECK-NEXT: popq %r12
910 ; CHECK-NEXT: .cfi_def_cfa_offset 40
911 ; CHECK-NEXT: popq %r13
912 ; CHECK-NEXT: .cfi_def_cfa_offset 32
913 ; CHECK-NEXT: popq %r14
914 ; CHECK-NEXT: .cfi_def_cfa_offset 24
915 ; CHECK-NEXT: popq %r15
916 ; CHECK-NEXT: .cfi_def_cfa_offset 16
917 ; CHECK-NEXT: popq %rbp
918 ; CHECK-NEXT: .cfi_def_cfa_offset 8
920 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
921 %2 = sitofp i64 %a0 to float
925 define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0, <4 x float> %b0) {
926 ; CHECK-LABEL: stack_fold_cvtsi642ss_int:
928 ; CHECK-NEXT: pushq %rbp
929 ; CHECK-NEXT: .cfi_def_cfa_offset 16
930 ; CHECK-NEXT: pushq %r15
931 ; CHECK-NEXT: .cfi_def_cfa_offset 24
932 ; CHECK-NEXT: pushq %r14
933 ; CHECK-NEXT: .cfi_def_cfa_offset 32
934 ; CHECK-NEXT: pushq %r13
935 ; CHECK-NEXT: .cfi_def_cfa_offset 40
936 ; CHECK-NEXT: pushq %r12
937 ; CHECK-NEXT: .cfi_def_cfa_offset 48
938 ; CHECK-NEXT: pushq %rbx
939 ; CHECK-NEXT: .cfi_def_cfa_offset 56
940 ; CHECK-NEXT: .cfi_offset %rbx, -56
941 ; CHECK-NEXT: .cfi_offset %r12, -48
942 ; CHECK-NEXT: .cfi_offset %r13, -40
943 ; CHECK-NEXT: .cfi_offset %r14, -32
944 ; CHECK-NEXT: .cfi_offset %r15, -24
945 ; CHECK-NEXT: .cfi_offset %rbp, -16
946 ; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
949 ; CHECK-NEXT: #NO_APP
950 ; CHECK-NEXT: cvtsi2ssq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
951 ; CHECK-NEXT: popq %rbx
952 ; CHECK-NEXT: .cfi_def_cfa_offset 48
953 ; CHECK-NEXT: popq %r12
954 ; CHECK-NEXT: .cfi_def_cfa_offset 40
955 ; CHECK-NEXT: popq %r13
956 ; CHECK-NEXT: .cfi_def_cfa_offset 32
957 ; CHECK-NEXT: popq %r14
958 ; CHECK-NEXT: .cfi_def_cfa_offset 24
959 ; CHECK-NEXT: popq %r15
960 ; CHECK-NEXT: .cfi_def_cfa_offset 16
961 ; CHECK-NEXT: popq %rbp
962 ; CHECK-NEXT: .cfi_def_cfa_offset 8
964 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
965 %2 = sitofp i64 %a0 to float
966 %3 = insertelement <4 x float> %b0, float %2, i64 0
970 define double @stack_fold_cvtss2sd(float %a0) minsize {
971 ; CHECK-LABEL: stack_fold_cvtss2sd:
973 ; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
976 ; CHECK-NEXT: #NO_APP
977 ; CHECK-NEXT: cvtss2sd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
979 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
980 %2 = fpext float %a0 to double
984 define <2 x double> @stack_fold_cvtss2sd_int(<4 x float> %a0) optsize {
985 ; CHECK-LABEL: stack_fold_cvtss2sd_int:
987 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
990 ; CHECK-NEXT: #NO_APP
991 ; CHECK-NEXT: xorps %xmm0, %xmm0
992 ; CHECK-NEXT: cvtss2sd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
993 ; CHECK-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
995 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
996 %2 = extractelement <4 x float> %a0, i64 0
997 %3 = fpext float %2 to double
998 %4 = insertelement <2 x double> zeroinitializer, double %3, i64 0
1002 ; TODO stack_fold_cvtss2si
1004 define i32 @stack_fold_cvtss2si_int(<4 x float> %a0) {
1005 ; CHECK-LABEL: stack_fold_cvtss2si_int:
1007 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1010 ; CHECK-NEXT: #NO_APP
1011 ; CHECK-NEXT: cvtss2si {{[-0-9]+}}(%r{{[sb]}}p), %eax # 16-byte Folded Reload
1013 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1014 %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
1017 declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
1019 ; TODO stack_fold_cvtss2si64
1021 define i64 @stack_fold_cvtss2si64_int(<4 x float> %a0) {
1022 ; CHECK-LABEL: stack_fold_cvtss2si64_int:
1024 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1027 ; CHECK-NEXT: #NO_APP
1028 ; CHECK-NEXT: cvtss2si {{[-0-9]+}}(%r{{[sb]}}p), %rax # 16-byte Folded Reload
1030 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1031 %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0)
1034 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
1036 define <4 x i32> @stack_fold_cvttpd2dq(<2 x double> %a0) {
1037 ; CHECK-LABEL: stack_fold_cvttpd2dq:
1039 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1042 ; CHECK-NEXT: #NO_APP
1043 ; CHECK-NEXT: cvttpd2dq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1045 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1046 %2 = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
1049 declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
1051 define <4 x i32> @stack_fold_cvttps2dq(<4 x float> %a0) {
1052 ; CHECK-LABEL: stack_fold_cvttps2dq:
1054 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1057 ; CHECK-NEXT: #NO_APP
1058 ; CHECK-NEXT: cvttps2dq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1060 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1061 %2 = fptosi <4 x float> %a0 to <4 x i32>
1065 define i32 @stack_fold_cvttsd2si(double %a0) {
1066 ; CHECK-LABEL: stack_fold_cvttsd2si:
1068 ; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1071 ; CHECK-NEXT: #NO_APP
1072 ; CHECK-NEXT: cvttsd2si {{[-0-9]+}}(%r{{[sb]}}p), %eax # 8-byte Folded Reload
1074 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1075 %2 = fptosi double %a0 to i32
1079 define i32 @stack_fold_cvttsd2si_int(<2 x double> %a0) {
1080 ; CHECK-LABEL: stack_fold_cvttsd2si_int:
1082 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1085 ; CHECK-NEXT: #NO_APP
1086 ; CHECK-NEXT: cvttsd2si {{[-0-9]+}}(%r{{[sb]}}p), %eax # 16-byte Folded Reload
1088 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1089 %2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
1092 declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
1094 define i64 @stack_fold_cvttsd2si64(double %a0) {
1095 ; CHECK-LABEL: stack_fold_cvttsd2si64:
1097 ; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1100 ; CHECK-NEXT: #NO_APP
1101 ; CHECK-NEXT: cvttsd2si {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
1103 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1104 %2 = fptosi double %a0 to i64
1108 define i64 @stack_fold_cvttsd2si64_int(<2 x double> %a0) {
1109 ; CHECK-LABEL: stack_fold_cvttsd2si64_int:
1111 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1114 ; CHECK-NEXT: #NO_APP
1115 ; CHECK-NEXT: cvttsd2si {{[-0-9]+}}(%r{{[sb]}}p), %rax # 16-byte Folded Reload
1117 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1118 %2 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0)
1121 declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
1123 define i32 @stack_fold_cvttss2si(float %a0) {
1124 ; CHECK-LABEL: stack_fold_cvttss2si:
1126 ; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1129 ; CHECK-NEXT: #NO_APP
1130 ; CHECK-NEXT: cvttss2si {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
1132 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1133 %2 = fptosi float %a0 to i32
1137 define i32 @stack_fold_cvttss2si_int(<4 x float> %a0) {
1138 ; CHECK-LABEL: stack_fold_cvttss2si_int:
1140 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1143 ; CHECK-NEXT: #NO_APP
1144 ; CHECK-NEXT: cvttss2si {{[-0-9]+}}(%r{{[sb]}}p), %eax # 16-byte Folded Reload
1146 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1147 %2 = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
1150 declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
1152 define i64 @stack_fold_cvttss2si64(float %a0) {
1153 ; CHECK-LABEL: stack_fold_cvttss2si64:
1155 ; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1158 ; CHECK-NEXT: #NO_APP
1159 ; CHECK-NEXT: cvttss2si {{[-0-9]+}}(%r{{[sb]}}p), %rax # 4-byte Folded Reload
1161 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1162 %2 = fptosi float %a0 to i64
1166 define i64 @stack_fold_cvttss2si64_int(<4 x float> %a0) {
1167 ; CHECK-LABEL: stack_fold_cvttss2si64_int:
1169 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1172 ; CHECK-NEXT: #NO_APP
1173 ; CHECK-NEXT: cvttss2si {{[-0-9]+}}(%r{{[sb]}}p), %rax # 16-byte Folded Reload
1175 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1176 %2 = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0)
1179 declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
1181 define <2 x double> @stack_fold_divpd(<2 x double> %a0, <2 x double> %a1) {
1182 ; CHECK-LABEL: stack_fold_divpd:
1184 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1187 ; CHECK-NEXT: #NO_APP
1188 ; CHECK-NEXT: divpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1190 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1191 %2 = fdiv <2 x double> %a0, %a1
1195 define <4 x float> @stack_fold_divps(<4 x float> %a0, <4 x float> %a1) {
1196 ; CHECK-LABEL: stack_fold_divps:
1198 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1201 ; CHECK-NEXT: #NO_APP
1202 ; CHECK-NEXT: divps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1204 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1205 %2 = fdiv <4 x float> %a0, %a1
1209 define double @stack_fold_divsd(double %a0, double %a1) {
1210 ; CHECK-LABEL: stack_fold_divsd:
1212 ; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1215 ; CHECK-NEXT: #NO_APP
1216 ; CHECK-NEXT: divsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
1218 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1219 %2 = fdiv double %a0, %a1
1223 define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) {
1224 ; CHECK-LABEL: stack_fold_divsd_int:
1226 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1229 ; CHECK-NEXT: #NO_APP
1230 ; CHECK-NEXT: divsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1232 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1233 %2 = extractelement <2 x double> %a0, i32 0
1234 %3 = extractelement <2 x double> %a1, i32 0
1235 %4 = fdiv double %2, %3
1236 %5 = insertelement <2 x double> %a0, double %4, i32 0
1239 declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
1241 define float @stack_fold_divss(float %a0, float %a1) {
1242 ; CHECK-LABEL: stack_fold_divss:
1244 ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1247 ; CHECK-NEXT: #NO_APP
1248 ; CHECK-NEXT: divss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1250 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1251 %2 = fdiv float %a0, %a1
1255 define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) {
1256 ; CHECK-LABEL: stack_fold_divss_int:
1258 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1261 ; CHECK-NEXT: #NO_APP
1262 ; CHECK-NEXT: divss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1264 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1265 %2 = extractelement <4 x float> %a0, i32 0
1266 %3 = extractelement <4 x float> %a1, i32 0
1267 %4 = fdiv float %2, %3
1268 %5 = insertelement <4 x float> %a0, float %4, i32 0
1271 declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
1273 define <2 x double> @stack_fold_dppd(<2 x double> %a0, <2 x double> %a1) {
1274 ; CHECK-LABEL: stack_fold_dppd:
1276 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1279 ; CHECK-NEXT: #NO_APP
1280 ; CHECK-NEXT: dppd $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1282 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1283 %2 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
1286 declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
1288 define <4 x float> @stack_fold_dpps(<4 x float> %a0, <4 x float> %a1) {
1289 ; CHECK-LABEL: stack_fold_dpps:
1291 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1294 ; CHECK-NEXT: #NO_APP
1295 ; CHECK-NEXT: dpps $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1297 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1298 %2 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
1301 declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
1303 define i32 @stack_fold_extractps(<4 x float> %a0, <4 x float> %a1) {
1304 ; CHECK-LABEL: stack_fold_extractps:
1306 ; CHECK-NEXT: pushq %rbp
1307 ; CHECK-NEXT: .cfi_def_cfa_offset 16
1308 ; CHECK-NEXT: pushq %r15
1309 ; CHECK-NEXT: .cfi_def_cfa_offset 24
1310 ; CHECK-NEXT: pushq %r14
1311 ; CHECK-NEXT: .cfi_def_cfa_offset 32
1312 ; CHECK-NEXT: pushq %r13
1313 ; CHECK-NEXT: .cfi_def_cfa_offset 40
1314 ; CHECK-NEXT: pushq %r12
1315 ; CHECK-NEXT: .cfi_def_cfa_offset 48
1316 ; CHECK-NEXT: pushq %rbx
1317 ; CHECK-NEXT: .cfi_def_cfa_offset 56
1318 ; CHECK-NEXT: .cfi_offset %rbx, -56
1319 ; CHECK-NEXT: .cfi_offset %r12, -48
1320 ; CHECK-NEXT: .cfi_offset %r13, -40
1321 ; CHECK-NEXT: .cfi_offset %r14, -32
1322 ; CHECK-NEXT: .cfi_offset %r15, -24
1323 ; CHECK-NEXT: .cfi_offset %rbp, -16
1324 ; CHECK-NEXT: addps %xmm1, %xmm0
1325 ; CHECK-NEXT: extractps $1, %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1328 ; CHECK-NEXT: #NO_APP
1329 ; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
1330 ; CHECK-NEXT: popq %rbx
1331 ; CHECK-NEXT: .cfi_def_cfa_offset 48
1332 ; CHECK-NEXT: popq %r12
1333 ; CHECK-NEXT: .cfi_def_cfa_offset 40
1334 ; CHECK-NEXT: popq %r13
1335 ; CHECK-NEXT: .cfi_def_cfa_offset 32
1336 ; CHECK-NEXT: popq %r14
1337 ; CHECK-NEXT: .cfi_def_cfa_offset 24
1338 ; CHECK-NEXT: popq %r15
1339 ; CHECK-NEXT: .cfi_def_cfa_offset 16
1340 ; CHECK-NEXT: popq %rbp
1341 ; CHECK-NEXT: .cfi_def_cfa_offset 8
1343 ; fadd forces execution domain
1344 %1 = fadd <4 x float> %a0, %a1
1345 %2 = extractelement <4 x float> %1, i32 1
1346 %3 = bitcast float %2 to i32
1347 %4 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
1351 define <2 x double> @stack_fold_haddpd(<2 x double> %a0, <2 x double> %a1) {
1352 ; CHECK-LABEL: stack_fold_haddpd:
1354 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1357 ; CHECK-NEXT: #NO_APP
1358 ; CHECK-NEXT: haddpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1360 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1361 %2 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
1364 declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
1366 define <4 x float> @stack_fold_haddps(<4 x float> %a0, <4 x float> %a1) {
1367 ; CHECK-LABEL: stack_fold_haddps:
1369 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1372 ; CHECK-NEXT: #NO_APP
1373 ; CHECK-NEXT: haddps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1375 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1376 %2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
1379 declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
1381 define <2 x double> @stack_fold_hsubpd(<2 x double> %a0, <2 x double> %a1) {
1382 ; CHECK-LABEL: stack_fold_hsubpd:
1384 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1387 ; CHECK-NEXT: #NO_APP
1388 ; CHECK-NEXT: hsubpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1390 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1391 %2 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
1394 declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
1396 define <4 x float> @stack_fold_hsubps(<4 x float> %a0, <4 x float> %a1) {
1397 ; CHECK-LABEL: stack_fold_hsubps:
1399 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1402 ; CHECK-NEXT: #NO_APP
1403 ; CHECK-NEXT: hsubps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1405 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1406 %2 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
1409 declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
1411 define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) {
1412 ; CHECK-LABEL: stack_fold_insertps:
1414 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1417 ; CHECK-NEXT: #NO_APP
1418 ; CHECK-NEXT: insertps $17, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1419 ; CHECK-NEXT: # xmm0 = zero,mem[0],xmm0[2,3]
1421 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1422 %2 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 209)
1425 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
1427 define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) #0 {
1428 ; CHECK-LABEL: stack_fold_maxpd:
1430 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1433 ; CHECK-NEXT: #NO_APP
1434 ; CHECK-NEXT: maxpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1436 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1437 %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
1440 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
1442 define <2 x double> @stack_fold_maxpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 {
1443 ; CHECK-LABEL: stack_fold_maxpd_commutable:
1445 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1448 ; CHECK-NEXT: #NO_APP
1449 ; CHECK-NEXT: maxpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1451 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1452 %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
1456 define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) #0 {
1457 ; CHECK-LABEL: stack_fold_maxps:
1459 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1462 ; CHECK-NEXT: #NO_APP
1463 ; CHECK-NEXT: maxps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1465 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1466 %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
1469 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
1471 define <4 x float> @stack_fold_maxps_commutable(<4 x float> %a0, <4 x float> %a1) #1 {
1472 ; CHECK-LABEL: stack_fold_maxps_commutable:
1474 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1477 ; CHECK-NEXT: #NO_APP
1478 ; CHECK-NEXT: maxps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1480 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1481 %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
1485 define double @stack_fold_maxsd(double %a0, double %a1) #0 {
1486 ; CHECK-LABEL: stack_fold_maxsd:
1488 ; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1491 ; CHECK-NEXT: #NO_APP
1492 ; CHECK-NEXT: maxsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
1494 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1495 %2 = fcmp ogt double %a0, %a1
1496 %3 = select i1 %2, double %a0, double %a1
1500 define double @stack_fold_maxsd_commutable(double %a0, double %a1) #1 {
1501 ; CHECK-LABEL: stack_fold_maxsd_commutable:
1503 ; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1506 ; CHECK-NEXT: #NO_APP
1507 ; CHECK-NEXT: maxsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
1509 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1510 %2 = fcmp ogt double %a0, %a1
1511 %3 = select i1 %2, double %a0, double %a1
1515 define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) #0 {
1516 ; CHECK-LABEL: stack_fold_maxsd_int:
1518 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1521 ; CHECK-NEXT: #NO_APP
1522 ; CHECK-NEXT: maxsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1524 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1525 %2 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
1528 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
1530 define float @stack_fold_maxss(float %a0, float %a1) #0 {
1531 ; CHECK-LABEL: stack_fold_maxss:
1533 ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1536 ; CHECK-NEXT: #NO_APP
1537 ; CHECK-NEXT: maxss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1539 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1540 %2 = fcmp ogt float %a0, %a1
1541 %3 = select i1 %2, float %a0, float %a1
1545 define float @stack_fold_maxss_commutable(float %a0, float %a1) #1 {
1546 ; CHECK-LABEL: stack_fold_maxss_commutable:
1548 ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1551 ; CHECK-NEXT: #NO_APP
1552 ; CHECK-NEXT: maxss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1554 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1555 %2 = fcmp ogt float %a0, %a1
1556 %3 = select i1 %2, float %a0, float %a1
1560 define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) #0 {
1561 ; CHECK-LABEL: stack_fold_maxss_int:
1563 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1566 ; CHECK-NEXT: #NO_APP
1567 ; CHECK-NEXT: maxss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1569 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1570 %2 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
1573 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
1575 define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) #0 {
1576 ; CHECK-LABEL: stack_fold_minpd:
1578 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1581 ; CHECK-NEXT: #NO_APP
1582 ; CHECK-NEXT: minpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1584 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1585 %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
1588 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
1590 define <2 x double> @stack_fold_minpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 {
1591 ; CHECK-LABEL: stack_fold_minpd_commutable:
1593 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1596 ; CHECK-NEXT: #NO_APP
1597 ; CHECK-NEXT: minpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1599 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1600 %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
1604 define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) #0 {
1605 ; CHECK-LABEL: stack_fold_minps:
1607 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1610 ; CHECK-NEXT: #NO_APP
1611 ; CHECK-NEXT: minps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1613 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1614 %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
1617 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
1619 define <4 x float> @stack_fold_minps_commutable(<4 x float> %a0, <4 x float> %a1) #1 {
1620 ; CHECK-LABEL: stack_fold_minps_commutable:
1622 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1625 ; CHECK-NEXT: #NO_APP
1626 ; CHECK-NEXT: minps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1628 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1629 %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
1633 define double @stack_fold_minsd(double %a0, double %a1) #0 {
1634 ; CHECK-LABEL: stack_fold_minsd:
1636 ; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1639 ; CHECK-NEXT: #NO_APP
1640 ; CHECK-NEXT: minsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
1642 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1643 %2 = fcmp olt double %a0, %a1
1644 %3 = select i1 %2, double %a0, double %a1
1648 define double @stack_fold_minsd_commutable(double %a0, double %a1) #1 {
1649 ; CHECK-LABEL: stack_fold_minsd_commutable:
1651 ; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1654 ; CHECK-NEXT: #NO_APP
1655 ; CHECK-NEXT: minsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
1657 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1658 %2 = fcmp olt double %a0, %a1
1659 %3 = select i1 %2, double %a0, double %a1
1663 define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) #0 {
1664 ; CHECK-LABEL: stack_fold_minsd_int:
1666 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1669 ; CHECK-NEXT: #NO_APP
1670 ; CHECK-NEXT: minsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1672 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1673 %2 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
1676 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
1678 define float @stack_fold_minss(float %a0, float %a1) #0 {
1679 ; CHECK-LABEL: stack_fold_minss:
1681 ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1684 ; CHECK-NEXT: #NO_APP
1685 ; CHECK-NEXT: minss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1687 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1688 %2 = fcmp olt float %a0, %a1
1689 %3 = select i1 %2, float %a0, float %a1
1693 define float @stack_fold_minss_commutable(float %a0, float %a1) #1 {
1694 ; CHECK-LABEL: stack_fold_minss_commutable:
1696 ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1699 ; CHECK-NEXT: #NO_APP
1700 ; CHECK-NEXT: minss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1702 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1703 %2 = fcmp olt float %a0, %a1
1704 %3 = select i1 %2, float %a0, float %a1
1708 define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) #0 {
1709 ; CHECK-LABEL: stack_fold_minss_int:
1711 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1714 ; CHECK-NEXT: #NO_APP
1715 ; CHECK-NEXT: minss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1717 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1718 %2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
1721 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
1723 define <2 x double> @stack_fold_movddup(<2 x double> %a0) {
1724 ; CHECK-LABEL: stack_fold_movddup:
1726 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1729 ; CHECK-NEXT: #NO_APP
1730 ; CHECK-NEXT: movddup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1731 ; CHECK-NEXT: # xmm0 = mem[0,0]
1733 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1734 %2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1737 ; TODO stack_fold_movhpd (load / store)
1738 ; TODO stack_fold_movhps (load / store)
1740 ; TODO stack_fold_movlpd (load / store)
1741 ; TODO stack_fold_movlps (load / store)
1743 define <4 x float> @stack_fold_movshdup(<4 x float> %a0) {
1744 ; CHECK-LABEL: stack_fold_movshdup:
1746 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1749 ; CHECK-NEXT: #NO_APP
1750 ; CHECK-NEXT: movshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1751 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1753 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1754 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
1758 define <4 x float> @stack_fold_movsldup(<4 x float> %a0) {
1759 ; CHECK-LABEL: stack_fold_movsldup:
1761 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1764 ; CHECK-NEXT: #NO_APP
1765 ; CHECK-NEXT: movsldup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1766 ; CHECK-NEXT: # xmm0 = mem[0,0,2,2]
1768 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1769 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
1773 define <2 x double> @stack_fold_mulpd(<2 x double> %a0, <2 x double> %a1) {
1774 ; CHECK-LABEL: stack_fold_mulpd:
1776 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1779 ; CHECK-NEXT: #NO_APP
1780 ; CHECK-NEXT: mulpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1782 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1783 %2 = fmul <2 x double> %a0, %a1
1787 define <4 x float> @stack_fold_mulps(<4 x float> %a0, <4 x float> %a1) {
1788 ; CHECK-LABEL: stack_fold_mulps:
1790 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1793 ; CHECK-NEXT: #NO_APP
1794 ; CHECK-NEXT: mulps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1796 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1797 %2 = fmul <4 x float> %a0, %a1
1801 define double @stack_fold_mulsd(double %a0, double %a1) {
1802 ; CHECK-LABEL: stack_fold_mulsd:
1804 ; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1807 ; CHECK-NEXT: #NO_APP
1808 ; CHECK-NEXT: mulsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
1810 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1811 %2 = fmul double %a0, %a1
1815 define <2 x double> @stack_fold_mulsd_int(<2 x double> %a0, <2 x double> %a1) {
1816 ; CHECK-LABEL: stack_fold_mulsd_int:
1818 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1821 ; CHECK-NEXT: #NO_APP
1822 ; CHECK-NEXT: mulsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1824 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1825 %2 = extractelement <2 x double> %a0, i32 0
1826 %3 = extractelement <2 x double> %a1, i32 0
1827 %4 = fmul double %2, %3
1828 %5 = insertelement <2 x double> %a0, double %4, i32 0
1832 define float @stack_fold_mulss(float %a0, float %a1) {
1833 ; CHECK-LABEL: stack_fold_mulss:
1835 ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1838 ; CHECK-NEXT: #NO_APP
1839 ; CHECK-NEXT: mulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1841 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1842 %2 = fmul float %a0, %a1
1846 define <4 x float> @stack_fold_mulss_int(<4 x float> %a0, <4 x float> %a1) {
1847 ; CHECK-LABEL: stack_fold_mulss_int:
1849 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1852 ; CHECK-NEXT: #NO_APP
1853 ; CHECK-NEXT: mulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1855 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1856 %2 = extractelement <4 x float> %a0, i32 0
1857 %3 = extractelement <4 x float> %a1, i32 0
1858 %4 = fmul float %2, %3
1859 %5 = insertelement <4 x float> %a0, float %4, i32 0
1863 define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) {
1864 ; CHECK-LABEL: stack_fold_orpd:
1866 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1869 ; CHECK-NEXT: #NO_APP
1870 ; CHECK-NEXT: orpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1871 ; CHECK-NEXT: xorpd %xmm1, %xmm1
1872 ; CHECK-NEXT: addpd %xmm1, %xmm0
1874 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1875 %2 = bitcast <2 x double> %a0 to <2 x i64>
1876 %3 = bitcast <2 x double> %a1 to <2 x i64>
1877 %4 = or <2 x i64> %2, %3
1878 %5 = bitcast <2 x i64> %4 to <2 x double>
1879 ; fadd forces execution domain
1880 %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
1884 define <4 x float> @stack_fold_orps(<4 x float> %a0, <4 x float> %a1) {
1885 ; CHECK-LABEL: stack_fold_orps:
1887 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1890 ; CHECK-NEXT: #NO_APP
1891 ; CHECK-NEXT: orps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1892 ; CHECK-NEXT: xorps %xmm1, %xmm1
1893 ; CHECK-NEXT: addps %xmm1, %xmm0
1895 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1896 %2 = bitcast <4 x float> %a0 to <2 x i64>
1897 %3 = bitcast <4 x float> %a1 to <2 x i64>
1898 %4 = or <2 x i64> %2, %3
1899 %5 = bitcast <2 x i64> %4 to <4 x float>
1900 ; fadd forces execution domain
1901 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
1905 ; TODO stack_fold_rcpps
1907 define <4 x float> @stack_fold_rcpps_int(<4 x float> %a0) {
1908 ; CHECK-LABEL: stack_fold_rcpps_int:
1910 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1913 ; CHECK-NEXT: #NO_APP
1914 ; CHECK-NEXT: rcpps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1916 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1917 %2 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
1920 declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
1922 ; TODO stack_fold_rcpss
1924 define <4 x float> @stack_fold_rcpss_int(<4 x float> %a0, <4 x float> %a1) optsize {
1925 ; CHECK-LABEL: stack_fold_rcpss_int:
1927 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1928 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1931 ; CHECK-NEXT: #NO_APP
1932 ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1933 ; CHECK-NEXT: rcpss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1935 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1936 %2 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a1)
1937 %3 = extractelement <4 x float> %2, i32 0
1938 %4 = insertelement <4 x float> %a0, float %3, i32 0
1941 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>)
1943 define <2 x double> @stack_fold_roundpd(<2 x double> %a0) {
1944 ; CHECK-LABEL: stack_fold_roundpd:
1946 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1949 ; CHECK-NEXT: #NO_APP
1950 ; CHECK-NEXT: roundpd $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1952 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1953 %2 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7)
1956 declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
1958 define <4 x float> @stack_fold_roundps(<4 x float> %a0) {
1959 ; CHECK-LABEL: stack_fold_roundps:
1961 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1964 ; CHECK-NEXT: #NO_APP
1965 ; CHECK-NEXT: roundps $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1967 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1968 %2 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7)
1971 declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
1973 define double @stack_fold_roundsd(double %a0) optsize {
1974 ; CHECK-LABEL: stack_fold_roundsd:
1976 ; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1979 ; CHECK-NEXT: #NO_APP
1980 ; CHECK-NEXT: xorps %xmm0, %xmm0
1981 ; CHECK-NEXT: roundsd $9, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
1983 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1984 %2 = call double @llvm.floor.f64(double %a0)
1987 declare double @llvm.floor.f64(double) nounwind readnone
1989 define <2 x double> @stack_fold_roundsd_int(<2 x double> %a0, <2 x double> %a1) optsize {
1990 ; CHECK-LABEL: stack_fold_roundsd_int:
1992 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1993 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1996 ; CHECK-NEXT: #NO_APP
1997 ; CHECK-NEXT: movapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1998 ; CHECK-NEXT: roundsd $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2000 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2001 %2 = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7)
2004 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
2006 define float @stack_fold_roundss(float %a0) minsize {
2007 ; CHECK-LABEL: stack_fold_roundss:
2009 ; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
2012 ; CHECK-NEXT: #NO_APP
2013 ; CHECK-NEXT: roundss $9, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
2015 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2016 %2 = call float @llvm.floor.f32(float %a0)
2019 declare float @llvm.floor.f32(float) nounwind readnone
2021 define <4 x float> @stack_fold_roundss_int(<4 x float> %a0, <4 x float> %a1) optsize {
2022 ; CHECK-LABEL: stack_fold_roundss_int:
2024 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2025 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2028 ; CHECK-NEXT: #NO_APP
2029 ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
2030 ; CHECK-NEXT: roundss $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2032 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2033 %2 = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7)
2036 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
2038 ; TODO stack_fold_rsqrtps
2040 define <4 x float> @stack_fold_rsqrtps_int(<4 x float> %a0) {
2041 ; CHECK-LABEL: stack_fold_rsqrtps_int:
2043 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2046 ; CHECK-NEXT: #NO_APP
2047 ; CHECK-NEXT: rsqrtps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2049 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2050 %2 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
2053 declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
2055 ; TODO stack_fold_rsqrtss
2057 define <4 x float> @stack_fold_rsqrtss_int(<4 x float> %a0, <4 x float> %a1) optsize {
2058 ; CHECK-LABEL: stack_fold_rsqrtss_int:
2060 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2061 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2064 ; CHECK-NEXT: #NO_APP
2065 ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
2066 ; CHECK-NEXT: rsqrtss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2068 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2069 %2 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a1)
2070 %3 = extractelement <4 x float> %2, i32 0
2071 %4 = insertelement <4 x float> %a0, float %3, i32 0
2074 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>)
2076 define <2 x double> @stack_fold_shufpd(<2 x double> %a0, <2 x double> %a1) {
2077 ; CHECK-LABEL: stack_fold_shufpd:
2079 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2082 ; CHECK-NEXT: #NO_APP
2083 ; CHECK-NEXT: shufpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2084 ; CHECK-NEXT: # xmm0 = xmm0[1],mem[0]
2085 ; CHECK-NEXT: xorpd %xmm1, %xmm1
2086 ; CHECK-NEXT: addpd %xmm1, %xmm0
2088 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2089 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
2090 ; fadd forces execution domain
2091 %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
2095 define <4 x float> @stack_fold_shufps(<4 x float> %a0, <4 x float> %a1) {
2096 ; CHECK-LABEL: stack_fold_shufps:
2098 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2101 ; CHECK-NEXT: #NO_APP
2102 ; CHECK-NEXT: shufps $200, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2103 ; CHECK-NEXT: # xmm0 = xmm0[0,2],mem[0,3]
2105 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2106 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 7>
2110 define <2 x double> @stack_fold_sqrtpd(<2 x double> %a0) {
2111 ; CHECK-LABEL: stack_fold_sqrtpd:
2113 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2116 ; CHECK-NEXT: #NO_APP
2117 ; CHECK-NEXT: sqrtpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2119 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2120 %2 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a0)
2124 define <4 x float> @stack_fold_sqrtps(<4 x float> %a0) {
2125 ; CHECK-LABEL: stack_fold_sqrtps:
2127 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2130 ; CHECK-NEXT: #NO_APP
2131 ; CHECK-NEXT: sqrtps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2133 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2134 %2 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a0)
2138 define double @stack_fold_sqrtsd(double %a0) optsize {
2139 ; CHECK-LABEL: stack_fold_sqrtsd:
2141 ; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2144 ; CHECK-NEXT: #NO_APP
2145 ; CHECK-NEXT: xorps %xmm0, %xmm0
2146 ; CHECK-NEXT: sqrtsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
2148 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2149 %2 = call double @llvm.sqrt.f64(double %a0)
2152 declare double @llvm.sqrt.f64(double) nounwind readnone
2154 define <2 x double> @stack_fold_sqrtsd_int(<2 x double> %a0, <2 x double> %a1) optsize {
2155 ; CHECK-LABEL: stack_fold_sqrtsd_int:
2157 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2158 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2161 ; CHECK-NEXT: #NO_APP
2162 ; CHECK-NEXT: movapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
2163 ; CHECK-NEXT: sqrtsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2165 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2166 %2 = extractelement <2 x double> %a1, i64 0
2167 %3 = call double @llvm.sqrt.f64(double %2)
2168 %4 = insertelement <2 x double> %a1, double %3, i64 0
2169 %5 = extractelement <2 x double> %4, i32 0
2170 %6 = insertelement <2 x double> %a0, double %5, i32 0
2174 define float @stack_fold_sqrtss(float %a0) minsize {
2175 ; CHECK-LABEL: stack_fold_sqrtss:
2177 ; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
2180 ; CHECK-NEXT: #NO_APP
2181 ; CHECK-NEXT: sqrtss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
2183 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2184 %2 = call float @llvm.sqrt.f32(float %a0)
2187 declare float @llvm.sqrt.f32(float) nounwind readnone
2189 define <4 x float> @stack_fold_sqrtss_int(<4 x float> %a0, <4 x float> %a1) optsize {
2190 ; CHECK-LABEL: stack_fold_sqrtss_int:
2192 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2193 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2196 ; CHECK-NEXT: #NO_APP
2197 ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
2198 ; CHECK-NEXT: sqrtss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2200 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2201 %2 = extractelement <4 x float> %a1, i64 0
2202 %3 = call float @llvm.sqrt.f32(float %2)
2203 %4 = insertelement <4 x float> %a1, float %3, i64 0
2204 %5 = extractelement <4 x float> %4, i32 0
2205 %6 = insertelement <4 x float> %a0, float %5, i32 0
2209 define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) {
2210 ; CHECK-LABEL: stack_fold_subpd:
2212 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2215 ; CHECK-NEXT: #NO_APP
2216 ; CHECK-NEXT: subpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2218 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2219 %2 = fsub <2 x double> %a0, %a1
2223 define <4 x float> @stack_fold_subps(<4 x float> %a0, <4 x float> %a1) {
2224 ; CHECK-LABEL: stack_fold_subps:
2226 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2229 ; CHECK-NEXT: #NO_APP
2230 ; CHECK-NEXT: subps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2232 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2233 %2 = fsub <4 x float> %a0, %a1
2237 define double @stack_fold_subsd(double %a0, double %a1) {
2238 ; CHECK-LABEL: stack_fold_subsd:
2240 ; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2243 ; CHECK-NEXT: #NO_APP
2244 ; CHECK-NEXT: subsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
2246 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2247 %2 = fsub double %a0, %a1
2251 define <2 x double> @stack_fold_subsd_int(<2 x double> %a0, <2 x double> %a1) {
2252 ; CHECK-LABEL: stack_fold_subsd_int:
2254 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2257 ; CHECK-NEXT: #NO_APP
2258 ; CHECK-NEXT: subsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2260 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2261 %2 = extractelement <2 x double> %a0, i32 0
2262 %3 = extractelement <2 x double> %a1, i32 0
2263 %4 = fsub double %2, %3
2264 %5 = insertelement <2 x double> %a0, double %4, i32 0
2268 define float @stack_fold_subss(float %a0, float %a1) {
2269 ; CHECK-LABEL: stack_fold_subss:
2271 ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
2274 ; CHECK-NEXT: #NO_APP
2275 ; CHECK-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
2277 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2278 %2 = fsub float %a0, %a1
2282 define <4 x float> @stack_fold_subss_int(<4 x float> %a0, <4 x float> %a1) {
2283 ; CHECK-LABEL: stack_fold_subss_int:
2285 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2288 ; CHECK-NEXT: #NO_APP
2289 ; CHECK-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2291 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2292 %2 = extractelement <4 x float> %a0, i32 0
2293 %3 = extractelement <4 x float> %a1, i32 0
2294 %4 = fsub float %2, %3
2295 %5 = insertelement <4 x float> %a0, float %4, i32 0
2299 define i32 @stack_fold_ucomisd(double %a0, double %a1) {
2300 ; CHECK-LABEL: stack_fold_ucomisd:
2302 ; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2305 ; CHECK-NEXT: #NO_APP
2306 ; CHECK-NEXT: xorl %eax, %eax
2307 ; CHECK-NEXT: ucomisd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
2308 ; CHECK-NEXT: sete %al
2309 ; CHECK-NEXT: leal -1(%rax,%rax), %eax
2311 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2312 %2 = fcmp ueq double %a0, %a1
2313 %3 = select i1 %2, i32 1, i32 -1
2317 define i32 @stack_fold_ucomisd_int(<2 x double> %a0, <2 x double> %a1) {
2318 ; CHECK-LABEL: stack_fold_ucomisd_int:
2320 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2323 ; CHECK-NEXT: #NO_APP
2324 ; CHECK-NEXT: ucomisd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2325 ; CHECK-NEXT: setnp %al
2326 ; CHECK-NEXT: sete %cl
2327 ; CHECK-NEXT: andb %al, %cl
2328 ; CHECK-NEXT: movzbl %cl, %eax
2330 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2331 %2 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
2334 declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
2336 define i32 @stack_fold_ucomiss(float %a0, float %a1) {
2337 ; CHECK-LABEL: stack_fold_ucomiss:
2339 ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
2342 ; CHECK-NEXT: #NO_APP
2343 ; CHECK-NEXT: xorl %eax, %eax
2344 ; CHECK-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
2345 ; CHECK-NEXT: sete %al
2346 ; CHECK-NEXT: leal -1(%rax,%rax), %eax
2348 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2349 %2 = fcmp ueq float %a0, %a1
2350 %3 = select i1 %2, i32 1, i32 -1
2354 define i32 @stack_fold_ucomiss_int(<4 x float> %a0, <4 x float> %a1) {
2355 ; CHECK-LABEL: stack_fold_ucomiss_int:
2357 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2360 ; CHECK-NEXT: #NO_APP
2361 ; CHECK-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2362 ; CHECK-NEXT: setnp %al
2363 ; CHECK-NEXT: sete %cl
2364 ; CHECK-NEXT: andb %al, %cl
2365 ; CHECK-NEXT: movzbl %cl, %eax
2367 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2368 %2 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
2371 declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
2373 define <2 x double> @stack_fold_unpckhpd(<2 x double> %a0, <2 x double> %a1) {
2374 ; CHECK-LABEL: stack_fold_unpckhpd:
2376 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2379 ; CHECK-NEXT: #NO_APP
2380 ; CHECK-NEXT: unpckhpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2381 ; CHECK-NEXT: # xmm0 = xmm0[1],mem[1]
2382 ; CHECK-NEXT: xorpd %xmm1, %xmm1
2383 ; CHECK-NEXT: addpd %xmm1, %xmm0
2385 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2386 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
2387 ; fadd forces execution domain
2388 %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
2392 define <4 x float> @stack_fold_unpckhps(<4 x float> %a0, <4 x float> %a1) {
2393 ; CHECK-LABEL: stack_fold_unpckhps:
2395 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2398 ; CHECK-NEXT: #NO_APP
2399 ; CHECK-NEXT: unpckhps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2400 ; CHECK-NEXT: # xmm0 = xmm0[2],mem[2],xmm0[3],mem[3]
2401 ; CHECK-NEXT: xorps %xmm1, %xmm1
2402 ; CHECK-NEXT: addps %xmm1, %xmm0
2404 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2405 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
2406 ; fadd forces execution domain
2407 %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
; Check that the reload of the spilled %a1 is folded directly into UNPCKLPD as
; a memory operand. The nop asm below clobbers xmm2-xmm15 to force %a1 to be
; spilled to the stack (see file header).
2411 define <2 x double> @stack_fold_unpcklpd(<2 x double> %a0, <2 x double> %a1) {
2412 ; CHECK-LABEL: stack_fold_unpcklpd:
2414 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2417 ; CHECK-NEXT: #NO_APP
2418 ; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2419 ; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
2420 ; CHECK-NEXT: xorpd %xmm1, %xmm1
2421 ; CHECK-NEXT: addpd %xmm1, %xmm0
2423 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
; shufflevector mask <0, 2> selects the low element of each operand, i.e. the
; UNPCKLPD pattern.
2424 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
2425 ; fadd forces execution domain
2426 %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
; Check that the reload of the spilled %a1 is folded directly into UNPCKLPS as
; a memory operand. The nop asm below clobbers xmm2-xmm15 to force %a1 to be
; spilled to the stack (see file header).
2430 define <4 x float> @stack_fold_unpcklps(<4 x float> %a0, <4 x float> %a1) {
2431 ; CHECK-LABEL: stack_fold_unpcklps:
2433 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2436 ; CHECK-NEXT: #NO_APP
2437 ; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2438 ; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
2439 ; CHECK-NEXT: xorps %xmm1, %xmm1
2440 ; CHECK-NEXT: addps %xmm1, %xmm0
2442 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
; shufflevector mask <0, 4, 1, 5> interleaves the low halves of the two
; operands, i.e. the UNPCKLPS pattern.
2443 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
2444 ; fadd forces execution domain
2445 %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
; Check that the reload of the spilled %a1 is folded directly into XORPD as a
; memory operand. The nop asm below clobbers xmm2-xmm15 to force %a1 to be
; spilled to the stack (see file header).
2449 define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) {
2450 ; CHECK-LABEL: stack_fold_xorpd:
2452 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2455 ; CHECK-NEXT: #NO_APP
2456 ; CHECK-NEXT: xorpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2457 ; CHECK-NEXT: xorpd %xmm1, %xmm1
2458 ; CHECK-NEXT: addpd %xmm1, %xmm0
2460 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
; The xor is expressed on <2 x i64> via bitcasts since LLVM IR has no
; floating-point xor; the fadd below keeps the result in the FP domain so the
; double-precision XORPD form is selected.
2461 %2 = bitcast <2 x double> %a0 to <2 x i64>
2462 %3 = bitcast <2 x double> %a1 to <2 x i64>
2463 %4 = xor <2 x i64> %2, %3
2464 %5 = bitcast <2 x i64> %4 to <2 x double>
2465 ; fadd forces execution domain
2466 %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
; Check that the reload of the spilled %a1 is folded directly into XORPS as a
; memory operand. The nop asm below clobbers xmm2-xmm15 to force %a1 to be
; spilled to the stack (see file header).
2470 define <4 x float> @stack_fold_xorps(<4 x float> %a0, <4 x float> %a1) {
2471 ; CHECK-LABEL: stack_fold_xorps:
2473 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2476 ; CHECK-NEXT: #NO_APP
2477 ; CHECK-NEXT: xorps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2478 ; CHECK-NEXT: xorps %xmm1, %xmm1
2479 ; CHECK-NEXT: addps %xmm1, %xmm0
2481 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
; The xor is expressed on <2 x i64> via bitcasts since LLVM IR has no
; floating-point xor; the fadd below keeps the result in the FP domain so the
; single-precision XORPS form is selected.
2482 %2 = bitcast <4 x float> %a0 to <2 x i64>
2483 %3 = bitcast <4 x float> %a1 to <2 x i64>
2484 %4 = xor <2 x i64> %2, %3
2485 %5 = bitcast <2 x i64> %4 to <4 x float>
2486 ; fadd forces execution domain
2487 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
2491 declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
2492 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
2494 attributes #0 = { "unsafe-fp-math"="false" }
2495 attributes #1 = { "unsafe-fp-math"="true" }