1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -O3 -verify-machineinstrs -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 < %s | FileCheck %s
4 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
5 target triple = "x86_64-unknown-unknown"
7 ; Stack reload folding tests.
9 ; By including a nop call with side effects we can force a partial register spill of the
10 ; relevant registers and check that the reload is correctly folded into the instruction.
12 define <2 x double> @stack_fold_addpd(<2 x double> %a0, <2 x double> %a1) {
13 ; CHECK-LABEL: stack_fold_addpd:
15 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
19 ; CHECK-NEXT: addpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
21 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
22 %2 = fadd <2 x double> %a0, %a1
26 define <4 x float> @stack_fold_addps(<4 x float> %a0, <4 x float> %a1) {
27 ; CHECK-LABEL: stack_fold_addps:
29 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
33 ; CHECK-NEXT: addps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
35 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
36 %2 = fadd <4 x float> %a0, %a1
40 define double @stack_fold_addsd(double %a0, double %a1) {
41 ; CHECK-LABEL: stack_fold_addsd:
43 ; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
47 ; CHECK-NEXT: addsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
49 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
50 %2 = fadd double %a0, %a1
54 define <2 x double> @stack_fold_addsd_int(<2 x double> %a0, <2 x double> %a1) {
55 ; CHECK-LABEL: stack_fold_addsd_int:
57 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
61 ; CHECK-NEXT: addsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
63 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
64 %2 = extractelement <2 x double> %a0, i32 0
65 %3 = extractelement <2 x double> %a1, i32 0
66 %4 = fadd double %2, %3
67 %5 = insertelement <2 x double> %a0, double %4, i32 0
71 define float @stack_fold_addss(float %a0, float %a1) {
72 ; CHECK-LABEL: stack_fold_addss:
74 ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
78 ; CHECK-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
80 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
81 %2 = fadd float %a0, %a1
85 define <4 x float> @stack_fold_addss_int(<4 x float> %a0, <4 x float> %a1) {
86 ; CHECK-LABEL: stack_fold_addss_int:
88 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
92 ; CHECK-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
94 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
95 %2 = extractelement <4 x float> %a0, i32 0
96 %3 = extractelement <4 x float> %a1, i32 0
97 %4 = fadd float %2, %3
98 %5 = insertelement <4 x float> %a0, float %4, i32 0
102 define <2 x double> @stack_fold_addsubpd(<2 x double> %a0, <2 x double> %a1) {
103 ; CHECK-LABEL: stack_fold_addsubpd:
105 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
108 ; CHECK-NEXT: #NO_APP
109 ; CHECK-NEXT: addsubpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
111 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
112 %2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
115 declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
117 define <4 x float> @stack_fold_addsubps(<4 x float> %a0, <4 x float> %a1) {
118 ; CHECK-LABEL: stack_fold_addsubps:
120 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
123 ; CHECK-NEXT: #NO_APP
124 ; CHECK-NEXT: addsubps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
126 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
127 %2 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
130 declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
132 define <2 x double> @stack_fold_andnpd(<2 x double> %a0, <2 x double> %a1) {
133 ; CHECK-LABEL: stack_fold_andnpd:
135 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
138 ; CHECK-NEXT: #NO_APP
139 ; CHECK-NEXT: andnpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
140 ; CHECK-NEXT: xorpd %xmm1, %xmm1
141 ; CHECK-NEXT: addpd %xmm1, %xmm0
143 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
144 %2 = bitcast <2 x double> %a0 to <2 x i64>
145 %3 = bitcast <2 x double> %a1 to <2 x i64>
146 %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
147 %5 = and <2 x i64> %4, %3
148 %6 = bitcast <2 x i64> %5 to <2 x double>
149 ; fadd forces execution domain
150 %7 = fadd <2 x double> %6, <double 0x0, double 0x0>
154 define <4 x float> @stack_fold_andnps(<4 x float> %a0, <4 x float> %a1) {
155 ; CHECK-LABEL: stack_fold_andnps:
157 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
160 ; CHECK-NEXT: #NO_APP
161 ; CHECK-NEXT: andnps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
162 ; CHECK-NEXT: xorps %xmm1, %xmm1
163 ; CHECK-NEXT: addps %xmm1, %xmm0
165 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
166 %2 = bitcast <4 x float> %a0 to <2 x i64>
167 %3 = bitcast <4 x float> %a1 to <2 x i64>
168 %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
169 %5 = and <2 x i64> %4, %3
170 %6 = bitcast <2 x i64> %5 to <4 x float>
171 ; fadd forces execution domain
172 %7 = fadd <4 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0>
176 define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) {
177 ; CHECK-LABEL: stack_fold_andpd:
179 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
182 ; CHECK-NEXT: #NO_APP
183 ; CHECK-NEXT: andpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
184 ; CHECK-NEXT: xorpd %xmm1, %xmm1
185 ; CHECK-NEXT: addpd %xmm1, %xmm0
187 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
188 %2 = bitcast <2 x double> %a0 to <2 x i64>
189 %3 = bitcast <2 x double> %a1 to <2 x i64>
190 %4 = and <2 x i64> %2, %3
191 %5 = bitcast <2 x i64> %4 to <2 x double>
192 ; fadd forces execution domain
193 %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
197 define <4 x float> @stack_fold_andps(<4 x float> %a0, <4 x float> %a1) {
198 ; CHECK-LABEL: stack_fold_andps:
200 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
203 ; CHECK-NEXT: #NO_APP
204 ; CHECK-NEXT: andps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
205 ; CHECK-NEXT: xorps %xmm1, %xmm1
206 ; CHECK-NEXT: addps %xmm1, %xmm0
208 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
209 %2 = bitcast <4 x float> %a0 to <2 x i64>
210 %3 = bitcast <4 x float> %a1 to <2 x i64>
211 %4 = and <2 x i64> %2, %3
212 %5 = bitcast <2 x i64> %4 to <4 x float>
213 ; fadd forces execution domain
214 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
218 define <2 x double> @stack_fold_blendpd(<2 x double> %a0, <2 x double> %a1) {
219 ; CHECK-LABEL: stack_fold_blendpd:
221 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
224 ; CHECK-NEXT: #NO_APP
225 ; CHECK-NEXT: blendpd $2, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
226 ; CHECK-NEXT: # xmm0 = xmm0[0],mem[1]
227 ; CHECK-NEXT: xorpd %xmm1, %xmm1
228 ; CHECK-NEXT: addpd %xmm1, %xmm0
230 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
231 %2 = select <2 x i1> <i1 1, i1 0>, <2 x double> %a0, <2 x double> %a1
232 ; fadd forces execution domain
233 %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
237 define <4 x float> @stack_fold_blendps(<4 x float> %a0, <4 x float> %a1) {
238 ; CHECK-LABEL: stack_fold_blendps:
240 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
243 ; CHECK-NEXT: #NO_APP
244 ; CHECK-NEXT: blendps $6, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
245 ; CHECK-NEXT: # xmm0 = xmm0[0],mem[1,2],xmm0[3]
246 ; CHECK-NEXT: xorps %xmm1, %xmm1
247 ; CHECK-NEXT: addps %xmm1, %xmm0
249 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
250 %2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x float> %a0, <4 x float> %a1
251 ; fadd forces execution domain
252 %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
256 define <2 x double> @stack_fold_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %c) {
257 ; CHECK-LABEL: stack_fold_blendvpd:
259 ; CHECK-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
260 ; CHECK-NEXT: movapd %xmm1, %xmm2
263 ; CHECK-NEXT: #NO_APP
264 ; CHECK-NEXT: blendvpd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
265 ; CHECK-NEXT: movapd %xmm2, %xmm0
267 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
268 %2 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a1, <2 x double> %c, <2 x double> %a0)
271 declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
273 define <4 x float> @stack_fold_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %c) {
274 ; CHECK-LABEL: stack_fold_blendvps:
276 ; CHECK-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
277 ; CHECK-NEXT: movaps %xmm1, %xmm2
280 ; CHECK-NEXT: #NO_APP
281 ; CHECK-NEXT: blendvps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
282 ; CHECK-NEXT: movaps %xmm2, %xmm0
284 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
285 %2 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a1, <4 x float> %c, <4 x float> %a0)
288 declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
290 define <2 x double> @stack_fold_cmppd(<2 x double> %a0, <2 x double> %a1) {
291 ; CHECK-LABEL: stack_fold_cmppd:
293 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
296 ; CHECK-NEXT: #NO_APP
297 ; CHECK-NEXT: cmpeqpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
299 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
300 %2 = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 0)
303 declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
305 define <4 x float> @stack_fold_cmpps(<4 x float> %a0, <4 x float> %a1) {
306 ; CHECK-LABEL: stack_fold_cmpps:
308 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
311 ; CHECK-NEXT: #NO_APP
312 ; CHECK-NEXT: cmpeqps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
314 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
315 %2 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 0)
318 declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
320 define i32 @stack_fold_cmpsd(double %a0, double %a1) {
321 ; CHECK-LABEL: stack_fold_cmpsd:
323 ; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
326 ; CHECK-NEXT: #NO_APP
327 ; CHECK-NEXT: cmpeqsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
328 ; CHECK-NEXT: movq %xmm0, %rax
329 ; CHECK-NEXT: andl $1, %eax
330 ; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
332 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
333 %2 = fcmp oeq double %a0, %a1
334 %3 = zext i1 %2 to i32
338 define <2 x double> @stack_fold_cmpsd_int(<2 x double> %a0, <2 x double> %a1) {
339 ; CHECK-LABEL: stack_fold_cmpsd_int:
341 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
344 ; CHECK-NEXT: #NO_APP
345 ; CHECK-NEXT: cmpeqsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
347 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
348 %2 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
351 declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
353 define i32 @stack_fold_cmpss(float %a0, float %a1) {
354 ; CHECK-LABEL: stack_fold_cmpss:
356 ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
359 ; CHECK-NEXT: #NO_APP
360 ; CHECK-NEXT: cmpeqss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
361 ; CHECK-NEXT: movd %xmm0, %eax
362 ; CHECK-NEXT: andl $1, %eax
364 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
365 %2 = fcmp oeq float %a0, %a1
366 %3 = zext i1 %2 to i32
370 define <4 x float> @stack_fold_cmpss_int(<4 x float> %a0, <4 x float> %a1) {
371 ; CHECK-LABEL: stack_fold_cmpss_int:
373 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
376 ; CHECK-NEXT: #NO_APP
377 ; CHECK-NEXT: cmpeqss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
379 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
380 %2 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0)
383 declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
385 ; TODO stack_fold_comisd
387 define i32 @stack_fold_comisd_int(<2 x double> %a0, <2 x double> %a1) {
388 ; CHECK-LABEL: stack_fold_comisd_int:
390 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
393 ; CHECK-NEXT: #NO_APP
394 ; CHECK-NEXT: comisd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
395 ; CHECK-NEXT: setnp %al
396 ; CHECK-NEXT: sete %cl
397 ; CHECK-NEXT: andb %al, %cl
398 ; CHECK-NEXT: movzbl %cl, %eax
400 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
401 %2 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
404 declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
406 ; TODO stack_fold_comiss
408 define i32 @stack_fold_comiss_int(<4 x float> %a0, <4 x float> %a1) {
409 ; CHECK-LABEL: stack_fold_comiss_int:
411 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
414 ; CHECK-NEXT: #NO_APP
415 ; CHECK-NEXT: comiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
416 ; CHECK-NEXT: setnp %al
417 ; CHECK-NEXT: sete %cl
418 ; CHECK-NEXT: andb %al, %cl
419 ; CHECK-NEXT: movzbl %cl, %eax
421 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
422 %2 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
425 declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
427 define <2 x double> @stack_fold_cvtdq2pd(<4 x i32> %a0) {
428 ; CHECK-LABEL: stack_fold_cvtdq2pd:
430 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
433 ; CHECK-NEXT: #NO_APP
434 ; CHECK-NEXT: cvtdq2pd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
436 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
437 %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
438 %3 = sitofp <2 x i32> %2 to <2 x double>
442 define <2 x double> @stack_fold_cvtdq2pd_int(<4 x i32> %a0) {
443 ; CHECK-LABEL: stack_fold_cvtdq2pd_int:
445 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
448 ; CHECK-NEXT: #NO_APP
449 ; CHECK-NEXT: cvtdq2pd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
451 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
452 %2 = shufflevector <4 x i32> %a0, <4 x i32> %a0, <2 x i32> <i32 0, i32 1>
453 %cvt = sitofp <2 x i32> %2 to <2 x double>
454 ret <2 x double> %cvt
457 define <4 x float> @stack_fold_cvtdq2ps(<4 x i32> %a0) {
458 ; CHECK-LABEL: stack_fold_cvtdq2ps:
460 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
463 ; CHECK-NEXT: #NO_APP
464 ; CHECK-NEXT: cvtdq2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
466 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
467 %2 = sitofp <4 x i32> %a0 to <4 x float>
471 define <4 x i32> @stack_fold_cvtpd2dq(<2 x double> %a0) {
472 ; CHECK-LABEL: stack_fold_cvtpd2dq:
474 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
477 ; CHECK-NEXT: #NO_APP
478 ; CHECK-NEXT: cvtpd2dq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
480 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
481 %2 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
484 declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
486 define <2 x float> @stack_fold_cvtpd2ps(<2 x double> %a0) {
487 ; CHECK-LABEL: stack_fold_cvtpd2ps:
489 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
492 ; CHECK-NEXT: #NO_APP
493 ; CHECK-NEXT: cvtpd2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
495 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
496 %2 = fptrunc <2 x double> %a0 to <2 x float>
500 define <4 x i32> @stack_fold_cvtps2dq(<4 x float> %a0) {
501 ; CHECK-LABEL: stack_fold_cvtps2dq:
503 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
506 ; CHECK-NEXT: #NO_APP
507 ; CHECK-NEXT: cvtps2dq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
509 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
510 %2 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
513 declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
515 define <2 x double> @stack_fold_cvtps2pd(<4 x float> %a0) {
516 ; CHECK-LABEL: stack_fold_cvtps2pd:
518 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
521 ; CHECK-NEXT: #NO_APP
522 ; CHECK-NEXT: cvtps2pd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
524 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
525 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <2 x i32> <i32 0, i32 1>
526 %3 = fpext <2 x float> %2 to <2 x double>
530 define <2 x double> @stack_fold_cvtps2pd_int(<4 x float> %a0) {
531 ; CHECK-LABEL: stack_fold_cvtps2pd_int:
533 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
536 ; CHECK-NEXT: #NO_APP
537 ; CHECK-NEXT: cvtps2pd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
539 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
540 %2 = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1>
541 %cvtps2pd = fpext <2 x float> %2 to <2 x double>
542 ret <2 x double> %cvtps2pd
545 ; TODO stack_fold_cvtsd2si
547 define i32 @stack_fold_cvtsd2si_int(<2 x double> %a0) {
548 ; CHECK-LABEL: stack_fold_cvtsd2si_int:
550 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
553 ; CHECK-NEXT: #NO_APP
554 ; CHECK-NEXT: cvtsd2si {{[-0-9]+}}(%r{{[sb]}}p), %eax # 16-byte Folded Reload
556 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
557 %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
560 declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
562 ; TODO stack_fold_cvtsd2si64
564 define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) {
565 ; CHECK-LABEL: stack_fold_cvtsd2si64_int:
567 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
570 ; CHECK-NEXT: #NO_APP
571 ; CHECK-NEXT: cvtsd2si {{[-0-9]+}}(%r{{[sb]}}p), %rax # 16-byte Folded Reload
573 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
574 %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0)
577 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
579 define float @stack_fold_cvtsd2ss(double %a0) minsize {
580 ; CHECK-LABEL: stack_fold_cvtsd2ss:
582 ; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
585 ; CHECK-NEXT: #NO_APP
586 ; CHECK-NEXT: xorps %xmm0, %xmm0
587 ; CHECK-NEXT: cvtsd2ss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
589 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
590 %2 = fptrunc double %a0 to float
594 define <4 x float> @stack_fold_cvtsd2ss_int(<2 x double> %a0) optsize {
595 ; CHECK-LABEL: stack_fold_cvtsd2ss_int:
597 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
600 ; CHECK-NEXT: #NO_APP
601 ; CHECK-NEXT: xorps %xmm1, %xmm1
602 ; CHECK-NEXT: cvtsd2ss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
603 ; CHECK-NEXT: movaps %xmm1, %xmm0
605 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
606 %2 = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, <2 x double> %a0)
609 declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
611 define double @stack_fold_cvtsi2sd(i32 %a0) {
612 ; CHECK-LABEL: stack_fold_cvtsi2sd:
614 ; CHECK-NEXT: pushq %rbp
615 ; CHECK-NEXT: .cfi_def_cfa_offset 16
616 ; CHECK-NEXT: pushq %r15
617 ; CHECK-NEXT: .cfi_def_cfa_offset 24
618 ; CHECK-NEXT: pushq %r14
619 ; CHECK-NEXT: .cfi_def_cfa_offset 32
620 ; CHECK-NEXT: pushq %r13
621 ; CHECK-NEXT: .cfi_def_cfa_offset 40
622 ; CHECK-NEXT: pushq %r12
623 ; CHECK-NEXT: .cfi_def_cfa_offset 48
624 ; CHECK-NEXT: pushq %rbx
625 ; CHECK-NEXT: .cfi_def_cfa_offset 56
626 ; CHECK-NEXT: .cfi_offset %rbx, -56
627 ; CHECK-NEXT: .cfi_offset %r12, -48
628 ; CHECK-NEXT: .cfi_offset %r13, -40
629 ; CHECK-NEXT: .cfi_offset %r14, -32
630 ; CHECK-NEXT: .cfi_offset %r15, -24
631 ; CHECK-NEXT: .cfi_offset %rbp, -16
632 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
635 ; CHECK-NEXT: #NO_APP
636 ; CHECK-NEXT: xorps %xmm0, %xmm0
637 ; CHECK-NEXT: cvtsi2sdl {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
638 ; CHECK-NEXT: popq %rbx
639 ; CHECK-NEXT: .cfi_def_cfa_offset 48
640 ; CHECK-NEXT: popq %r12
641 ; CHECK-NEXT: .cfi_def_cfa_offset 40
642 ; CHECK-NEXT: popq %r13
643 ; CHECK-NEXT: .cfi_def_cfa_offset 32
644 ; CHECK-NEXT: popq %r14
645 ; CHECK-NEXT: .cfi_def_cfa_offset 24
646 ; CHECK-NEXT: popq %r15
647 ; CHECK-NEXT: .cfi_def_cfa_offset 16
648 ; CHECK-NEXT: popq %rbp
649 ; CHECK-NEXT: .cfi_def_cfa_offset 8
651 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
652 %2 = sitofp i32 %a0 to double
656 define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0, <2 x double> %b0) {
657 ; CHECK-LABEL: stack_fold_cvtsi2sd_int:
659 ; CHECK-NEXT: pushq %rbp
660 ; CHECK-NEXT: .cfi_def_cfa_offset 16
661 ; CHECK-NEXT: pushq %r15
662 ; CHECK-NEXT: .cfi_def_cfa_offset 24
663 ; CHECK-NEXT: pushq %r14
664 ; CHECK-NEXT: .cfi_def_cfa_offset 32
665 ; CHECK-NEXT: pushq %r13
666 ; CHECK-NEXT: .cfi_def_cfa_offset 40
667 ; CHECK-NEXT: pushq %r12
668 ; CHECK-NEXT: .cfi_def_cfa_offset 48
669 ; CHECK-NEXT: pushq %rbx
670 ; CHECK-NEXT: .cfi_def_cfa_offset 56
671 ; CHECK-NEXT: .cfi_offset %rbx, -56
672 ; CHECK-NEXT: .cfi_offset %r12, -48
673 ; CHECK-NEXT: .cfi_offset %r13, -40
674 ; CHECK-NEXT: .cfi_offset %r14, -32
675 ; CHECK-NEXT: .cfi_offset %r15, -24
676 ; CHECK-NEXT: .cfi_offset %rbp, -16
677 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
680 ; CHECK-NEXT: #NO_APP
681 ; CHECK-NEXT: cvtsi2sdl {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
682 ; CHECK-NEXT: popq %rbx
683 ; CHECK-NEXT: .cfi_def_cfa_offset 48
684 ; CHECK-NEXT: popq %r12
685 ; CHECK-NEXT: .cfi_def_cfa_offset 40
686 ; CHECK-NEXT: popq %r13
687 ; CHECK-NEXT: .cfi_def_cfa_offset 32
688 ; CHECK-NEXT: popq %r14
689 ; CHECK-NEXT: .cfi_def_cfa_offset 24
690 ; CHECK-NEXT: popq %r15
691 ; CHECK-NEXT: .cfi_def_cfa_offset 16
692 ; CHECK-NEXT: popq %rbp
693 ; CHECK-NEXT: .cfi_def_cfa_offset 8
695 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
696 %2 = sitofp i32 %a0 to double
697 %3 = insertelement <2 x double> %b0, double %2, i64 0
701 define double @stack_fold_cvtsi642sd(i64 %a0) {
702 ; CHECK-LABEL: stack_fold_cvtsi642sd:
704 ; CHECK-NEXT: pushq %rbp
705 ; CHECK-NEXT: .cfi_def_cfa_offset 16
706 ; CHECK-NEXT: pushq %r15
707 ; CHECK-NEXT: .cfi_def_cfa_offset 24
708 ; CHECK-NEXT: pushq %r14
709 ; CHECK-NEXT: .cfi_def_cfa_offset 32
710 ; CHECK-NEXT: pushq %r13
711 ; CHECK-NEXT: .cfi_def_cfa_offset 40
712 ; CHECK-NEXT: pushq %r12
713 ; CHECK-NEXT: .cfi_def_cfa_offset 48
714 ; CHECK-NEXT: pushq %rbx
715 ; CHECK-NEXT: .cfi_def_cfa_offset 56
716 ; CHECK-NEXT: .cfi_offset %rbx, -56
717 ; CHECK-NEXT: .cfi_offset %r12, -48
718 ; CHECK-NEXT: .cfi_offset %r13, -40
719 ; CHECK-NEXT: .cfi_offset %r14, -32
720 ; CHECK-NEXT: .cfi_offset %r15, -24
721 ; CHECK-NEXT: .cfi_offset %rbp, -16
722 ; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
725 ; CHECK-NEXT: #NO_APP
726 ; CHECK-NEXT: xorps %xmm0, %xmm0
727 ; CHECK-NEXT: cvtsi2sdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
728 ; CHECK-NEXT: popq %rbx
729 ; CHECK-NEXT: .cfi_def_cfa_offset 48
730 ; CHECK-NEXT: popq %r12
731 ; CHECK-NEXT: .cfi_def_cfa_offset 40
732 ; CHECK-NEXT: popq %r13
733 ; CHECK-NEXT: .cfi_def_cfa_offset 32
734 ; CHECK-NEXT: popq %r14
735 ; CHECK-NEXT: .cfi_def_cfa_offset 24
736 ; CHECK-NEXT: popq %r15
737 ; CHECK-NEXT: .cfi_def_cfa_offset 16
738 ; CHECK-NEXT: popq %rbp
739 ; CHECK-NEXT: .cfi_def_cfa_offset 8
741 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
742 %2 = sitofp i64 %a0 to double
746 define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0, <2 x double> %b0) {
747 ; CHECK-LABEL: stack_fold_cvtsi642sd_int:
749 ; CHECK-NEXT: pushq %rbp
750 ; CHECK-NEXT: .cfi_def_cfa_offset 16
751 ; CHECK-NEXT: pushq %r15
752 ; CHECK-NEXT: .cfi_def_cfa_offset 24
753 ; CHECK-NEXT: pushq %r14
754 ; CHECK-NEXT: .cfi_def_cfa_offset 32
755 ; CHECK-NEXT: pushq %r13
756 ; CHECK-NEXT: .cfi_def_cfa_offset 40
757 ; CHECK-NEXT: pushq %r12
758 ; CHECK-NEXT: .cfi_def_cfa_offset 48
759 ; CHECK-NEXT: pushq %rbx
760 ; CHECK-NEXT: .cfi_def_cfa_offset 56
761 ; CHECK-NEXT: .cfi_offset %rbx, -56
762 ; CHECK-NEXT: .cfi_offset %r12, -48
763 ; CHECK-NEXT: .cfi_offset %r13, -40
764 ; CHECK-NEXT: .cfi_offset %r14, -32
765 ; CHECK-NEXT: .cfi_offset %r15, -24
766 ; CHECK-NEXT: .cfi_offset %rbp, -16
767 ; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
770 ; CHECK-NEXT: #NO_APP
771 ; CHECK-NEXT: cvtsi2sdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
772 ; CHECK-NEXT: popq %rbx
773 ; CHECK-NEXT: .cfi_def_cfa_offset 48
774 ; CHECK-NEXT: popq %r12
775 ; CHECK-NEXT: .cfi_def_cfa_offset 40
776 ; CHECK-NEXT: popq %r13
777 ; CHECK-NEXT: .cfi_def_cfa_offset 32
778 ; CHECK-NEXT: popq %r14
779 ; CHECK-NEXT: .cfi_def_cfa_offset 24
780 ; CHECK-NEXT: popq %r15
781 ; CHECK-NEXT: .cfi_def_cfa_offset 16
782 ; CHECK-NEXT: popq %rbp
783 ; CHECK-NEXT: .cfi_def_cfa_offset 8
785 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
786 %2 = sitofp i64 %a0 to double
787 %3 = insertelement <2 x double> %b0, double %2, i64 0
791 define float @stack_fold_cvtsi2ss(i32 %a0) {
792 ; CHECK-LABEL: stack_fold_cvtsi2ss:
794 ; CHECK-NEXT: pushq %rbp
795 ; CHECK-NEXT: .cfi_def_cfa_offset 16
796 ; CHECK-NEXT: pushq %r15
797 ; CHECK-NEXT: .cfi_def_cfa_offset 24
798 ; CHECK-NEXT: pushq %r14
799 ; CHECK-NEXT: .cfi_def_cfa_offset 32
800 ; CHECK-NEXT: pushq %r13
801 ; CHECK-NEXT: .cfi_def_cfa_offset 40
802 ; CHECK-NEXT: pushq %r12
803 ; CHECK-NEXT: .cfi_def_cfa_offset 48
804 ; CHECK-NEXT: pushq %rbx
805 ; CHECK-NEXT: .cfi_def_cfa_offset 56
806 ; CHECK-NEXT: .cfi_offset %rbx, -56
807 ; CHECK-NEXT: .cfi_offset %r12, -48
808 ; CHECK-NEXT: .cfi_offset %r13, -40
809 ; CHECK-NEXT: .cfi_offset %r14, -32
810 ; CHECK-NEXT: .cfi_offset %r15, -24
811 ; CHECK-NEXT: .cfi_offset %rbp, -16
812 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
815 ; CHECK-NEXT: #NO_APP
816 ; CHECK-NEXT: xorps %xmm0, %xmm0
817 ; CHECK-NEXT: cvtsi2ssl {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
818 ; CHECK-NEXT: popq %rbx
819 ; CHECK-NEXT: .cfi_def_cfa_offset 48
820 ; CHECK-NEXT: popq %r12
821 ; CHECK-NEXT: .cfi_def_cfa_offset 40
822 ; CHECK-NEXT: popq %r13
823 ; CHECK-NEXT: .cfi_def_cfa_offset 32
824 ; CHECK-NEXT: popq %r14
825 ; CHECK-NEXT: .cfi_def_cfa_offset 24
826 ; CHECK-NEXT: popq %r15
827 ; CHECK-NEXT: .cfi_def_cfa_offset 16
828 ; CHECK-NEXT: popq %rbp
829 ; CHECK-NEXT: .cfi_def_cfa_offset 8
831 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
832 %2 = sitofp i32 %a0 to float
836 define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0, <4 x float> %b0) {
837 ; CHECK-LABEL: stack_fold_cvtsi2ss_int:
839 ; CHECK-NEXT: pushq %rbp
840 ; CHECK-NEXT: .cfi_def_cfa_offset 16
841 ; CHECK-NEXT: pushq %r15
842 ; CHECK-NEXT: .cfi_def_cfa_offset 24
843 ; CHECK-NEXT: pushq %r14
844 ; CHECK-NEXT: .cfi_def_cfa_offset 32
845 ; CHECK-NEXT: pushq %r13
846 ; CHECK-NEXT: .cfi_def_cfa_offset 40
847 ; CHECK-NEXT: pushq %r12
848 ; CHECK-NEXT: .cfi_def_cfa_offset 48
849 ; CHECK-NEXT: pushq %rbx
850 ; CHECK-NEXT: .cfi_def_cfa_offset 56
851 ; CHECK-NEXT: .cfi_offset %rbx, -56
852 ; CHECK-NEXT: .cfi_offset %r12, -48
853 ; CHECK-NEXT: .cfi_offset %r13, -40
854 ; CHECK-NEXT: .cfi_offset %r14, -32
855 ; CHECK-NEXT: .cfi_offset %r15, -24
856 ; CHECK-NEXT: .cfi_offset %rbp, -16
857 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
860 ; CHECK-NEXT: #NO_APP
861 ; CHECK-NEXT: cvtsi2ssl {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
862 ; CHECK-NEXT: popq %rbx
863 ; CHECK-NEXT: .cfi_def_cfa_offset 48
864 ; CHECK-NEXT: popq %r12
865 ; CHECK-NEXT: .cfi_def_cfa_offset 40
866 ; CHECK-NEXT: popq %r13
867 ; CHECK-NEXT: .cfi_def_cfa_offset 32
868 ; CHECK-NEXT: popq %r14
869 ; CHECK-NEXT: .cfi_def_cfa_offset 24
870 ; CHECK-NEXT: popq %r15
871 ; CHECK-NEXT: .cfi_def_cfa_offset 16
872 ; CHECK-NEXT: popq %rbp
873 ; CHECK-NEXT: .cfi_def_cfa_offset 8
875 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
876 %2 = sitofp i32 %a0 to float
877 %3 = insertelement <4 x float> %b0, float %2, i64 0
881 define float @stack_fold_cvtsi642ss(i64 %a0) {
882 ; CHECK-LABEL: stack_fold_cvtsi642ss:
884 ; CHECK-NEXT: pushq %rbp
885 ; CHECK-NEXT: .cfi_def_cfa_offset 16
886 ; CHECK-NEXT: pushq %r15
887 ; CHECK-NEXT: .cfi_def_cfa_offset 24
888 ; CHECK-NEXT: pushq %r14
889 ; CHECK-NEXT: .cfi_def_cfa_offset 32
890 ; CHECK-NEXT: pushq %r13
891 ; CHECK-NEXT: .cfi_def_cfa_offset 40
892 ; CHECK-NEXT: pushq %r12
893 ; CHECK-NEXT: .cfi_def_cfa_offset 48
894 ; CHECK-NEXT: pushq %rbx
895 ; CHECK-NEXT: .cfi_def_cfa_offset 56
896 ; CHECK-NEXT: .cfi_offset %rbx, -56
897 ; CHECK-NEXT: .cfi_offset %r12, -48
898 ; CHECK-NEXT: .cfi_offset %r13, -40
899 ; CHECK-NEXT: .cfi_offset %r14, -32
900 ; CHECK-NEXT: .cfi_offset %r15, -24
901 ; CHECK-NEXT: .cfi_offset %rbp, -16
902 ; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
905 ; CHECK-NEXT: #NO_APP
906 ; CHECK-NEXT: xorps %xmm0, %xmm0
907 ; CHECK-NEXT: cvtsi2ssq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
908 ; CHECK-NEXT: popq %rbx
909 ; CHECK-NEXT: .cfi_def_cfa_offset 48
910 ; CHECK-NEXT: popq %r12
911 ; CHECK-NEXT: .cfi_def_cfa_offset 40
912 ; CHECK-NEXT: popq %r13
913 ; CHECK-NEXT: .cfi_def_cfa_offset 32
914 ; CHECK-NEXT: popq %r14
915 ; CHECK-NEXT: .cfi_def_cfa_offset 24
916 ; CHECK-NEXT: popq %r15
917 ; CHECK-NEXT: .cfi_def_cfa_offset 16
918 ; CHECK-NEXT: popq %rbp
919 ; CHECK-NEXT: .cfi_def_cfa_offset 8
921 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
922 %2 = sitofp i64 %a0 to float
926 define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0, <4 x float> %b0) {
927 ; CHECK-LABEL: stack_fold_cvtsi642ss_int:
929 ; CHECK-NEXT: pushq %rbp
930 ; CHECK-NEXT: .cfi_def_cfa_offset 16
931 ; CHECK-NEXT: pushq %r15
932 ; CHECK-NEXT: .cfi_def_cfa_offset 24
933 ; CHECK-NEXT: pushq %r14
934 ; CHECK-NEXT: .cfi_def_cfa_offset 32
935 ; CHECK-NEXT: pushq %r13
936 ; CHECK-NEXT: .cfi_def_cfa_offset 40
937 ; CHECK-NEXT: pushq %r12
938 ; CHECK-NEXT: .cfi_def_cfa_offset 48
939 ; CHECK-NEXT: pushq %rbx
940 ; CHECK-NEXT: .cfi_def_cfa_offset 56
941 ; CHECK-NEXT: .cfi_offset %rbx, -56
942 ; CHECK-NEXT: .cfi_offset %r12, -48
943 ; CHECK-NEXT: .cfi_offset %r13, -40
944 ; CHECK-NEXT: .cfi_offset %r14, -32
945 ; CHECK-NEXT: .cfi_offset %r15, -24
946 ; CHECK-NEXT: .cfi_offset %rbp, -16
947 ; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
950 ; CHECK-NEXT: #NO_APP
951 ; CHECK-NEXT: cvtsi2ssq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
952 ; CHECK-NEXT: popq %rbx
953 ; CHECK-NEXT: .cfi_def_cfa_offset 48
954 ; CHECK-NEXT: popq %r12
955 ; CHECK-NEXT: .cfi_def_cfa_offset 40
956 ; CHECK-NEXT: popq %r13
957 ; CHECK-NEXT: .cfi_def_cfa_offset 32
958 ; CHECK-NEXT: popq %r14
959 ; CHECK-NEXT: .cfi_def_cfa_offset 24
960 ; CHECK-NEXT: popq %r15
961 ; CHECK-NEXT: .cfi_def_cfa_offset 16
962 ; CHECK-NEXT: popq %rbp
963 ; CHECK-NEXT: .cfi_def_cfa_offset 8
965 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
966 %2 = sitofp i64 %a0 to float
967 %3 = insertelement <4 x float> %b0, float %2, i64 0
971 define double @stack_fold_cvtss2sd(float %a0) minsize {
972 ; CHECK-LABEL: stack_fold_cvtss2sd:
974 ; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
977 ; CHECK-NEXT: #NO_APP
978 ; CHECK-NEXT: xorps %xmm0, %xmm0
979 ; CHECK-NEXT: cvtss2sd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
981 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
982 %2 = fpext float %a0 to double
986 define <2 x double> @stack_fold_cvtss2sd_int(<4 x float> %a0) optsize {
987 ; CHECK-LABEL: stack_fold_cvtss2sd_int:
989 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
992 ; CHECK-NEXT: #NO_APP
993 ; CHECK-NEXT: xorps %xmm0, %xmm0
994 ; CHECK-NEXT: cvtss2sd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
995 ; CHECK-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
997 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
998 %2 = extractelement <4 x float> %a0, i64 0
999 %3 = fpext float %2 to double
1000 %4 = insertelement <2 x double> zeroinitializer, double %3, i64 0
1004 ; TODO stack_fold_cvtss2si
1006 define i32 @stack_fold_cvtss2si_int(<4 x float> %a0) {
1007 ; CHECK-LABEL: stack_fold_cvtss2si_int:
1009 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1012 ; CHECK-NEXT: #NO_APP
1013 ; CHECK-NEXT: cvtss2si {{[-0-9]+}}(%r{{[sb]}}p), %eax # 16-byte Folded Reload
1015 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1016 %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
1019 declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
1021 ; TODO stack_fold_cvtss2si64
1023 define i64 @stack_fold_cvtss2si64_int(<4 x float> %a0) {
1024 ; CHECK-LABEL: stack_fold_cvtss2si64_int:
1026 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1029 ; CHECK-NEXT: #NO_APP
1030 ; CHECK-NEXT: cvtss2si {{[-0-9]+}}(%r{{[sb]}}p), %rax # 16-byte Folded Reload
1032 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1033 %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0)
1036 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
1038 define <4 x i32> @stack_fold_cvttpd2dq(<2 x double> %a0) {
1039 ; CHECK-LABEL: stack_fold_cvttpd2dq:
1041 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1044 ; CHECK-NEXT: #NO_APP
1045 ; CHECK-NEXT: cvttpd2dq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1047 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1048 %2 = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
1051 declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
1053 define <4 x i32> @stack_fold_cvttps2dq(<4 x float> %a0) {
1054 ; CHECK-LABEL: stack_fold_cvttps2dq:
1056 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1059 ; CHECK-NEXT: #NO_APP
1060 ; CHECK-NEXT: cvttps2dq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1062 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1063 %2 = fptosi <4 x float> %a0 to <4 x i32>
1067 define i32 @stack_fold_cvttsd2si(double %a0) {
1068 ; CHECK-LABEL: stack_fold_cvttsd2si:
1070 ; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1073 ; CHECK-NEXT: #NO_APP
1074 ; CHECK-NEXT: cvttsd2si {{[-0-9]+}}(%r{{[sb]}}p), %eax # 8-byte Folded Reload
1076 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1077 %2 = fptosi double %a0 to i32
1081 define i32 @stack_fold_cvttsd2si_int(<2 x double> %a0) {
1082 ; CHECK-LABEL: stack_fold_cvttsd2si_int:
1084 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1087 ; CHECK-NEXT: #NO_APP
1088 ; CHECK-NEXT: cvttsd2si {{[-0-9]+}}(%r{{[sb]}}p), %eax # 16-byte Folded Reload
1090 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1091 %2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
1094 declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
1096 define i64 @stack_fold_cvttsd2si64(double %a0) {
1097 ; CHECK-LABEL: stack_fold_cvttsd2si64:
1099 ; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1102 ; CHECK-NEXT: #NO_APP
1103 ; CHECK-NEXT: cvttsd2si {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
1105 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1106 %2 = fptosi double %a0 to i64
1110 define i64 @stack_fold_cvttsd2si64_int(<2 x double> %a0) {
1111 ; CHECK-LABEL: stack_fold_cvttsd2si64_int:
1113 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1116 ; CHECK-NEXT: #NO_APP
1117 ; CHECK-NEXT: cvttsd2si {{[-0-9]+}}(%r{{[sb]}}p), %rax # 16-byte Folded Reload
1119 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1120 %2 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0)
1123 declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
1125 define i32 @stack_fold_cvttss2si(float %a0) {
1126 ; CHECK-LABEL: stack_fold_cvttss2si:
1128 ; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1131 ; CHECK-NEXT: #NO_APP
1132 ; CHECK-NEXT: cvttss2si {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
1134 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1135 %2 = fptosi float %a0 to i32
1139 define i32 @stack_fold_cvttss2si_int(<4 x float> %a0) {
1140 ; CHECK-LABEL: stack_fold_cvttss2si_int:
1142 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1145 ; CHECK-NEXT: #NO_APP
1146 ; CHECK-NEXT: cvttss2si {{[-0-9]+}}(%r{{[sb]}}p), %eax # 16-byte Folded Reload
1148 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1149 %2 = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
1152 declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
1154 define i64 @stack_fold_cvttss2si64(float %a0) {
1155 ; CHECK-LABEL: stack_fold_cvttss2si64:
1157 ; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1160 ; CHECK-NEXT: #NO_APP
1161 ; CHECK-NEXT: cvttss2si {{[-0-9]+}}(%r{{[sb]}}p), %rax # 4-byte Folded Reload
1163 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1164 %2 = fptosi float %a0 to i64
1168 define i64 @stack_fold_cvttss2si64_int(<4 x float> %a0) {
1169 ; CHECK-LABEL: stack_fold_cvttss2si64_int:
1171 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1174 ; CHECK-NEXT: #NO_APP
1175 ; CHECK-NEXT: cvttss2si {{[-0-9]+}}(%r{{[sb]}}p), %rax # 16-byte Folded Reload
1177 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1178 %2 = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0)
1181 declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
1183 define <2 x double> @stack_fold_divpd(<2 x double> %a0, <2 x double> %a1) {
1184 ; CHECK-LABEL: stack_fold_divpd:
1186 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1189 ; CHECK-NEXT: #NO_APP
1190 ; CHECK-NEXT: divpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1192 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1193 %2 = fdiv <2 x double> %a0, %a1
1197 define <4 x float> @stack_fold_divps(<4 x float> %a0, <4 x float> %a1) {
1198 ; CHECK-LABEL: stack_fold_divps:
1200 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1203 ; CHECK-NEXT: #NO_APP
1204 ; CHECK-NEXT: divps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1206 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1207 %2 = fdiv <4 x float> %a0, %a1
1211 define double @stack_fold_divsd(double %a0, double %a1) {
1212 ; CHECK-LABEL: stack_fold_divsd:
1214 ; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1217 ; CHECK-NEXT: #NO_APP
1218 ; CHECK-NEXT: divsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
1220 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1221 %2 = fdiv double %a0, %a1
1225 define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) {
1226 ; CHECK-LABEL: stack_fold_divsd_int:
1228 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1231 ; CHECK-NEXT: #NO_APP
1232 ; CHECK-NEXT: divsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1234 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1235 %2 = extractelement <2 x double> %a0, i32 0
1236 %3 = extractelement <2 x double> %a1, i32 0
1237 %4 = fdiv double %2, %3
1238 %5 = insertelement <2 x double> %a0, double %4, i32 0
1241 declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
1243 define float @stack_fold_divss(float %a0, float %a1) {
1244 ; CHECK-LABEL: stack_fold_divss:
1246 ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1249 ; CHECK-NEXT: #NO_APP
1250 ; CHECK-NEXT: divss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1252 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1253 %2 = fdiv float %a0, %a1
1257 define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) {
1258 ; CHECK-LABEL: stack_fold_divss_int:
1260 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1263 ; CHECK-NEXT: #NO_APP
1264 ; CHECK-NEXT: divss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1266 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1267 %2 = extractelement <4 x float> %a0, i32 0
1268 %3 = extractelement <4 x float> %a1, i32 0
1269 %4 = fdiv float %2, %3
1270 %5 = insertelement <4 x float> %a0, float %4, i32 0
1273 declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
1275 define <2 x double> @stack_fold_dppd(<2 x double> %a0, <2 x double> %a1) {
1276 ; CHECK-LABEL: stack_fold_dppd:
1278 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1281 ; CHECK-NEXT: #NO_APP
1282 ; CHECK-NEXT: dppd $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1284 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1285 %2 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
1288 declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
1290 define <4 x float> @stack_fold_dpps(<4 x float> %a0, <4 x float> %a1) {
1291 ; CHECK-LABEL: stack_fold_dpps:
1293 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1296 ; CHECK-NEXT: #NO_APP
1297 ; CHECK-NEXT: dpps $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1299 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1300 %2 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
1303 declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
1305 define i32 @stack_fold_extractps(<4 x float> %a0, <4 x float> %a1) {
1306 ; CHECK-LABEL: stack_fold_extractps:
1308 ; CHECK-NEXT: pushq %rbp
1309 ; CHECK-NEXT: .cfi_def_cfa_offset 16
1310 ; CHECK-NEXT: pushq %r15
1311 ; CHECK-NEXT: .cfi_def_cfa_offset 24
1312 ; CHECK-NEXT: pushq %r14
1313 ; CHECK-NEXT: .cfi_def_cfa_offset 32
1314 ; CHECK-NEXT: pushq %r13
1315 ; CHECK-NEXT: .cfi_def_cfa_offset 40
1316 ; CHECK-NEXT: pushq %r12
1317 ; CHECK-NEXT: .cfi_def_cfa_offset 48
1318 ; CHECK-NEXT: pushq %rbx
1319 ; CHECK-NEXT: .cfi_def_cfa_offset 56
1320 ; CHECK-NEXT: .cfi_offset %rbx, -56
1321 ; CHECK-NEXT: .cfi_offset %r12, -48
1322 ; CHECK-NEXT: .cfi_offset %r13, -40
1323 ; CHECK-NEXT: .cfi_offset %r14, -32
1324 ; CHECK-NEXT: .cfi_offset %r15, -24
1325 ; CHECK-NEXT: .cfi_offset %rbp, -16
1326 ; CHECK-NEXT: addps %xmm1, %xmm0
1327 ; CHECK-NEXT: extractps $1, %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1330 ; CHECK-NEXT: #NO_APP
1331 ; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
1332 ; CHECK-NEXT: popq %rbx
1333 ; CHECK-NEXT: .cfi_def_cfa_offset 48
1334 ; CHECK-NEXT: popq %r12
1335 ; CHECK-NEXT: .cfi_def_cfa_offset 40
1336 ; CHECK-NEXT: popq %r13
1337 ; CHECK-NEXT: .cfi_def_cfa_offset 32
1338 ; CHECK-NEXT: popq %r14
1339 ; CHECK-NEXT: .cfi_def_cfa_offset 24
1340 ; CHECK-NEXT: popq %r15
1341 ; CHECK-NEXT: .cfi_def_cfa_offset 16
1342 ; CHECK-NEXT: popq %rbp
1343 ; CHECK-NEXT: .cfi_def_cfa_offset 8
1345 ; fadd forces execution domain
1346 %1 = fadd <4 x float> %a0, %a1
1347 %2 = extractelement <4 x float> %1, i32 1
1348 %3 = bitcast float %2 to i32
1349 %4 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
1353 define <2 x double> @stack_fold_haddpd(<2 x double> %a0, <2 x double> %a1) {
1354 ; CHECK-LABEL: stack_fold_haddpd:
1356 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1359 ; CHECK-NEXT: #NO_APP
1360 ; CHECK-NEXT: haddpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1362 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1363 %2 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
1366 declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
1368 define <4 x float> @stack_fold_haddps(<4 x float> %a0, <4 x float> %a1) {
1369 ; CHECK-LABEL: stack_fold_haddps:
1371 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1374 ; CHECK-NEXT: #NO_APP
1375 ; CHECK-NEXT: haddps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1377 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1378 %2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
1381 declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
1383 define <2 x double> @stack_fold_hsubpd(<2 x double> %a0, <2 x double> %a1) {
1384 ; CHECK-LABEL: stack_fold_hsubpd:
1386 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1389 ; CHECK-NEXT: #NO_APP
1390 ; CHECK-NEXT: hsubpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1392 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1393 %2 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
1396 declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
1398 define <4 x float> @stack_fold_hsubps(<4 x float> %a0, <4 x float> %a1) {
1399 ; CHECK-LABEL: stack_fold_hsubps:
1401 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1404 ; CHECK-NEXT: #NO_APP
1405 ; CHECK-NEXT: hsubps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1407 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1408 %2 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
1411 declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
1413 define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) {
1414 ; CHECK-LABEL: stack_fold_insertps:
1416 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1419 ; CHECK-NEXT: #NO_APP
1420 ; CHECK-NEXT: insertps $17, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1421 ; CHECK-NEXT: # xmm0 = zero,mem[0],xmm0[2,3]
1423 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1424 %2 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 209)
1427 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
1429 define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) #0 {
1430 ; CHECK-LABEL: stack_fold_maxpd:
1432 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1435 ; CHECK-NEXT: #NO_APP
1436 ; CHECK-NEXT: maxpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1438 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1439 %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
1442 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
1444 define <2 x double> @stack_fold_maxpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 {
1445 ; CHECK-LABEL: stack_fold_maxpd_commutable:
1447 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1450 ; CHECK-NEXT: #NO_APP
1451 ; CHECK-NEXT: maxpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1453 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1454 %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
1458 define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) #0 {
1459 ; CHECK-LABEL: stack_fold_maxps:
1461 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1464 ; CHECK-NEXT: #NO_APP
1465 ; CHECK-NEXT: maxps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1467 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1468 %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
1471 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
1473 define <4 x float> @stack_fold_maxps_commutable(<4 x float> %a0, <4 x float> %a1) #1 {
1474 ; CHECK-LABEL: stack_fold_maxps_commutable:
1476 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1479 ; CHECK-NEXT: #NO_APP
1480 ; CHECK-NEXT: maxps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1482 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1483 %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
1487 define double @stack_fold_maxsd(double %a0, double %a1) #0 {
1488 ; CHECK-LABEL: stack_fold_maxsd:
1490 ; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1493 ; CHECK-NEXT: #NO_APP
1494 ; CHECK-NEXT: maxsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
1496 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1497 %2 = fcmp ogt double %a0, %a1
1498 %3 = select i1 %2, double %a0, double %a1
1502 define double @stack_fold_maxsd_commutable(double %a0, double %a1) #1 {
1503 ; CHECK-LABEL: stack_fold_maxsd_commutable:
1505 ; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1508 ; CHECK-NEXT: #NO_APP
1509 ; CHECK-NEXT: maxsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
1511 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1512 %2 = fcmp ogt double %a0, %a1
1513 %3 = select i1 %2, double %a0, double %a1
1517 define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) #0 {
1518 ; CHECK-LABEL: stack_fold_maxsd_int:
1520 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1523 ; CHECK-NEXT: #NO_APP
1524 ; CHECK-NEXT: maxsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1526 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1527 %2 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
1530 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
1532 define float @stack_fold_maxss(float %a0, float %a1) #0 {
1533 ; CHECK-LABEL: stack_fold_maxss:
1535 ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1538 ; CHECK-NEXT: #NO_APP
1539 ; CHECK-NEXT: maxss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1541 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1542 %2 = fcmp ogt float %a0, %a1
1543 %3 = select i1 %2, float %a0, float %a1
1547 define float @stack_fold_maxss_commutable(float %a0, float %a1) #1 {
1548 ; CHECK-LABEL: stack_fold_maxss_commutable:
1550 ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1553 ; CHECK-NEXT: #NO_APP
1554 ; CHECK-NEXT: maxss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1556 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1557 %2 = fcmp ogt float %a0, %a1
1558 %3 = select i1 %2, float %a0, float %a1
1562 define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) #0 {
1563 ; CHECK-LABEL: stack_fold_maxss_int:
1565 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1568 ; CHECK-NEXT: #NO_APP
1569 ; CHECK-NEXT: maxss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1571 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1572 %2 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
1575 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
1577 define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) #0 {
1578 ; CHECK-LABEL: stack_fold_minpd:
1580 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1583 ; CHECK-NEXT: #NO_APP
1584 ; CHECK-NEXT: minpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1586 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1587 %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
1590 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
1592 define <2 x double> @stack_fold_minpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 {
1593 ; CHECK-LABEL: stack_fold_minpd_commutable:
1595 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1598 ; CHECK-NEXT: #NO_APP
1599 ; CHECK-NEXT: minpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1601 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1602 %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
1606 define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) #0 {
1607 ; CHECK-LABEL: stack_fold_minps:
1609 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1612 ; CHECK-NEXT: #NO_APP
1613 ; CHECK-NEXT: minps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1615 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1616 %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
1619 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
1621 define <4 x float> @stack_fold_minps_commutable(<4 x float> %a0, <4 x float> %a1) #1 {
1622 ; CHECK-LABEL: stack_fold_minps_commutable:
1624 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1627 ; CHECK-NEXT: #NO_APP
1628 ; CHECK-NEXT: minps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1630 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1631 %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
1635 define double @stack_fold_minsd(double %a0, double %a1) #0 {
1636 ; CHECK-LABEL: stack_fold_minsd:
1638 ; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1641 ; CHECK-NEXT: #NO_APP
1642 ; CHECK-NEXT: minsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
1644 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1645 %2 = fcmp olt double %a0, %a1
1646 %3 = select i1 %2, double %a0, double %a1
1650 define double @stack_fold_minsd_commutable(double %a0, double %a1) #1 {
1651 ; CHECK-LABEL: stack_fold_minsd_commutable:
1653 ; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1656 ; CHECK-NEXT: #NO_APP
1657 ; CHECK-NEXT: minsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
1659 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1660 %2 = fcmp olt double %a0, %a1
1661 %3 = select i1 %2, double %a0, double %a1
1665 define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) #0 {
1666 ; CHECK-LABEL: stack_fold_minsd_int:
1668 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1671 ; CHECK-NEXT: #NO_APP
1672 ; CHECK-NEXT: minsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1674 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1675 %2 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
1678 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
1680 define float @stack_fold_minss(float %a0, float %a1) #0 {
1681 ; CHECK-LABEL: stack_fold_minss:
1683 ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1686 ; CHECK-NEXT: #NO_APP
1687 ; CHECK-NEXT: minss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1689 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1690 %2 = fcmp olt float %a0, %a1
1691 %3 = select i1 %2, float %a0, float %a1
1695 define float @stack_fold_minss_commutable(float %a0, float %a1) #1 {
1696 ; CHECK-LABEL: stack_fold_minss_commutable:
1698 ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1701 ; CHECK-NEXT: #NO_APP
1702 ; CHECK-NEXT: minss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1704 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1705 %2 = fcmp olt float %a0, %a1
1706 %3 = select i1 %2, float %a0, float %a1
1710 define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) #0 {
1711 ; CHECK-LABEL: stack_fold_minss_int:
1713 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1716 ; CHECK-NEXT: #NO_APP
1717 ; CHECK-NEXT: minss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1719 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1720 %2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
1723 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
1725 define <2 x double> @stack_fold_movddup(<2 x double> %a0) {
1726 ; CHECK-LABEL: stack_fold_movddup:
1728 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1731 ; CHECK-NEXT: #NO_APP
1732 ; CHECK-NEXT: movddup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1733 ; CHECK-NEXT: # xmm0 = mem[0,0]
1735 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1736 %2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1739 ; TODO stack_fold_movhpd (load / store)
1740 ; TODO stack_fold_movhps (load / store)
1742 ; TODO stack_fold_movlpd (load / store)
1743 ; TODO stack_fold_movlps (load / store)
1745 define <4 x float> @stack_fold_movshdup(<4 x float> %a0) {
1746 ; CHECK-LABEL: stack_fold_movshdup:
1748 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1751 ; CHECK-NEXT: #NO_APP
1752 ; CHECK-NEXT: movshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1753 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1755 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1756 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
1760 define <4 x float> @stack_fold_movsldup(<4 x float> %a0) {
1761 ; CHECK-LABEL: stack_fold_movsldup:
1763 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1766 ; CHECK-NEXT: #NO_APP
1767 ; CHECK-NEXT: movsldup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1768 ; CHECK-NEXT: # xmm0 = mem[0,0,2,2]
1770 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1771 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
1775 define <2 x double> @stack_fold_mulpd(<2 x double> %a0, <2 x double> %a1) {
1776 ; CHECK-LABEL: stack_fold_mulpd:
1778 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1781 ; CHECK-NEXT: #NO_APP
1782 ; CHECK-NEXT: mulpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1784 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1785 %2 = fmul <2 x double> %a0, %a1
1789 define <4 x float> @stack_fold_mulps(<4 x float> %a0, <4 x float> %a1) {
1790 ; CHECK-LABEL: stack_fold_mulps:
1792 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1795 ; CHECK-NEXT: #NO_APP
1796 ; CHECK-NEXT: mulps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1798 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1799 %2 = fmul <4 x float> %a0, %a1
1803 define double @stack_fold_mulsd(double %a0, double %a1) {
1804 ; CHECK-LABEL: stack_fold_mulsd:
1806 ; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1809 ; CHECK-NEXT: #NO_APP
1810 ; CHECK-NEXT: mulsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
1812 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1813 %2 = fmul double %a0, %a1
1817 define <2 x double> @stack_fold_mulsd_int(<2 x double> %a0, <2 x double> %a1) {
1818 ; CHECK-LABEL: stack_fold_mulsd_int:
1820 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1823 ; CHECK-NEXT: #NO_APP
1824 ; CHECK-NEXT: mulsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1826 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1827 %2 = extractelement <2 x double> %a0, i32 0
1828 %3 = extractelement <2 x double> %a1, i32 0
1829 %4 = fmul double %2, %3
1830 %5 = insertelement <2 x double> %a0, double %4, i32 0
1834 define float @stack_fold_mulss(float %a0, float %a1) {
1835 ; CHECK-LABEL: stack_fold_mulss:
1837 ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1840 ; CHECK-NEXT: #NO_APP
1841 ; CHECK-NEXT: mulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
1843 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1844 %2 = fmul float %a0, %a1
1848 define <4 x float> @stack_fold_mulss_int(<4 x float> %a0, <4 x float> %a1) {
1849 ; CHECK-LABEL: stack_fold_mulss_int:
1851 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1854 ; CHECK-NEXT: #NO_APP
1855 ; CHECK-NEXT: mulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1857 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1858 %2 = extractelement <4 x float> %a0, i32 0
1859 %3 = extractelement <4 x float> %a1, i32 0
1860 %4 = fmul float %2, %3
1861 %5 = insertelement <4 x float> %a0, float %4, i32 0
1865 define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) {
1866 ; CHECK-LABEL: stack_fold_orpd:
1868 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1871 ; CHECK-NEXT: #NO_APP
1872 ; CHECK-NEXT: orpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1873 ; CHECK-NEXT: xorpd %xmm1, %xmm1
1874 ; CHECK-NEXT: addpd %xmm1, %xmm0
1876 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1877 %2 = bitcast <2 x double> %a0 to <2 x i64>
1878 %3 = bitcast <2 x double> %a1 to <2 x i64>
1879 %4 = or <2 x i64> %2, %3
1880 %5 = bitcast <2 x i64> %4 to <2 x double>
1881 ; fadd forces execution domain
1882 %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
1886 define <4 x float> @stack_fold_orps(<4 x float> %a0, <4 x float> %a1) {
1887 ; CHECK-LABEL: stack_fold_orps:
1889 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1892 ; CHECK-NEXT: #NO_APP
1893 ; CHECK-NEXT: orps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1894 ; CHECK-NEXT: xorps %xmm1, %xmm1
1895 ; CHECK-NEXT: addps %xmm1, %xmm0
1897 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1898 %2 = bitcast <4 x float> %a0 to <2 x i64>
1899 %3 = bitcast <4 x float> %a1 to <2 x i64>
1900 %4 = or <2 x i64> %2, %3
1901 %5 = bitcast <2 x i64> %4 to <4 x float>
1902 ; fadd forces execution domain
1903 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
1907 ; TODO stack_fold_rcpps
1909 define <4 x float> @stack_fold_rcpps_int(<4 x float> %a0) {
1910 ; CHECK-LABEL: stack_fold_rcpps_int:
1912 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1915 ; CHECK-NEXT: #NO_APP
1916 ; CHECK-NEXT: rcpps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1918 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1919 %2 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
1922 declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
1924 ; TODO stack_fold_rcpss
1926 define <4 x float> @stack_fold_rcpss_int(<4 x float> %a0, <4 x float> %a1) optsize {
1927 ; CHECK-LABEL: stack_fold_rcpss_int:
1929 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1930 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1933 ; CHECK-NEXT: #NO_APP
1934 ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1935 ; CHECK-NEXT: rcpss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1937 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1938 %2 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a1)
1939 %3 = extractelement <4 x float> %2, i32 0
1940 %4 = insertelement <4 x float> %a0, float %3, i32 0
1943 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>)
1945 define <2 x double> @stack_fold_roundpd(<2 x double> %a0) {
1946 ; CHECK-LABEL: stack_fold_roundpd:
1948 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1951 ; CHECK-NEXT: #NO_APP
1952 ; CHECK-NEXT: roundpd $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1954 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1955 %2 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7)
1958 declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
1960 define <4 x float> @stack_fold_roundps(<4 x float> %a0) {
1961 ; CHECK-LABEL: stack_fold_roundps:
1963 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1966 ; CHECK-NEXT: #NO_APP
1967 ; CHECK-NEXT: roundps $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1969 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1970 %2 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7)
1973 declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
1975 define double @stack_fold_roundsd(double %a0) optsize {
1976 ; CHECK-LABEL: stack_fold_roundsd:
1978 ; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1981 ; CHECK-NEXT: #NO_APP
1982 ; CHECK-NEXT: xorps %xmm0, %xmm0
1983 ; CHECK-NEXT: roundsd $9, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
1985 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1986 %2 = call double @llvm.floor.f64(double %a0)
1989 declare double @llvm.floor.f64(double) nounwind readnone
1991 define <2 x double> @stack_fold_roundsd_int(<2 x double> %a0, <2 x double> %a1) optsize {
1992 ; CHECK-LABEL: stack_fold_roundsd_int:
1994 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1995 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1998 ; CHECK-NEXT: #NO_APP
1999 ; CHECK-NEXT: movapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
2000 ; CHECK-NEXT: roundsd $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2002 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2003 %2 = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7)
2006 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
2008 define float @stack_fold_roundss(float %a0) minsize {
2009 ; CHECK-LABEL: stack_fold_roundss:
2011 ; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
2014 ; CHECK-NEXT: #NO_APP
2015 ; CHECK-NEXT: xorps %xmm0, %xmm0
2016 ; CHECK-NEXT: roundss $9, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
2018 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2019 %2 = call float @llvm.floor.f32(float %a0)
2022 declare float @llvm.floor.f32(float) nounwind readnone
2024 define <4 x float> @stack_fold_roundss_int(<4 x float> %a0, <4 x float> %a1) optsize {
2025 ; CHECK-LABEL: stack_fold_roundss_int:
2027 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2028 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2031 ; CHECK-NEXT: #NO_APP
2032 ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
2033 ; CHECK-NEXT: roundss $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2035 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2036 %2 = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7)
2039 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
2041 ; TODO stack_fold_rsqrtps
2043 define <4 x float> @stack_fold_rsqrtps_int(<4 x float> %a0) {
2044 ; CHECK-LABEL: stack_fold_rsqrtps_int:
2046 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2049 ; CHECK-NEXT: #NO_APP
2050 ; CHECK-NEXT: rsqrtps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2052 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2053 %2 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
2056 declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
2058 ; TODO stack_fold_rsqrtss
2060 define <4 x float> @stack_fold_rsqrtss_int(<4 x float> %a0, <4 x float> %a1) optsize {
2061 ; CHECK-LABEL: stack_fold_rsqrtss_int:
2063 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2064 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2067 ; CHECK-NEXT: #NO_APP
2068 ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
2069 ; CHECK-NEXT: rsqrtss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2071 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2072 %2 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a1)
2073 %3 = extractelement <4 x float> %2, i32 0
2074 %4 = insertelement <4 x float> %a0, float %3, i32 0
2077 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>)
2079 define <2 x double> @stack_fold_shufpd(<2 x double> %a0, <2 x double> %a1) {
2080 ; CHECK-LABEL: stack_fold_shufpd:
2082 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2085 ; CHECK-NEXT: #NO_APP
2086 ; CHECK-NEXT: shufpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2087 ; CHECK-NEXT: # xmm0 = xmm0[1],mem[0]
2088 ; CHECK-NEXT: xorpd %xmm1, %xmm1
2089 ; CHECK-NEXT: addpd %xmm1, %xmm0
2091 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2092 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
2093 ; fadd forces execution domain
2094 %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
2098 define <4 x float> @stack_fold_shufps(<4 x float> %a0, <4 x float> %a1) {
2099 ; CHECK-LABEL: stack_fold_shufps:
2101 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2104 ; CHECK-NEXT: #NO_APP
2105 ; CHECK-NEXT: shufps $200, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2106 ; CHECK-NEXT: # xmm0 = xmm0[0,2],mem[0,3]
2108 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2109 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 7>
2113 define <2 x double> @stack_fold_sqrtpd(<2 x double> %a0) {
2114 ; CHECK-LABEL: stack_fold_sqrtpd:
2116 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2119 ; CHECK-NEXT: #NO_APP
2120 ; CHECK-NEXT: sqrtpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2122 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2123 %2 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a0)
2127 define <4 x float> @stack_fold_sqrtps(<4 x float> %a0) {
2128 ; CHECK-LABEL: stack_fold_sqrtps:
2130 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2133 ; CHECK-NEXT: #NO_APP
2134 ; CHECK-NEXT: sqrtps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2136 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2137 %2 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a0)
2141 define double @stack_fold_sqrtsd(double %a0) optsize {
2142 ; CHECK-LABEL: stack_fold_sqrtsd:
2144 ; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2147 ; CHECK-NEXT: #NO_APP
2148 ; CHECK-NEXT: xorps %xmm0, %xmm0
2149 ; CHECK-NEXT: sqrtsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
2151 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2152 %2 = call double @llvm.sqrt.f64(double %a0)
2155 declare double @llvm.sqrt.f64(double) nounwind readnone
2157 define <2 x double> @stack_fold_sqrtsd_int(<2 x double> %a0, <2 x double> %a1) optsize {
2158 ; CHECK-LABEL: stack_fold_sqrtsd_int:
2160 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2161 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2164 ; CHECK-NEXT: #NO_APP
2165 ; CHECK-NEXT: movapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
2166 ; CHECK-NEXT: sqrtsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2168 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2169 %2 = extractelement <2 x double> %a1, i64 0
2170 %3 = call double @llvm.sqrt.f64(double %2)
2171 %4 = insertelement <2 x double> %a1, double %3, i64 0
2172 %5 = extractelement <2 x double> %4, i32 0
2173 %6 = insertelement <2 x double> %a0, double %5, i32 0
2177 define float @stack_fold_sqrtss(float %a0) minsize {
2178 ; CHECK-LABEL: stack_fold_sqrtss:
2180 ; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
2183 ; CHECK-NEXT: #NO_APP
2184 ; CHECK-NEXT: xorps %xmm0, %xmm0
2185 ; CHECK-NEXT: sqrtss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
2187 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2188 %2 = call float @llvm.sqrt.f32(float %a0)
2191 declare float @llvm.sqrt.f32(float) nounwind readnone
2193 define <4 x float> @stack_fold_sqrtss_int(<4 x float> %a0, <4 x float> %a1) optsize {
2194 ; CHECK-LABEL: stack_fold_sqrtss_int:
2196 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2197 ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2200 ; CHECK-NEXT: #NO_APP
2201 ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
2202 ; CHECK-NEXT: sqrtss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2204 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2205 %2 = extractelement <4 x float> %a1, i64 0
2206 %3 = call float @llvm.sqrt.f32(float %2)
2207 %4 = insertelement <4 x float> %a1, float %3, i64 0
2208 %5 = extractelement <4 x float> %4, i32 0
2209 %6 = insertelement <4 x float> %a0, float %5, i32 0
2213 define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) {
2214 ; CHECK-LABEL: stack_fold_subpd:
2216 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2219 ; CHECK-NEXT: #NO_APP
2220 ; CHECK-NEXT: subpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2222 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2223 %2 = fsub <2 x double> %a0, %a1
2227 define <4 x float> @stack_fold_subps(<4 x float> %a0, <4 x float> %a1) {
2228 ; CHECK-LABEL: stack_fold_subps:
2230 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2233 ; CHECK-NEXT: #NO_APP
2234 ; CHECK-NEXT: subps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2236 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2237 %2 = fsub <4 x float> %a0, %a1
2241 define double @stack_fold_subsd(double %a0, double %a1) {
2242 ; CHECK-LABEL: stack_fold_subsd:
2244 ; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2247 ; CHECK-NEXT: #NO_APP
2248 ; CHECK-NEXT: subsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
2250 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2251 %2 = fsub double %a0, %a1
2255 define <2 x double> @stack_fold_subsd_int(<2 x double> %a0, <2 x double> %a1) {
2256 ; CHECK-LABEL: stack_fold_subsd_int:
2258 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2261 ; CHECK-NEXT: #NO_APP
2262 ; CHECK-NEXT: subsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2264 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2265 %2 = extractelement <2 x double> %a0, i32 0
2266 %3 = extractelement <2 x double> %a1, i32 0
2267 %4 = fsub double %2, %3
2268 %5 = insertelement <2 x double> %a0, double %4, i32 0
2272 define float @stack_fold_subss(float %a0, float %a1) {
2273 ; CHECK-LABEL: stack_fold_subss:
2275 ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
2278 ; CHECK-NEXT: #NO_APP
2279 ; CHECK-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
2281 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2282 %2 = fsub float %a0, %a1
2286 define <4 x float> @stack_fold_subss_int(<4 x float> %a0, <4 x float> %a1) {
2287 ; CHECK-LABEL: stack_fold_subss_int:
2289 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2292 ; CHECK-NEXT: #NO_APP
2293 ; CHECK-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2295 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2296 %2 = extractelement <4 x float> %a0, i32 0
2297 %3 = extractelement <4 x float> %a1, i32 0
2298 %4 = fsub float %2, %3
2299 %5 = insertelement <4 x float> %a0, float %4, i32 0
2303 define i32 @stack_fold_ucomisd(double %a0, double %a1) {
2304 ; CHECK-LABEL: stack_fold_ucomisd:
2306 ; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2309 ; CHECK-NEXT: #NO_APP
2310 ; CHECK-NEXT: xorl %eax, %eax
2311 ; CHECK-NEXT: ucomisd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
2312 ; CHECK-NEXT: sete %al
2313 ; CHECK-NEXT: leal -1(%rax,%rax), %eax
2315 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2316 %2 = fcmp ueq double %a0, %a1
2317 %3 = select i1 %2, i32 1, i32 -1
2321 define i32 @stack_fold_ucomisd_int(<2 x double> %a0, <2 x double> %a1) {
2322 ; CHECK-LABEL: stack_fold_ucomisd_int:
2324 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2327 ; CHECK-NEXT: #NO_APP
2328 ; CHECK-NEXT: ucomisd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2329 ; CHECK-NEXT: setnp %al
2330 ; CHECK-NEXT: sete %cl
2331 ; CHECK-NEXT: andb %al, %cl
2332 ; CHECK-NEXT: movzbl %cl, %eax
2334 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2335 %2 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
2338 declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
2340 define i32 @stack_fold_ucomiss(float %a0, float %a1) {
2341 ; CHECK-LABEL: stack_fold_ucomiss:
2343 ; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
2346 ; CHECK-NEXT: #NO_APP
2347 ; CHECK-NEXT: xorl %eax, %eax
2348 ; CHECK-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
2349 ; CHECK-NEXT: sete %al
2350 ; CHECK-NEXT: leal -1(%rax,%rax), %eax
2352 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2353 %2 = fcmp ueq float %a0, %a1
2354 %3 = select i1 %2, i32 1, i32 -1
2358 define i32 @stack_fold_ucomiss_int(<4 x float> %a0, <4 x float> %a1) {
2359 ; CHECK-LABEL: stack_fold_ucomiss_int:
2361 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2364 ; CHECK-NEXT: #NO_APP
2365 ; CHECK-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2366 ; CHECK-NEXT: setnp %al
2367 ; CHECK-NEXT: sete %cl
2368 ; CHECK-NEXT: andb %al, %cl
2369 ; CHECK-NEXT: movzbl %cl, %eax
2371 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2372 %2 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
2375 declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
2377 define <2 x double> @stack_fold_unpckhpd(<2 x double> %a0, <2 x double> %a1) {
2378 ; CHECK-LABEL: stack_fold_unpckhpd:
2380 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2383 ; CHECK-NEXT: #NO_APP
2384 ; CHECK-NEXT: unpckhpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2385 ; CHECK-NEXT: # xmm0 = xmm0[1],mem[1]
2386 ; CHECK-NEXT: xorpd %xmm1, %xmm1
2387 ; CHECK-NEXT: addpd %xmm1, %xmm0
2389 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2390 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
2391 ; fadd forces execution domain
2392 %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
2396 define <4 x float> @stack_fold_unpckhps(<4 x float> %a0, <4 x float> %a1) {
2397 ; CHECK-LABEL: stack_fold_unpckhps:
2399 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2402 ; CHECK-NEXT: #NO_APP
2403 ; CHECK-NEXT: unpckhps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2404 ; CHECK-NEXT: # xmm0 = xmm0[2],mem[2],xmm0[3],mem[3]
2405 ; CHECK-NEXT: xorps %xmm1, %xmm1
2406 ; CHECK-NEXT: addps %xmm1, %xmm0
2408 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
2409 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
2410 ; fadd forces execution domain
2411 %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
; Verify that UNPCKLPD folds its second operand from a stack slot: the
; inline-asm nop below clobbers xmm2-xmm15, forcing the spill of %a1, and
; the reload must be folded into unpcklpd as a memory operand.
2415 define <2 x double> @stack_fold_unpcklpd(<2 x double> %a0, <2 x double> %a1) {
2416 ; CHECK-LABEL: stack_fold_unpcklpd:
2418 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2421 ; CHECK-NEXT: #NO_APP
2422 ; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2423 ; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
2424 ; CHECK-NEXT: xorpd %xmm1, %xmm1
2425 ; CHECK-NEXT: addpd %xmm1, %xmm0
2427 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
; Mask <0, 2> selects the low element of each source, i.e. unpcklpd.
2428 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
2429 ; fadd forces execution domain
2430 %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
; Verify that UNPCKLPS folds its second operand from a stack slot: the
; inline-asm nop below clobbers xmm2-xmm15, forcing the spill of %a1, and
; the reload must be folded into unpcklps as a memory operand.
2434 define <4 x float> @stack_fold_unpcklps(<4 x float> %a0, <4 x float> %a1) {
2435 ; CHECK-LABEL: stack_fold_unpcklps:
2437 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2440 ; CHECK-NEXT: #NO_APP
2441 ; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2442 ; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
2443 ; CHECK-NEXT: xorps %xmm1, %xmm1
2444 ; CHECK-NEXT: addps %xmm1, %xmm0
2446 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
; Mask <0, 4, 1, 5> interleaves the low halves of the sources, i.e. unpcklps.
2447 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
2448 ; fadd forces execution domain
2449 %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
; Verify that XORPD folds its second operand from a stack slot: the
; inline-asm nop below clobbers xmm2-xmm15, forcing the spill of %a1, and
; the reload must be folded into xorpd as a memory operand.
2453 define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) {
2454 ; CHECK-LABEL: stack_fold_xorpd:
2456 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2459 ; CHECK-NEXT: #NO_APP
2460 ; CHECK-NEXT: xorpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2461 ; CHECK-NEXT: xorpd %xmm1, %xmm1
2462 ; CHECK-NEXT: addpd %xmm1, %xmm0
2464 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
; Integer xor on bitcast doubles; the fadd below is what keeps this in the
; FP (xorpd) domain rather than the integer (pxor) domain.
2465 %2 = bitcast <2 x double> %a0 to <2 x i64>
2466 %3 = bitcast <2 x double> %a1 to <2 x i64>
2467 %4 = xor <2 x i64> %2, %3
2468 %5 = bitcast <2 x i64> %4 to <2 x double>
2469 ; fadd forces execution domain
2470 %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
; Verify that XORPS folds its second operand from a stack slot: the
; inline-asm nop below clobbers xmm2-xmm15, forcing the spill of %a1, and
; the reload must be folded into xorps as a memory operand.
2474 define <4 x float> @stack_fold_xorps(<4 x float> %a0, <4 x float> %a1) {
2475 ; CHECK-LABEL: stack_fold_xorps:
2477 ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2480 ; CHECK-NEXT: #NO_APP
2481 ; CHECK-NEXT: xorps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
2482 ; CHECK-NEXT: xorps %xmm1, %xmm1
2483 ; CHECK-NEXT: addps %xmm1, %xmm0
2485 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
; Integer xor on bitcast floats; the fadd below is what keeps this in the
; FP (xorps) domain rather than the integer (pxor) domain.
2486 %2 = bitcast <4 x float> %a0 to <2 x i64>
2487 %3 = bitcast <4 x float> %a1 to <2 x i64>
2488 %4 = xor <2 x i64> %2, %3
2489 %5 = bitcast <2 x i64> %4 to <4 x float>
2490 ; fadd forces execution domain
2491 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
2495 declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
2496 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
2498 attributes #0 = { "unsafe-fp-math"="false" }
2499 attributes #1 = { "unsafe-fp-math"="true" }