1 ; RUN: llc -O3 -verify-machineinstrs -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 < %s | FileCheck %s
3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
4 target triple = "x86_64-unknown-unknown"
6 ; Stack reload folding tests.
8 ; By including a nop call with sideeffects we can force a partial register spill of the
9 ; relevant registers and check that the reload is correctly folded into the instruction.
11 define <2 x double> @stack_fold_addpd(<2 x double> %a0, <2 x double> %a1) {
12 ;CHECK-LABEL: stack_fold_addpd
13 ;CHECK: addpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
14 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
15 %2 = fadd <2 x double> %a0, %a1
19 define <4 x float> @stack_fold_addps(<4 x float> %a0, <4 x float> %a1) {
20 ;CHECK-LABEL: stack_fold_addps
21 ;CHECK: addps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
22 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
23 %2 = fadd <4 x float> %a0, %a1
27 define double @stack_fold_addsd(double %a0, double %a1) {
28 ;CHECK-LABEL: stack_fold_addsd
29 ;CHECK: addsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
30 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
31 %2 = fadd double %a0, %a1
35 define <2 x double> @stack_fold_addsd_int(<2 x double> %a0, <2 x double> %a1) {
36 ;CHECK-LABEL: stack_fold_addsd_int
37 ;CHECK: addsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
38 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
39 %2 = extractelement <2 x double> %a0, i32 0
40 %3 = extractelement <2 x double> %a1, i32 0
41 %4 = fadd double %2, %3
42 %5 = insertelement <2 x double> %a0, double %4, i32 0
46 define float @stack_fold_addss(float %a0, float %a1) {
47 ;CHECK-LABEL: stack_fold_addss
48 ;CHECK: addss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
49 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
50 %2 = fadd float %a0, %a1
54 define <4 x float> @stack_fold_addss_int(<4 x float> %a0, <4 x float> %a1) {
55 ;CHECK-LABEL: stack_fold_addss_int
56 ;CHECK: addss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
57 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
58 %2 = extractelement <4 x float> %a0, i32 0
59 %3 = extractelement <4 x float> %a1, i32 0
60 %4 = fadd float %2, %3
61 %5 = insertelement <4 x float> %a0, float %4, i32 0
65 define <2 x double> @stack_fold_addsubpd(<2 x double> %a0, <2 x double> %a1) {
66 ;CHECK-LABEL: stack_fold_addsubpd
67 ;CHECK: addsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
68 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
69 %2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
72 declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
74 define <4 x float> @stack_fold_addsubps(<4 x float> %a0, <4 x float> %a1) {
75 ;CHECK-LABEL: stack_fold_addsubps
76 ;CHECK: addsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
77 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
78 %2 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
81 declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
83 define <2 x double> @stack_fold_andnpd(<2 x double> %a0, <2 x double> %a1) {
84 ;CHECK-LABEL: stack_fold_andnpd
85 ;CHECK: andnpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
86 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
87 %2 = bitcast <2 x double> %a0 to <2 x i64>
88 %3 = bitcast <2 x double> %a1 to <2 x i64>
89 %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
90 %5 = and <2 x i64> %4, %3
91 %6 = bitcast <2 x i64> %5 to <2 x double>
92 ; fadd forces execution domain
93 %7 = fadd <2 x double> %6, <double 0x0, double 0x0>
97 define <4 x float> @stack_fold_andnps(<4 x float> %a0, <4 x float> %a1) {
98 ;CHECK-LABEL: stack_fold_andnps
99 ;CHECK: andnps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
100 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
101 %2 = bitcast <4 x float> %a0 to <2 x i64>
102 %3 = bitcast <4 x float> %a1 to <2 x i64>
103 %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
104 %5 = and <2 x i64> %4, %3
105 %6 = bitcast <2 x i64> %5 to <4 x float>
106 ; fadd forces execution domain
107 %7 = fadd <4 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0>
111 define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) {
112 ;CHECK-LABEL: stack_fold_andpd
113 ;CHECK: andpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
114 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
115 %2 = bitcast <2 x double> %a0 to <2 x i64>
116 %3 = bitcast <2 x double> %a1 to <2 x i64>
117 %4 = and <2 x i64> %2, %3
118 %5 = bitcast <2 x i64> %4 to <2 x double>
119 ; fadd forces execution domain
120 %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
124 define <4 x float> @stack_fold_andps(<4 x float> %a0, <4 x float> %a1) {
125 ;CHECK-LABEL: stack_fold_andps
126 ;CHECK: andps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
127 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
128 %2 = bitcast <4 x float> %a0 to <2 x i64>
129 %3 = bitcast <4 x float> %a1 to <2 x i64>
130 %4 = and <2 x i64> %2, %3
131 %5 = bitcast <2 x i64> %4 to <4 x float>
132 ; fadd forces execution domain
133 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
137 define <2 x double> @stack_fold_blendpd(<2 x double> %a0, <2 x double> %a1) {
138 ;CHECK-LABEL: stack_fold_blendpd
139 ;CHECK: blendpd $2, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
140 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
141 %2 = select <2 x i1> <i1 1, i1 0>, <2 x double> %a0, <2 x double> %a1
142 ; fadd forces execution domain
143 %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
147 define <4 x float> @stack_fold_blendps(<4 x float> %a0, <4 x float> %a1) {
148 ;CHECK-LABEL: stack_fold_blendps
149 ;CHECK: blendps $6, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
150 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
151 %2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x float> %a0, <4 x float> %a1
152 ; fadd forces execution domain
153 %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
157 define <2 x double> @stack_fold_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %c) {
158 ;CHECK-LABEL: stack_fold_blendvpd
159 ;CHECK: blendvpd %xmm0, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
160 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
161 %2 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a1, <2 x double> %c, <2 x double> %a0)
164 declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
166 define <4 x float> @stack_fold_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %c) {
167 ;CHECK-LABEL: stack_fold_blendvps
168 ;CHECK: blendvps %xmm0, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
169 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
170 %2 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a1, <4 x float> %c, <4 x float> %a0)
173 declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
175 define <2 x double> @stack_fold_cmppd(<2 x double> %a0, <2 x double> %a1) {
176 ;CHECK-LABEL: stack_fold_cmppd
177 ;CHECK: cmpeqpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
178 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
179 %2 = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 0)
182 declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
184 define <4 x float> @stack_fold_cmpps(<4 x float> %a0, <4 x float> %a1) {
185 ;CHECK-LABEL: stack_fold_cmpps
186 ;CHECK: cmpeqps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
187 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
188 %2 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 0)
191 declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
193 define i32 @stack_fold_cmpsd(double %a0, double %a1) {
194 ;CHECK-LABEL: stack_fold_cmpsd
195 ;CHECK: cmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
196 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
197 %2 = fcmp oeq double %a0, %a1
198 %3 = zext i1 %2 to i32
202 define <2 x double> @stack_fold_cmpsd_int(<2 x double> %a0, <2 x double> %a1) {
203 ;CHECK-LABEL: stack_fold_cmpsd_int
204 ;CHECK: cmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
205 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
206 %2 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
209 declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
211 define i32 @stack_fold_cmpss(float %a0, float %a1) {
212 ;CHECK-LABEL: stack_fold_cmpss
213 ;CHECK: cmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
214 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
215 %2 = fcmp oeq float %a0, %a1
216 %3 = zext i1 %2 to i32
220 define <4 x float> @stack_fold_cmpss_int(<4 x float> %a0, <4 x float> %a1) {
221 ;CHECK-LABEL: stack_fold_cmpss_int
222 ;CHECK: cmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
223 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
224 %2 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0)
227 declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
229 ; TODO stack_fold_comisd
231 define i32 @stack_fold_comisd_int(<2 x double> %a0, <2 x double> %a1) {
232 ;CHECK-LABEL: stack_fold_comisd_int
233 ;CHECK: comisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
234 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
235 %2 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
238 declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
240 ; TODO stack_fold_comiss
242 define i32 @stack_fold_comiss_int(<4 x float> %a0, <4 x float> %a1) {
243 ;CHECK-LABEL: stack_fold_comiss_int
244 ;CHECK: comiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
245 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
246 %2 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
249 declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
251 define <2 x double> @stack_fold_cvtdq2pd(<4 x i32> %a0) {
252 ;CHECK-LABEL: stack_fold_cvtdq2pd
253 ;CHECK: cvtdq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
254 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
255 %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
256 %3 = sitofp <2 x i32> %2 to <2 x double>
260 define <2 x double> @stack_fold_cvtdq2pd_int(<4 x i32> %a0) {
261 ;CHECK-LABEL: stack_fold_cvtdq2pd_int
262 ;CHECK: cvtdq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
263 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
264 %2 = shufflevector <4 x i32> %a0, <4 x i32> %a0, <2 x i32> <i32 0, i32 1>
265 %cvt = sitofp <2 x i32> %2 to <2 x double>
266 ret <2 x double> %cvt
269 define <4 x float> @stack_fold_cvtdq2ps(<4 x i32> %a0) {
270 ;CHECK-LABEL: stack_fold_cvtdq2ps
271 ;CHECK: cvtdq2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
272 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
273 %2 = sitofp <4 x i32> %a0 to <4 x float>
277 define <4 x i32> @stack_fold_cvtpd2dq(<2 x double> %a0) {
278 ;CHECK-LABEL: stack_fold_cvtpd2dq
279 ;CHECK: cvtpd2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
280 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
281 %2 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
284 declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
286 define <2 x float> @stack_fold_cvtpd2ps(<2 x double> %a0) {
287 ;CHECK-LABEL: stack_fold_cvtpd2ps
288 ;CHECK: cvtpd2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
289 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
290 %2 = fptrunc <2 x double> %a0 to <2 x float>
294 define <4 x i32> @stack_fold_cvtps2dq(<4 x float> %a0) {
295 ;CHECK-LABEL: stack_fold_cvtps2dq
296 ;CHECK: cvtps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
297 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
298 %2 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
301 declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
303 define <2 x double> @stack_fold_cvtps2pd(<4 x float> %a0) {
304 ;CHECK-LABEL: stack_fold_cvtps2pd
305 ;CHECK: cvtps2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
306 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
307 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <2 x i32> <i32 0, i32 1>
308 %3 = fpext <2 x float> %2 to <2 x double>
312 define <2 x double> @stack_fold_cvtps2pd_int(<4 x float> %a0) {
313 ;CHECK-LABEL: stack_fold_cvtps2pd_int
314 ;CHECK: cvtps2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
315 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
316 %2 = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1>
317 %cvtps2pd = fpext <2 x float> %2 to <2 x double>
318 ret <2 x double> %cvtps2pd
321 ; TODO stack_fold_cvtsd2si
323 define i32 @stack_fold_cvtsd2si_int(<2 x double> %a0) {
324 ;CHECK-LABEL: stack_fold_cvtsd2si_int
325 ;CHECK: cvtsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
326 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
327 %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
330 declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
332 ; TODO stack_fold_cvtsd2si64
334 define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) {
335 ;CHECK-LABEL: stack_fold_cvtsd2si64_int
336 ;CHECK: cvtsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
337 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
338 %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0)
341 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
343 define float @stack_fold_cvtsd2ss(double %a0) minsize {
344 ;CHECK-LABEL: stack_fold_cvtsd2ss
345 ;CHECK: cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
346 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
347 %2 = fptrunc double %a0 to float
351 define <4 x float> @stack_fold_cvtsd2ss_int(<2 x double> %a0) optsize {
352 ;CHECK-LABEL: stack_fold_cvtsd2ss_int
353 ;CHECK: cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
354 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
355 %2 = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, <2 x double> %a0)
358 declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
360 define double @stack_fold_cvtsi2sd(i32 %a0) {
361 ;CHECK-LABEL: stack_fold_cvtsi2sd
362 ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
363 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
364 %2 = sitofp i32 %a0 to double
368 define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0, <2 x double> %b0) {
369 ;CHECK-LABEL: stack_fold_cvtsi2sd_int
370 ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
371 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
372 %2 = sitofp i32 %a0 to double
373 %3 = insertelement <2 x double> %b0, double %2, i64 0
377 define double @stack_fold_cvtsi642sd(i64 %a0) {
378 ;CHECK-LABEL: stack_fold_cvtsi642sd
379 ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
380 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
381 %2 = sitofp i64 %a0 to double
385 define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0, <2 x double> %b0) {
386 ;CHECK-LABEL: stack_fold_cvtsi642sd_int
387 ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
388 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
389 %2 = sitofp i64 %a0 to double
390 %3 = insertelement <2 x double> %b0, double %2, i64 0
394 define float @stack_fold_cvtsi2ss(i32 %a0) {
395 ;CHECK-LABEL: stack_fold_cvtsi2ss
396 ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
397 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
398 %2 = sitofp i32 %a0 to float
402 define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0, <4 x float> %b0) {
403 ;CHECK-LABEL: stack_fold_cvtsi2ss_int
404 ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
405 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
406 %2 = sitofp i32 %a0 to float
407 %3 = insertelement <4 x float> %b0, float %2, i64 0
411 define float @stack_fold_cvtsi642ss(i64 %a0) {
412 ;CHECK-LABEL: stack_fold_cvtsi642ss
413 ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
414 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
415 %2 = sitofp i64 %a0 to float
419 define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0, <4 x float> %b0) {
420 ;CHECK-LABEL: stack_fold_cvtsi642ss_int
421 ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
422 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
423 %2 = sitofp i64 %a0 to float
424 %3 = insertelement <4 x float> %b0, float %2, i64 0
428 define double @stack_fold_cvtss2sd(float %a0) minsize {
429 ;CHECK-LABEL: stack_fold_cvtss2sd
430 ;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
431 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
432 %2 = fpext float %a0 to double
436 define <2 x double> @stack_fold_cvtss2sd_int(<4 x float> %a0) optsize {
437 ;CHECK-LABEL: stack_fold_cvtss2sd_int
438 ;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
439 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
440 %2 = extractelement <4 x float> %a0, i64 0
441 %3 = fpext float %2 to double
442 %4 = insertelement <2 x double> zeroinitializer, double %3, i64 0
446 ; TODO stack_fold_cvtss2si
448 define i32 @stack_fold_cvtss2si_int(<4 x float> %a0) {
449 ;CHECK-LABEL: stack_fold_cvtss2si_int
450 ;CHECK: cvtss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
451 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
452 %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
455 declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
457 ; TODO stack_fold_cvtss2si64
459 define i64 @stack_fold_cvtss2si64_int(<4 x float> %a0) {
460 ;CHECK-LABEL: stack_fold_cvtss2si64_int
461 ;CHECK: cvtss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
462 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
463 %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0)
466 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
468 define <4 x i32> @stack_fold_cvttpd2dq(<2 x double> %a0) {
469 ;CHECK-LABEL: stack_fold_cvttpd2dq
470 ;CHECK: cvttpd2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
471 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
472 %2 = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
475 declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
477 define <4 x i32> @stack_fold_cvttps2dq(<4 x float> %a0) {
478 ;CHECK-LABEL: stack_fold_cvttps2dq
479 ;CHECK: cvttps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
480 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
481 %2 = fptosi <4 x float> %a0 to <4 x i32>
485 define i32 @stack_fold_cvttsd2si(double %a0) {
486 ;CHECK-LABEL: stack_fold_cvttsd2si
487 ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 8-byte Folded Reload
488 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
489 %2 = fptosi double %a0 to i32
493 define i32 @stack_fold_cvttsd2si_int(<2 x double> %a0) {
494 ;CHECK-LABEL: stack_fold_cvttsd2si_int
495 ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
496 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
497 %2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
500 declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
502 define i64 @stack_fold_cvttsd2si64(double %a0) {
503 ;CHECK-LABEL: stack_fold_cvttsd2si64
504 ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
505 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
506 %2 = fptosi double %a0 to i64
510 define i64 @stack_fold_cvttsd2si64_int(<2 x double> %a0) {
511 ;CHECK-LABEL: stack_fold_cvttsd2si64_int
512 ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
513 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
514 %2 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0)
517 declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
519 define i32 @stack_fold_cvttss2si(float %a0) {
520 ;CHECK-LABEL: stack_fold_cvttss2si
521 ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
522 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
523 %2 = fptosi float %a0 to i32
527 define i32 @stack_fold_cvttss2si_int(<4 x float> %a0) {
528 ;CHECK-LABEL: stack_fold_cvttss2si_int
529 ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
530 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
531 %2 = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
534 declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
536 define i64 @stack_fold_cvttss2si64(float %a0) {
537 ;CHECK-LABEL: stack_fold_cvttss2si64
538 ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 4-byte Folded Reload
539 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
540 %2 = fptosi float %a0 to i64
544 define i64 @stack_fold_cvttss2si64_int(<4 x float> %a0) {
545 ;CHECK-LABEL: stack_fold_cvttss2si64_int
546 ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
547 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
548 %2 = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0)
551 declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
553 define <2 x double> @stack_fold_divpd(<2 x double> %a0, <2 x double> %a1) {
554 ;CHECK-LABEL: stack_fold_divpd
555 ;CHECK: divpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
556 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
557 %2 = fdiv <2 x double> %a0, %a1
; Division folding tests: each function spills its second operand via the
; xmm-clobbering "nop" asm, then verifies the reload is folded directly into
; the divps/divsd/divss instruction (see the file header for the mechanism).
561 define <4 x float> @stack_fold_divps(<4 x float> %a0, <4 x float> %a1) {
562 ;CHECK-LABEL: stack_fold_divps
563 ;CHECK: divps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
564 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
565 %2 = fdiv <4 x float> %a0, %a1
569 define double @stack_fold_divsd(double %a0, double %a1) {
570 ;CHECK-LABEL: stack_fold_divsd
571 ;CHECK: divsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
572 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
573 %2 = fdiv double %a0, %a1
; _int variant: scalar op expressed as extract/op/insert on lane 0 of the
; vector, which should still select the scalar divsd with a folded reload.
577 define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) {
578 ;CHECK-LABEL: stack_fold_divsd_int
579 ;CHECK: divsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
580 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
581 %2 = extractelement <2 x double> %a0, i32 0
582 %3 = extractelement <2 x double> %a1, i32 0
583 %4 = fdiv double %2, %3
584 %5 = insertelement <2 x double> %a0, double %4, i32 0
587 declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
589 define float @stack_fold_divss(float %a0, float %a1) {
590 ;CHECK-LABEL: stack_fold_divss
591 ;CHECK: divss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
592 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
593 %2 = fdiv float %a0, %a1
597 define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) {
598 ;CHECK-LABEL: stack_fold_divss_int
599 ;CHECK: divss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
600 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
601 %2 = extractelement <4 x float> %a0, i32 0
602 %3 = extractelement <4 x float> %a1, i32 0
603 %4 = fdiv float %2, %3
604 %5 = insertelement <4 x float> %a0, float %4, i32 0
607 declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
; SSE4.1 dot-product folding tests: dppd/dpps take an immediate mask ($7
; here) plus a memory operand, so the reload must fold into the second slot.
609 define <2 x double> @stack_fold_dppd(<2 x double> %a0, <2 x double> %a1) {
610 ;CHECK-LABEL: stack_fold_dppd
611 ;CHECK: dppd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
612 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
613 %2 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
616 declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
618 define <4 x float> @stack_fold_dpps(<4 x float> %a0, <4 x float> %a1) {
619 ;CHECK-LABEL: stack_fold_dpps
620 ;CHECK: dpps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
621 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
622 %2 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
625 declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
; extractps is a spill test (not a reload): the asm clobbers all GPRs, so the
; extracted lane is expected to be spilled straight to the stack with
; extractps and later reloaded into %eax with a plain movl.
627 define i32 @stack_fold_extractps(<4 x float> %a0, <4 x float> %a1) {
628 ;CHECK-LABEL: stack_fold_extractps
629 ;CHECK: extractps $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
630 ;CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload
631 ; fadd forces execution domain
632 %1 = fadd <4 x float> %a0, %a1
633 %2 = extractelement <4 x float> %1, i32 1
634 %3 = bitcast float %2 to i32
635 %4 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; SSE3 horizontal add/sub folding tests (intrinsic-only operations), plus the
; SSE4.1 insertps test which also verifies the shuffle decode comment.
639 define <2 x double> @stack_fold_haddpd(<2 x double> %a0, <2 x double> %a1) {
640 ;CHECK-LABEL: stack_fold_haddpd
641 ;CHECK: haddpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
642 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
643 %2 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
646 declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
648 define <4 x float> @stack_fold_haddps(<4 x float> %a0, <4 x float> %a1) {
649 ;CHECK-LABEL: stack_fold_haddps
650 ;CHECK: haddps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
651 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
652 %2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
655 declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
657 define <2 x double> @stack_fold_hsubpd(<2 x double> %a0, <2 x double> %a1) {
658 ;CHECK-LABEL: stack_fold_hsubpd
659 ;CHECK: hsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
660 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
661 %2 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
664 declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
666 define <4 x float> @stack_fold_hsubps(<4 x float> %a0, <4 x float> %a1) {
667 ;CHECK-LABEL: stack_fold_hsubps
668 ;CHECK: hsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
669 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
670 %2 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
673 declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
; insertps immediate 209 (0xD1) selects source lane 3, dest lane 0, zmask 1;
; the second pattern line matches the asm-printer's decoded shuffle comment.
675 define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) {
676 ;CHECK-LABEL: stack_fold_insertps
677 ;CHECK: insertps $17, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
678 ;CHECK-NEXT: {{.*#+}} xmm0 = zero,mem[0],xmm0[2,3]
679 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
680 %2 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 209)
683 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
; max folding tests. Non-commutable variants (#0) and commutable variants
; (#1, presumably fast-math attributes defined at end of file — attribute
; groups are outside this excerpt) both expect the reload to fold; scalar
; variants are written as fcmp ogt + select so they lower to maxsd/maxss.
685 define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) #0 {
686 ;CHECK-LABEL: stack_fold_maxpd
687 ;CHECK: maxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
688 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
689 %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
692 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
694 define <2 x double> @stack_fold_maxpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 {
695 ;CHECK-LABEL: stack_fold_maxpd_commutable
696 ;CHECK: maxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
697 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
698 %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
702 define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) #0 {
703 ;CHECK-LABEL: stack_fold_maxps
704 ;CHECK: maxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
705 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
706 %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
709 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
711 define <4 x float> @stack_fold_maxps_commutable(<4 x float> %a0, <4 x float> %a1) #1 {
712 ;CHECK-LABEL: stack_fold_maxps_commutable
713 ;CHECK: maxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
714 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
715 %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
719 define double @stack_fold_maxsd(double %a0, double %a1) #0 {
720 ;CHECK-LABEL: stack_fold_maxsd
721 ;CHECK: maxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
722 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
723 %2 = fcmp ogt double %a0, %a1
724 %3 = select i1 %2, double %a0, double %a1
728 define double @stack_fold_maxsd_commutable(double %a0, double %a1) #1 {
729 ;CHECK-LABEL: stack_fold_maxsd_commutable
730 ;CHECK: maxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
731 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
732 %2 = fcmp ogt double %a0, %a1
733 %3 = select i1 %2, double %a0, double %a1
737 define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) #0 {
738 ;CHECK-LABEL: stack_fold_maxsd_int
739 ;CHECK: maxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
740 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
741 %2 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
744 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
746 define float @stack_fold_maxss(float %a0, float %a1) #0 {
747 ;CHECK-LABEL: stack_fold_maxss
748 ;CHECK: maxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
749 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
750 %2 = fcmp ogt float %a0, %a1
751 %3 = select i1 %2, float %a0, float %a1
755 define float @stack_fold_maxss_commutable(float %a0, float %a1) #1 {
756 ;CHECK-LABEL: stack_fold_maxss_commutable
757 ;CHECK: maxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
758 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
759 %2 = fcmp ogt float %a0, %a1
760 %3 = select i1 %2, float %a0, float %a1
764 define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) #0 {
765 ;CHECK-LABEL: stack_fold_maxss_int
766 ;CHECK: maxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
767 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
768 %2 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
771 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
; min folding tests, mirroring the max tests above: intrinsic packed forms,
; commutable (#1) variants, and scalar forms written as fcmp olt + select so
; they lower to minsd/minss with the reload folded.
773 define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) #0 {
774 ;CHECK-LABEL: stack_fold_minpd
775 ;CHECK: minpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
776 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
777 %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
780 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
782 define <2 x double> @stack_fold_minpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 {
783 ;CHECK-LABEL: stack_fold_minpd_commutable
784 ;CHECK: minpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
785 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
786 %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
790 define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) #0 {
791 ;CHECK-LABEL: stack_fold_minps
792 ;CHECK: minps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
793 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
794 %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
797 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
799 define <4 x float> @stack_fold_minps_commutable(<4 x float> %a0, <4 x float> %a1) #1 {
800 ;CHECK-LABEL: stack_fold_minps_commutable
801 ;CHECK: minps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
802 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
803 %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
807 define double @stack_fold_minsd(double %a0, double %a1) #0 {
808 ;CHECK-LABEL: stack_fold_minsd
809 ;CHECK: minsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
810 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
811 %2 = fcmp olt double %a0, %a1
812 %3 = select i1 %2, double %a0, double %a1
816 define double @stack_fold_minsd_commutable(double %a0, double %a1) #1 {
817 ;CHECK-LABEL: stack_fold_minsd_commutable
818 ;CHECK: minsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
819 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
820 %2 = fcmp olt double %a0, %a1
821 %3 = select i1 %2, double %a0, double %a1
825 define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) #0 {
826 ;CHECK-LABEL: stack_fold_minsd_int
827 ;CHECK: minsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
828 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
829 %2 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
832 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
834 define float @stack_fold_minss(float %a0, float %a1) #0 {
835 ;CHECK-LABEL: stack_fold_minss
836 ;CHECK: minss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
837 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
838 %2 = fcmp olt float %a0, %a1
839 %3 = select i1 %2, float %a0, float %a1
843 define float @stack_fold_minss_commutable(float %a0, float %a1) #1 {
844 ;CHECK-LABEL: stack_fold_minss_commutable
845 ;CHECK: minss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
846 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
847 %2 = fcmp olt float %a0, %a1
848 %3 = select i1 %2, float %a0, float %a1
852 define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) #0 {
853 ;CHECK-LABEL: stack_fold_minss_int
854 ;CHECK: minss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
855 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
856 %2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
859 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
; Duplicate-shuffle tests: single-input ops, so the asm additionally clobbers
; xmm1 to guarantee the lone argument gets spilled. Followed by fmul tests.
861 define <2 x double> @stack_fold_movddup(<2 x double> %a0) {
862 ;CHECK-LABEL: stack_fold_movddup
863 ;CHECK: movddup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
864 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
865 %2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 0, i32 0>
868 ; TODO stack_fold_movhpd (load / store)
869 ; TODO stack_fold_movhps (load / store)
871 ; TODO stack_fold_movlpd (load / store)
872 ; TODO stack_fold_movlps (load / store)
874 define <4 x float> @stack_fold_movshdup(<4 x float> %a0) {
875 ;CHECK-LABEL: stack_fold_movshdup
876 ;CHECK: movshdup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
877 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
878 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
882 define <4 x float> @stack_fold_movsldup(<4 x float> %a0) {
883 ;CHECK-LABEL: stack_fold_movsldup
884 ;CHECK: movsldup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
885 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
886 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
; fmul folding tests, same structure as the fadd/fdiv families above.
890 define <2 x double> @stack_fold_mulpd(<2 x double> %a0, <2 x double> %a1) {
891 ;CHECK-LABEL: stack_fold_mulpd
892 ;CHECK: mulpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
893 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
894 %2 = fmul <2 x double> %a0, %a1
898 define <4 x float> @stack_fold_mulps(<4 x float> %a0, <4 x float> %a1) {
899 ;CHECK-LABEL: stack_fold_mulps
900 ;CHECK: mulps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
901 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
902 %2 = fmul <4 x float> %a0, %a1
906 define double @stack_fold_mulsd(double %a0, double %a1) {
907 ;CHECK-LABEL: stack_fold_mulsd
908 ;CHECK: mulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
909 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
910 %2 = fmul double %a0, %a1
914 define <2 x double> @stack_fold_mulsd_int(<2 x double> %a0, <2 x double> %a1) {
915 ;CHECK-LABEL: stack_fold_mulsd_int
916 ;CHECK: mulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
917 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
918 %2 = extractelement <2 x double> %a0, i32 0
919 %3 = extractelement <2 x double> %a1, i32 0
920 %4 = fmul double %2, %3
921 %5 = insertelement <2 x double> %a0, double %4, i32 0
925 define float @stack_fold_mulss(float %a0, float %a1) {
926 ;CHECK-LABEL: stack_fold_mulss
927 ;CHECK: mulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
928 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
929 %2 = fmul float %a0, %a1
933 define <4 x float> @stack_fold_mulss_int(<4 x float> %a0, <4 x float> %a1) {
934 ;CHECK-LABEL: stack_fold_mulss_int
935 ;CHECK: mulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
936 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
937 %2 = extractelement <4 x float> %a0, i32 0
938 %3 = extractelement <4 x float> %a1, i32 0
939 %4 = fmul float %2, %3
940 %5 = insertelement <4 x float> %a0, float %4, i32 0
; Bitwise-or folding tests: the logic is done on bitcast integer vectors and
; a trailing fadd with zero pins the FP execution domain so orpd/orps (not
; por) is selected. Followed by reciprocal-approximation tests.
944 define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) {
945 ;CHECK-LABEL: stack_fold_orpd
946 ;CHECK: orpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
947 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
948 %2 = bitcast <2 x double> %a0 to <2 x i64>
949 %3 = bitcast <2 x double> %a1 to <2 x i64>
950 %4 = or <2 x i64> %2, %3
951 %5 = bitcast <2 x i64> %4 to <2 x double>
952 ; fadd forces execution domain
953 %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
957 define <4 x float> @stack_fold_orps(<4 x float> %a0, <4 x float> %a1) {
958 ;CHECK-LABEL: stack_fold_orps
959 ;CHECK: orps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
960 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
961 %2 = bitcast <4 x float> %a0 to <2 x i64>
962 %3 = bitcast <4 x float> %a1 to <2 x i64>
963 %4 = or <2 x i64> %2, %3
964 %5 = bitcast <2 x i64> %4 to <4 x float>
965 ; fadd forces execution domain
966 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
970 ; TODO stack_fold_rcpps
972 define <4 x float> @stack_fold_rcpps_int(<4 x float> %a0) {
973 ;CHECK-LABEL: stack_fold_rcpps_int
974 ;CHECK: rcpps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
975 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
976 %2 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
979 declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
981 ; TODO stack_fold_rcpss
; optsize encourages the folded memory form for the scalar rcpss pattern.
983 define <4 x float> @stack_fold_rcpss_int(<4 x float> %a0, <4 x float> %a1) optsize {
984 ;CHECK-LABEL: stack_fold_rcpss_int
985 ;CHECK: rcpss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
986 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
987 %2 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a1)
988 %3 = extractelement <4 x float> %2, i32 0
989 %4 = insertelement <4 x float> %a0, float %3, i32 0
992 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>)
; SSE4.1 rounding tests. Immediate $7 comes straight from the intrinsic;
; $9 is the rounding-control encoding llvm.floor lowers to (round-down plus
; precision-exception suppression). optsize/minsize encourage folding the
; scalar forms. Then rsqrt and shuffle folding tests.
994 define <2 x double> @stack_fold_roundpd(<2 x double> %a0) {
995 ;CHECK-LABEL: stack_fold_roundpd
996 ;CHECK: roundpd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
997 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
998 %2 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7)
1001 declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
1003 define <4 x float> @stack_fold_roundps(<4 x float> %a0) {
1004 ;CHECK-LABEL: stack_fold_roundps
1005 ;CHECK: roundps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1006 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1007 %2 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7)
1010 declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
1012 define double @stack_fold_roundsd(double %a0) optsize {
1013 ;CHECK-LABEL: stack_fold_roundsd
1014 ;CHECK: roundsd $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
1015 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1016 %2 = call double @llvm.floor.f64(double %a0)
1019 declare double @llvm.floor.f64(double) nounwind readnone
1021 define <2 x double> @stack_fold_roundsd_int(<2 x double> %a0, <2 x double> %a1) optsize {
1022 ;CHECK-LABEL: stack_fold_roundsd_int
1023 ;CHECK: roundsd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1024 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1025 %2 = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7)
1028 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
1030 define float @stack_fold_roundss(float %a0) minsize {
1031 ;CHECK-LABEL: stack_fold_roundss
1032 ;CHECK: roundss $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
1033 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1034 %2 = call float @llvm.floor.f32(float %a0)
1037 declare float @llvm.floor.f32(float) nounwind readnone
1039 define <4 x float> @stack_fold_roundss_int(<4 x float> %a0, <4 x float> %a1) optsize {
1040 ;CHECK-LABEL: stack_fold_roundss_int
1041 ;CHECK: roundss $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1042 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1043 %2 = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7)
1046 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
1048 ; TODO stack_fold_rsqrtps
1050 define <4 x float> @stack_fold_rsqrtps_int(<4 x float> %a0) {
1051 ;CHECK-LABEL: stack_fold_rsqrtps_int
1052 ;CHECK: rsqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1053 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1054 %2 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
1057 declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
1059 ; TODO stack_fold_rsqrtss
1061 define <4 x float> @stack_fold_rsqrtss_int(<4 x float> %a0, <4 x float> %a1) optsize {
1062 ;CHECK-LABEL: stack_fold_rsqrtss_int
1063 ;CHECK: rsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1064 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1065 %2 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a1)
1066 %3 = extractelement <4 x float> %2, i32 0
1067 %4 = insertelement <4 x float> %a0, float %3, i32 0
1070 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>)
; Two-input shuffles with immediate control; the trailing fadd again pins
; the FP execution domain for shufpd.
1072 define <2 x double> @stack_fold_shufpd(<2 x double> %a0, <2 x double> %a1) {
1073 ;CHECK-LABEL: stack_fold_shufpd
1074 ;CHECK: shufpd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1075 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1076 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
1077 ; fadd forces execution domain
1078 %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
1082 define <4 x float> @stack_fold_shufps(<4 x float> %a0, <4 x float> %a1) {
1083 ;CHECK-LABEL: stack_fold_shufps
1084 ;CHECK: shufps $200, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1085 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1086 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 7>
; Square-root folding tests via the generic llvm.sqrt intrinsics; scalar
; _int variants route through a lane-0 extract/insert of the second vector,
; with optsize/minsize encouraging the folded memory form.
1090 define <2 x double> @stack_fold_sqrtpd(<2 x double> %a0) {
1091 ;CHECK-LABEL: stack_fold_sqrtpd
1092 ;CHECK: sqrtpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1093 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1094 %2 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a0)
1098 define <4 x float> @stack_fold_sqrtps(<4 x float> %a0) {
1099 ;CHECK-LABEL: stack_fold_sqrtps
1100 ;CHECK: sqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1101 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1102 %2 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a0)
1106 define double @stack_fold_sqrtsd(double %a0) optsize {
1107 ;CHECK-LABEL: stack_fold_sqrtsd
1108 ;CHECK: sqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
1109 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1110 %2 = call double @llvm.sqrt.f64(double %a0)
1113 declare double @llvm.sqrt.f64(double) nounwind readnone
1115 define <2 x double> @stack_fold_sqrtsd_int(<2 x double> %a0, <2 x double> %a1) optsize {
1116 ;CHECK-LABEL: stack_fold_sqrtsd_int
1117 ;CHECK: sqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1118 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1119 %2 = extractelement <2 x double> %a1, i64 0
1120 %3 = call double @llvm.sqrt.f64(double %2)
1121 %4 = insertelement <2 x double> %a1, double %3, i64 0
1122 %5 = extractelement <2 x double> %4, i32 0
1123 %6 = insertelement <2 x double> %a0, double %5, i32 0
1127 define float @stack_fold_sqrtss(float %a0) minsize {
1128 ;CHECK-LABEL: stack_fold_sqrtss
1129 ;CHECK: sqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
1130 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1131 %2 = call float @llvm.sqrt.f32(float %a0)
1134 declare float @llvm.sqrt.f32(float) nounwind readnone
1136 define <4 x float> @stack_fold_sqrtss_int(<4 x float> %a0, <4 x float> %a1) optsize {
1137 ;CHECK-LABEL: stack_fold_sqrtss_int
1138 ;CHECK: sqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1139 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1140 %2 = extractelement <4 x float> %a1, i64 0
1141 %3 = call float @llvm.sqrt.f32(float %2)
1142 %4 = insertelement <4 x float> %a1, float %3, i64 0
1143 %5 = extractelement <4 x float> %4, i32 0
1144 %6 = insertelement <4 x float> %a0, float %5, i32 0
1148 define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) {
1149 ;CHECK-LABEL: stack_fold_subpd
1150 ;CHECK: subpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1151 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1152 %2 = fsub <2 x double> %a0, %a1
; Packed single subtract: expect a folded subps with a 16-byte stack reload.
1156 define <4 x float> @stack_fold_subps(<4 x float> %a0, <4 x float> %a1) {
1157 ;CHECK-LABEL: stack_fold_subps
1158 ;CHECK: subps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1159 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1160 %2 = fsub <4 x float> %a0, %a1
; Scalar double subtract: expect a folded subsd with an 8-byte stack reload.
1164 define double @stack_fold_subsd(double %a0, double %a1) {
1165 ;CHECK-LABEL: stack_fold_subsd
1166 ;CHECK: subsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
1167 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1168 %2 = fsub double %a0, %a1
; Scalar-in-vector subsd: subtract on element 0 with the upper lane taken
; from %a0, mirroring SUBSD's register semantics via extract/fsub/insert.
1172 define <2 x double> @stack_fold_subsd_int(<2 x double> %a0, <2 x double> %a1) {
1173 ;CHECK-LABEL: stack_fold_subsd_int
1174 ;CHECK: subsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1175 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1176 %2 = extractelement <2 x double> %a0, i32 0
1177 %3 = extractelement <2 x double> %a1, i32 0
1178 %4 = fsub double %2, %3
1179 %5 = insertelement <2 x double> %a0, double %4, i32 0
; Scalar single subtract: expect a folded subss with a 4-byte stack reload.
1183 define float @stack_fold_subss(float %a0, float %a1) {
1184 ;CHECK-LABEL: stack_fold_subss
1185 ;CHECK: subss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
1186 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1187 %2 = fsub float %a0, %a1
; Scalar-in-vector subss: subtract on element 0 with upper lanes taken from
; %a0, mirroring SUBSS's register semantics via extract/fsub/insert.
1191 define <4 x float> @stack_fold_subss_int(<4 x float> %a0, <4 x float> %a1) {
1192 ;CHECK-LABEL: stack_fold_subss_int
1193 ;CHECK: subss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1194 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1195 %2 = extractelement <4 x float> %a0, i32 0
1196 %3 = extractelement <4 x float> %a1, i32 0
1197 %4 = fsub float %2, %3
1198 %5 = insertelement <4 x float> %a0, float %4, i32 0
; Unordered double compare: the fcmp ueq + select lowers to ucomisd, which
; should fold the 8-byte reload of the spilled operand.
1202 define i32 @stack_fold_ucomisd(double %a0, double %a1) {
1203 ;CHECK-LABEL: stack_fold_ucomisd
1204 ;CHECK: ucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
1205 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1206 %2 = fcmp ueq double %a0, %a1
1207 %3 = select i1 %2, i32 1, i32 -1
; Intrinsic form of ucomisd via llvm.x86.sse2.ucomieq.sd; expect the 16-byte
; vector reload to be folded into the compare.
1211 define i32 @stack_fold_ucomisd_int(<2 x double> %a0, <2 x double> %a1) {
1212 ;CHECK-LABEL: stack_fold_ucomisd_int
1213 ;CHECK: ucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1214 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1215 %2 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
1218 declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
; Unordered float compare: the fcmp ueq + select lowers to ucomiss, which
; should fold the 4-byte reload of the spilled operand.
1220 define i32 @stack_fold_ucomiss(float %a0, float %a1) {
1221 ;CHECK-LABEL: stack_fold_ucomiss
1222 ;CHECK: ucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
1223 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1224 %2 = fcmp ueq float %a0, %a1
1225 %3 = select i1 %2, i32 1, i32 -1
; Intrinsic form of ucomiss via llvm.x86.sse.ucomieq.ss; expect the 16-byte
; vector reload to be folded into the compare.
1229 define i32 @stack_fold_ucomiss_int(<4 x float> %a0, <4 x float> %a1) {
1230 ;CHECK-LABEL: stack_fold_ucomiss_int
1231 ;CHECK: ucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1232 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1233 %2 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
1236 declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
; High-element unpack of doubles: shuffle mask <1,3> matches unpckhpd; the
; trailing fadd keeps the instruction in the floating-point domain so the
; pattern isn't rewritten to the integer-domain equivalent.
1238 define <2 x double> @stack_fold_unpckhpd(<2 x double> %a0, <2 x double> %a1) {
1239 ;CHECK-LABEL: stack_fold_unpckhpd
1240 ;CHECK: unpckhpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1241 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1242 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
1243 ; fadd forces execution domain
1244 %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
; High-element unpack of floats: shuffle mask <2,6,3,7> matches unpckhps; the
; trailing fadd pins the floating-point execution domain.
1248 define <4 x float> @stack_fold_unpckhps(<4 x float> %a0, <4 x float> %a1) {
1249 ;CHECK-LABEL: stack_fold_unpckhps
1250 ;CHECK: unpckhps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1251 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1252 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1253 ; fadd forces execution domain
1254 %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
; Low-element unpack of doubles: shuffle mask <0,2> matches unpcklpd; the
; trailing fadd pins the floating-point execution domain.
1258 define <2 x double> @stack_fold_unpcklpd(<2 x double> %a0, <2 x double> %a1) {
1259 ;CHECK-LABEL: stack_fold_unpcklpd
1260 ;CHECK: unpcklpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1261 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1262 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
1263 ; fadd forces execution domain
1264 %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
; Low-element unpack of floats: shuffle mask <0,4,1,5> matches unpcklps; the
; trailing fadd pins the floating-point execution domain.
1268 define <4 x float> @stack_fold_unpcklps(<4 x float> %a0, <4 x float> %a1) {
1269 ;CHECK-LABEL: stack_fold_unpcklps
1270 ;CHECK: unpcklps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1271 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1272 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1273 ; fadd forces execution domain
1274 %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
; Bitwise xor on doubles: the xor is done on a <2 x i64> bitcast, and the
; trailing fadd forces selection of the FP-domain xorpd rather than pxor.
1278 define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) {
1279 ;CHECK-LABEL: stack_fold_xorpd
1280 ;CHECK: xorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1281 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1282 %2 = bitcast <2 x double> %a0 to <2 x i64>
1283 %3 = bitcast <2 x double> %a1 to <2 x i64>
1284 %4 = xor <2 x i64> %2, %3
1285 %5 = bitcast <2 x i64> %4 to <2 x double>
1286 ; fadd forces execution domain
1287 %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
; Bitwise xor on floats: the xor is done on a <2 x i64> bitcast, and the
; trailing fadd forces selection of the FP-domain xorps rather than pxor.
1291 define <4 x float> @stack_fold_xorps(<4 x float> %a0, <4 x float> %a1) {
1292 ;CHECK-LABEL: stack_fold_xorps
1293 ;CHECK: xorps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1294 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1295 %2 = bitcast <4 x float> %a0 to <2 x i64>
1296 %3 = bitcast <4 x float> %a1 to <2 x i64>
1297 %4 = xor <2 x i64> %2, %3
1298 %5 = bitcast <2 x i64> %4 to <4 x float>
1299 ; fadd forces execution domain
1300 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
1304 declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
1305 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
1307 attributes #0 = { "unsafe-fp-math"="false" }
1308 attributes #1 = { "unsafe-fp-math"="true" }