; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse,-sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse-builtins.c

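; For reference (not part of the autogenerated checks): the i8 immediate
; passed to @llvm.x86.sse.cmp.ss in the tests below selects the SSE compare
; predicate, i.e. 0=EQ(oeq), 1=LT(olt), 2=LE(ole), 3=UNORD(uno), 4=NEQ(une),
; 5=NLT(uge), 6=NLE(ugt), 7=ORD(ord).
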
define <4 x float> @test_mm_add_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_add_ps:
; SSE: # %bb.0:
; SSE-NEXT: addps %xmm1, %xmm0 # encoding: [0x0f,0x58,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x58,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = fadd <4 x float> %a0, %a1
  ret <4 x float> %res
}

define <4 x float> @test_mm_add_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_add_ss:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x58,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x58,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x58,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %ext0 = extractelement <4 x float> %a0, i32 0
  %ext1 = extractelement <4 x float> %a1, i32 0
  %fadd = fadd float %ext0, %ext1
  %res = insertelement <4 x float> %a0, float %fadd, i32 0
  ret <4 x float> %res
}

define <4 x float> @test_mm_and_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_and_ps:
; SSE: # %bb.0:
; SSE-NEXT: andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_and_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_and_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <4 x float> %a0 to <4 x i32>
  %arg1 = bitcast <4 x float> %a1 to <4 x i32>
  %res = and <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <4 x float>
  ret <4 x float> %bc
}

define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_andnot_ps:
; SSE: # %bb.0:
; SSE-NEXT: andnps %xmm1, %xmm0 # encoding: [0x0f,0x55,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_andnot_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xef,0xc2]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdb,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_andnot_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x25,0xc0,0x0f]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <4 x float> %a0 to <4 x i32>
  %arg1 = bitcast <4 x float> %a1 to <4 x i32>
  %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
  %res = and <4 x i32> %not, %arg1
  %bc = bitcast <4 x i32> %res to <4 x float>
  ret <4 x float> %bc
}

define <4 x float> @test_mm_cmpeq_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpeqps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x00]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x00]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x00]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp oeq <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpeq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpeqss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x00]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpeq_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x00]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone

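; SSE/AVX1 provide no GE/GT compare predicates, so the cmpge/cmpgt tests
; below commute the operands and use the LE/LT (or NLE/NLT) predicate; the
; scalar (_ss) forms then copy lane 0 of the result back over %a0 with a
; movss or vblendps.
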
define <4 x float> @test_mm_cmpge_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpge_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpleps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x02]
; SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpge_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpleps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x02]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpge_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpleps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x02]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ole <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpge_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpge_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpless %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x02]
; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpge_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpless %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x02]
; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 2)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpgt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpltps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x01]
; SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x01]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x01]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp olt <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpgt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpltss %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x01]
; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpgt_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltss %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x01]
; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 1)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmple_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmple_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpleps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x02]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmple_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpleps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x02]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmple_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpleps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ole <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmple_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmple_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpless %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x02]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmple_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpless %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x02]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 2)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmplt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpltps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x01]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpltps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x01]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x01]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp olt <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmplt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpltss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x01]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmplt_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x01]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 1)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpneq_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpneq_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpneqps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x04]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpneq_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpneqps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x04]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpneq_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpneqps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x04]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp une <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpneq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpneq_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpneqss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x04]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpneq_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpneqss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x04]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 4)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnge_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnge_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpnleps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x06]
; SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnge_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnleps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x06]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnge_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnleps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x06]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ugt <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnge_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnge_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpnless %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x06]
; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnge_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnless %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x06]
; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 6)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpngt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpngt_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x05]
; SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpngt_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnltps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x05]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpngt_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnltps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x05]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp uge <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpngt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpngt_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltss %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x05]
; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpngt_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnltss %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x05]
; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 5)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnle_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnle_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpnleps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x06]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnle_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnleps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x06]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnle_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnleps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x06]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ugt <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnle_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnle_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpnless %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x06]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnle_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnless %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x06]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 6)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnlt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnlt_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x05]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnlt_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnltps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x05]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnlt_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnltps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x05]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp uge <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnlt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnlt_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x05]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnlt_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnltss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x05]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 5)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpord_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpord_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpordps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x07]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpord_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpordps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x07]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpord_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpordps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x07]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ord <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpord_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpord_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpordss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x07]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpord_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpordss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x07]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpunord_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpunord_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpunordps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x03]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpunord_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpunordps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x03]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpunord_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpunordps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x03]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp uno <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpunord_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpunord_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpunordss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x03]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpunord_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpunordss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x03]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 3)
  ret <4 x float> %res
}

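; COMISS sets ZF, PF and CF when the comparison is unordered, so the comieq
; and comineq tests below combine sete/setne with the parity flag
; (setnp/setp), while the ordered ge/gt/le/lt variants read a single flag
; via setae/seta (an unordered result sets CF and correctly yields 0).
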
define i32 @test_mm_comieq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comieq_ss:
; SSE: # %bb.0:
; SSE-NEXT: comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1]
; SSE-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; SSE-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; SSE-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comieq_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX1-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX1-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX1-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comieq_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX512-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX512-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX512-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comige_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comige_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1]
; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comige_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comige_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comigt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comigt_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1]
; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comigt_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comigt_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comile_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comile_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comiss %xmm0, %xmm1 # encoding: [0x0f,0x2f,0xc8]
; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comile_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2f,0xc8]
; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comile_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc8]
; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comilt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comilt_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comiss %xmm0, %xmm1 # encoding: [0x0f,0x2f,0xc8]
; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comilt_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2f,0xc8]
; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comilt_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc8]
; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comineq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comineq_ss:
; SSE: # %bb.0:
; SSE-NEXT: comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1]
; SSE-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; SSE-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; SSE-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comineq_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX1-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX1-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX1-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comineq_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX512-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX512-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX512-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_cvt_ss2si(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvt_ss2si:
; SSE: # %bb.0:
; SSE-NEXT: cvtss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2d,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvt_ss2si:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2d,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvt_ss2si:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2d,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone

define <4 x float> @test_mm_cvtsi32_ss(<4 x float> %a0, i32 %a1) nounwind {
; X86-SSE-LABEL: test_mm_cvtsi32_ss:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x2a,0x44,0x24,0x04]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtsi32_ss:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x2a,0x44,0x24,0x04]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtsi32_ss:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2a,0x44,0x24,0x04]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_cvtsi32_ss:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: cvtsi2ssl %edi, %xmm0 # encoding: [0xf3,0x0f,0x2a,0xc7]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_cvtsi32_ss:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x2a,0xc7]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_cvtsi32_ss:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2a,0xc7]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone

define float @test_mm_cvtss_f32(<4 x float> %a0) nounwind {
; X86-SSE-LABEL: test_mm_cvtss_f32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: movss %xmm0, (%esp) # encoding: [0xf3,0x0f,0x11,0x04,0x24]
; X86-SSE-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24]
; X86-SSE-NEXT: popl %eax # encoding: [0x58]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtss_f32:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX1-NEXT: vmovss %xmm0, (%esp) # encoding: [0xc5,0xfa,0x11,0x04,0x24]
; X86-AVX1-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24]
; X86-AVX1-NEXT: popl %eax # encoding: [0x58]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtss_f32:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX512-NEXT: vmovss %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x04,0x24]
; X86-AVX512-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24]
; X86-AVX512-NEXT: popl %eax # encoding: [0x58]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm_cvtss_f32:
; X64: # %bb.0:
; X64-NEXT: retq # encoding: [0xc3]
  %res = extractelement <4 x float> %a0, i32 0
  ret float %res
}

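; Note on test_mm_cvtss_f32 above: the i386 psABI returns float in st(0), so
; element 0 is spilled to the stack and reloaded with flds, while on x86-64
; the value is already in the first vector register and the function reduces
; to a bare ret.
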
define i32 @test_mm_cvtss_si32(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvtss_si32:
; SSE: # %bb.0:
; SSE-NEXT: cvtss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2d,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtss_si32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2d,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtss_si32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2d,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %res
}

define i32 @test_mm_cvttss_si(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvttss_si:
; SSE: # %bb.0:
; SSE-NEXT: cvttss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2c,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvttss_si:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvttss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2c,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvttss_si:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvttss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2c,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone

define i32 @test_mm_cvttss_si32(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvttss_si32:
; SSE: # %bb.0:
; SSE-NEXT: cvttss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2c,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvttss_si32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvttss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2c,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvttss_si32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvttss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2c,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
  ret i32 %res
}

define <4 x float> @test_mm_div_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_div_ps:
; SSE: # %bb.0:
; SSE-NEXT: divps %xmm1, %xmm0 # encoding: [0x0f,0x5e,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_div_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vdivps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5e,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_div_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vdivps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5e,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = fdiv <4 x float> %a0, %a1
  ret <4 x float> %res
}

define <4 x float> @test_mm_div_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_div_ss:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5e,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_div_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vdivss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5e,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_div_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vdivss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5e,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %ext0 = extractelement <4 x float> %a0, i32 0
  %ext1 = extractelement <4 x float> %a1, i32 0
  %fdiv = fdiv float %ext0, %ext1
  %res = insertelement <4 x float> %a0, float %fdiv, i32 0
  ret <4 x float> %res
}

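; The MM_GET_* tests below read MXCSR via stmxcsr and mask out one field
; each - 0x3F is the exception state flags (bits 0-5), 0x1F80 the exception
; masks (bits 7-12), 0x6000 the rounding control (bits 13-14) and 0x8000 the
; flush-to-zero bit (bit 15).
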
define i32 @test_MM_GET_EXCEPTION_MASK() nounwind {
; X86-SSE-LABEL: test_MM_GET_EXCEPTION_MASK:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18]
; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-SSE-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00]
; X86-SSE-NEXT: # imm = 0x1F80
; X86-SSE-NEXT: popl %ecx # encoding: [0x59]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_MM_GET_EXCEPTION_MASK:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18]
; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-AVX-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00]
; X86-AVX-NEXT: # imm = 0x1F80
; X86-AVX-NEXT: popl %ecx # encoding: [0x59]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_GET_EXCEPTION_MASK:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-SSE-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00]
; X64-SSE-NEXT: # imm = 0x1F80
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_GET_EXCEPTION_MASK:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-AVX-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00]
; X64-AVX-NEXT: # imm = 0x1F80
; X64-AVX-NEXT: retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1, align 4
  %4 = and i32 %3, 8064
  ret i32 %4
}
declare void @llvm.x86.sse.stmxcsr(i8*) nounwind readnone

define i32 @test_MM_GET_EXCEPTION_STATE() nounwind {
; X86-SSE-LABEL: test_MM_GET_EXCEPTION_STATE:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18]
; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-SSE-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f]
; X86-SSE-NEXT: popl %ecx # encoding: [0x59]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_MM_GET_EXCEPTION_STATE:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18]
; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-AVX-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f]
; X86-AVX-NEXT: popl %ecx # encoding: [0x59]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_GET_EXCEPTION_STATE:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-SSE-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_GET_EXCEPTION_STATE:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-AVX-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f]
; X64-AVX-NEXT: retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1, align 4
  %4 = and i32 %3, 63
  ret i32 %4
}

define i32 @test_MM_GET_FLUSH_ZERO_MODE() nounwind {
; X86-SSE-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18]
; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-SSE-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
; X86-SSE-NEXT: # imm = 0x8000
; X86-SSE-NEXT: popl %ecx # encoding: [0x59]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18]
; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-AVX-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
; X86-AVX-NEXT: # imm = 0x8000
; X86-AVX-NEXT: popl %ecx # encoding: [0x59]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-SSE-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
; X64-SSE-NEXT: # imm = 0x8000
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-AVX-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
; X64-AVX-NEXT: # imm = 0x8000
; X64-AVX-NEXT: retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1, align 4
  %4 = and i32 %3, 32768
  ret i32 %4
}

define i32 @test_MM_GET_ROUNDING_MODE() nounwind {
; X86-SSE-LABEL: test_MM_GET_ROUNDING_MODE:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18]
; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-SSE-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00]
; X86-SSE-NEXT: # imm = 0x6000
; X86-SSE-NEXT: popl %ecx # encoding: [0x59]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_MM_GET_ROUNDING_MODE:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18]
; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-AVX-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00]
; X86-AVX-NEXT: # imm = 0x6000
; X86-AVX-NEXT: popl %ecx # encoding: [0x59]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_GET_ROUNDING_MODE:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-SSE-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00]
; X64-SSE-NEXT: # imm = 0x6000
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_GET_ROUNDING_MODE:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-AVX-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00]
; X64-AVX-NEXT: # imm = 0x6000
; X64-AVX-NEXT: retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1, align 4
  %4 = and i32 %3, 24576
  ret i32 %4
}

define i32 @test_mm_getcsr() nounwind {
; X86-SSE-LABEL: test_mm_getcsr:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18]
; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-SSE-NEXT: popl %ecx # encoding: [0x59]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_mm_getcsr:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18]
; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-AVX-NEXT: popl %ecx # encoding: [0x59]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_getcsr:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_mm_getcsr:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-AVX-NEXT: retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1, align 4
  ret i32 %3
}

define <4 x float> @test_mm_load_ps(float* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %arg0 = bitcast float* %a0 to <4 x float>*
  %res = load <4 x float>, <4 x float>* %arg0, align 16
  ret <4 x float> %res
}

define <4 x float> @test_mm_load_ps1(float* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_ps1:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movss (%eax), %xmm0 # encoding: [0xf3,0x0f,0x10,0x00]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load_ps1:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vbroadcastss (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load_ps1:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vbroadcastss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load_ps1:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movss (%rdi), %xmm0 # encoding: [0xf3,0x0f,0x10,0x07]
; X64-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load_ps1:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vbroadcastss (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load_ps1:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vbroadcastss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %ld = load float, float* %a0, align 4
  %res0 = insertelement <4 x float> undef, float %ld, i32 0
  %res1 = insertelement <4 x float> %res0, float %ld, i32 1
  %res2 = insertelement <4 x float> %res1, float %ld, i32 2
  %res3 = insertelement <4 x float> %res2, float %ld, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_load_ss(float* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_ss:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movss (%eax), %xmm0 # encoding: [0xf3,0x0f,0x10,0x00]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load_ss:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovss (%eax), %xmm0 # encoding: [0xc5,0xfa,0x10,0x00]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load_ss:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load_ss:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movss (%rdi), %xmm0 # encoding: [0xf3,0x0f,0x10,0x07]
; X64-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load_ss:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
; X64-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load_ss:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; X64-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %ld = load float, float* %a0, align 1
  %res0 = insertelement <4 x float> undef, float %ld, i32 0
  %res1 = insertelement <4 x float> %res0, float 0.0, i32 1
  %res2 = insertelement <4 x float> %res1, float 0.0, i32 2
  %res3 = insertelement <4 x float> %res2, float 0.0, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_load1_ps(float* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load1_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movss (%eax), %xmm0 # encoding: [0xf3,0x0f,0x10,0x00]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load1_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vbroadcastss (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load1_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vbroadcastss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load1_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movss (%rdi), %xmm0 # encoding: [0xf3,0x0f,0x10,0x07]
; X64-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load1_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vbroadcastss (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load1_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vbroadcastss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %ld = load float, float* %a0, align 4
  %res0 = insertelement <4 x float> undef, float %ld, i32 0
  %res1 = insertelement <4 x float> %res0, float %ld, i32 1
  %res2 = insertelement <4 x float> %res1, float %ld, i32 2
  %res3 = insertelement <4 x float> %res2, float %ld, i32 3
  ret <4 x float> %res3
}

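; With only SSE1 enabled (the x86_64 RUN line passes -sse2), the 64-bit
; <2 x float> load in the loadh/loadl tests below is assembled from scalar
; loads and shuffles (on x86-64 via a GPR load spilled to the stack); with
; AVX the same pattern matches a single vmovhpd/vmovlpd load.
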
define <4 x float> @test_mm_loadh_pi(<4 x float> %a0, x86_mmx* %a1) {
; X86-SSE-LABEL: test_mm_loadh_pi:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movss (%eax), %xmm1 # encoding: [0xf3,0x0f,0x10,0x08]
; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss 4(%eax), %xmm2 # encoding: [0xf3,0x0f,0x10,0x50,0x04]
; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero
; X86-SSE-NEXT: shufps $0, %xmm1, %xmm2 # encoding: [0x0f,0xc6,0xd1,0x00]
; X86-SSE-NEXT: # xmm2 = xmm2[0,0],xmm1[0,0]
; X86-SSE-NEXT: shufps $36, %xmm2, %xmm0 # encoding: [0x0f,0xc6,0xc2,0x24]
; X86-SSE-NEXT: # xmm0 = xmm0[0,1],xmm2[2,0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadh_pi:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovhpd (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x00]
; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadh_pi:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovhpd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x00]
; X86-AVX512-NEXT: # xmm0 = xmm0[0],mem[0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadh_pi:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movq (%rdi), %rax # encoding: [0x48,0x8b,0x07]
; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xf8]
; X64-SSE-NEXT: shrq $32, %rax # encoding: [0x48,0xc1,0xe8,0x20]
; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc]
; X64-SSE-NEXT: movss -{{[0-9]+}}(%rsp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0xf8]
; X64-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero
; X64-SSE-NEXT: movss -{{[0-9]+}}(%rsp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0xfc]
; X64-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero
; X64-SSE-NEXT: unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca]
; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X64-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadh_pi:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovhpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x07]
; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadh_pi:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovhpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x07]
; X64-AVX512-NEXT: # xmm0 = xmm0[0],mem[0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %ptr = bitcast x86_mmx* %a1 to <2 x float>*
  %ld = load <2 x float>, <2 x float>* %ptr
  %ext = shufflevector <2 x float> %ld, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %res = shufflevector <4 x float> %a0, <4 x float> %ext, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x float> %res
}

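; Editorial note: test_mm_loadl_pi is the low-half counterpart; AVX targets
; use vmovlpd to replace lanes 0-1 while keeping lanes 2-3 of %a0 intact.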
define <4 x float> @test_mm_loadl_pi(<4 x float> %a0, x86_mmx* %a1) {
; X86-SSE-LABEL: test_mm_loadl_pi:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movss (%eax), %xmm2 # encoding: [0xf3,0x0f,0x10,0x10]
; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss 4(%eax), %xmm1 # encoding: [0xf3,0x0f,0x10,0x48,0x04]
; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: shufps $0, %xmm2, %xmm1 # encoding: [0x0f,0xc6,0xca,0x00]
; X86-SSE-NEXT: # xmm1 = xmm1[0,0],xmm2[0,0]
; X86-SSE-NEXT: shufps $226, %xmm0, %xmm1 # encoding: [0x0f,0xc6,0xc8,0xe2]
; X86-SSE-NEXT: # xmm1 = xmm1[2,0],xmm0[2,3]
; X86-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadl_pi:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovlpd (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x00]
; X86-AVX1-NEXT: # xmm0 = mem[0],xmm0[1]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadl_pi:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovlpd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x00]
; X86-AVX512-NEXT: # xmm0 = mem[0],xmm0[1]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadl_pi:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movq (%rdi), %rax # encoding: [0x48,0x8b,0x07]
; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xf8]
; X64-SSE-NEXT: shrq $32, %rax # encoding: [0x48,0xc1,0xe8,0x20]
; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc]
; X64-SSE-NEXT: movss -{{[0-9]+}}(%rsp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0xf8]
; X64-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero
; X64-SSE-NEXT: movss -{{[0-9]+}}(%rsp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0xfc]
; X64-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero
; X64-SSE-NEXT: unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca]
; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X64-SSE-NEXT: shufps $228, %xmm0, %xmm1 # encoding: [0x0f,0xc6,0xc8,0xe4]
; X64-SSE-NEXT: # xmm1 = xmm1[0,1],xmm0[2,3]
; X64-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadl_pi:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x07]
; X64-AVX1-NEXT: # xmm0 = mem[0],xmm0[1]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadl_pi:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x07]
; X64-AVX512-NEXT: # xmm0 = mem[0],xmm0[1]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %ptr = bitcast x86_mmx* %a1 to <2 x float>*
  %ld = load <2 x float>, <2 x float>* %ptr
  %ext = shufflevector <2 x float> %ld, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %res = shufflevector <4 x float> %a0, <4 x float> %ext, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x float> %res
}

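; Editorial note: test_mm_loadr_ps is an aligned load followed by a lane
; reversal; the shufps/vpermilps immediate 27 (0b00011011) selects 3,2,1,0.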
define <4 x float> @test_mm_loadr_ps(float* %a0) nounwind {
; X86-SSE-LABEL: test_mm_loadr_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00]
; X86-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b]
; X86-SSE-NEXT: # xmm0 = xmm0[3,2,1,0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadr_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vpermilps $27, (%eax), %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0x00,0x1b]
; X86-AVX1-NEXT: # xmm0 = mem[3,2,1,0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadr_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vpermilps $27, (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0x00,0x1b]
; X86-AVX512-NEXT: # xmm0 = mem[3,2,1,0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadr_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
; X64-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b]
; X64-SSE-NEXT: # xmm0 = xmm0[3,2,1,0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadr_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpermilps $27, (%rdi), %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0x07,0x1b]
; X64-AVX1-NEXT: # xmm0 = mem[3,2,1,0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadr_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpermilps $27, (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0x07,0x1b]
; X64-AVX512-NEXT: # xmm0 = mem[3,2,1,0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %arg0 = bitcast float* %a0 to <4 x float>*
  %ld = load <4 x float>, <4 x float>* %arg0, align 16
  %res = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %res
}

define <4 x float> @test_mm_loadu_ps(float* %a0) nounwind {
; X86-SSE-LABEL: test_mm_loadu_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadu_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovups (%eax), %xmm0 # encoding: [0xc5,0xf8,0x10,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadu_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadu_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadu_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovups (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x10,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadu_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %arg0 = bitcast float* %a0 to <4 x float>*
  %res = load <4 x float>, <4 x float>* %arg0, align 1
  ret <4 x float> %res
}

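; Editorial note: the min/max tests go through the llvm.x86.sse.* intrinsics
; rather than IR fcmp/select, presumably to preserve the x86-specific NaN and
; signed-zero ordering semantics of minps/maxps.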
define <4 x float> @test_mm_max_ps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_max_ps:
; SSE: # %bb.0:
; SSE-NEXT: maxps %xmm1, %xmm0 # encoding: [0x0f,0x5f,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_max_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5f,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_max_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5f,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone

define <4 x float> @test_mm_max_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_max_ss:
; SSE: # %bb.0:
; SSE-NEXT: maxss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5f,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_max_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5f,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_max_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5f,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone

define <4 x float> @test_mm_min_ps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_min_ps:
; SSE: # %bb.0:
; SSE-NEXT: minps %xmm1, %xmm0 # encoding: [0x0f,0x5d,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_min_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vminps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5d,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_min_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone

define <4 x float> @test_mm_min_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_min_ss:
; SSE: # %bb.0:
; SSE-NEXT: minss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5d,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_min_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vminss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5d,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_min_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5d,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone

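; Editorial note: for test_mm_move_ss, plain SSE uses a register-to-register
; movss, while AVX prefers vblendps with immediate 1, i.e. lane 0 from %xmm1
; and lanes 1-3 from %xmm0.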
define <4 x float> @test_mm_move_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_move_ss:
; SSE: # %bb.0:
; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_move_ss:
; AVX: # %bb.0:
; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_movehl_ps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_movehl_ps:
; SSE: # %bb.0:
; SSE-NEXT: movhlps %xmm1, %xmm0 # encoding: [0x0f,0x12,0xc1]
; SSE-NEXT: # xmm0 = xmm1[1],xmm0[1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_movehl_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vunpckhpd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x15,0xc0]
; AVX1-NEXT: # xmm0 = xmm1[1],xmm0[1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_movehl_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vunpckhpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x15,0xc0]
; AVX512-NEXT: # xmm0 = xmm1[1],xmm0[1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_movelh_ps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_movelh_ps:
; SSE: # %bb.0:
; SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_movelh_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_movelh_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x float> %res
}

define i32 @test_mm_movemask_ps(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_movemask_ps:
; SSE: # %bb.0:
; SSE-NEXT: movmskps %xmm0, %eax # encoding: [0x0f,0x50,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_movemask_ps:
; AVX: # %bb.0:
; AVX-NEXT: vmovmskps %xmm0, %eax # encoding: [0xc5,0xf8,0x50,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone

define <4 x float> @test_mm_mul_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_mul_ps:
; SSE: # %bb.0:
; SSE-NEXT: mulps %xmm1, %xmm0 # encoding: [0x0f,0x59,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mul_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x59,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_mul_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x59,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = fmul <4 x float> %a0, %a1
  ret <4 x float> %res
}

define <4 x float> @test_mm_mul_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_mul_ss:
; SSE: # %bb.0:
; SSE-NEXT: mulss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x59,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mul_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vmulss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x59,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_mul_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x59,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %ext0 = extractelement <4 x float> %a0, i32 0
  %ext1 = extractelement <4 x float> %a1, i32 0
  %fmul = fmul float %ext0, %ext1
  %res = insertelement <4 x float> %a0, float %fmul, i32 0
  ret <4 x float> %res
}

define <4 x float> @test_mm_or_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_or_ps:
; SSE: # %bb.0:
; SSE-NEXT: orps %xmm1, %xmm0 # encoding: [0x0f,0x56,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_or_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_or_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <4 x float> %a0 to <4 x i32>
  %arg1 = bitcast <4 x float> %a1 to <4 x i32>
  %res = or <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <4 x float>
  ret <4 x float> %bc
}

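; Editorial note: the llvm.prefetch operands below are (address, rw=0 for
; read, locality=0, cache type=1 for data); locality 0 maps to prefetchnta.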
define void @test_mm_prefetch(i8* %a0) {
; X86-LABEL: test_mm_prefetch:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: prefetchnta (%eax) # encoding: [0x0f,0x18,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm_prefetch:
; X64: # %bb.0:
; X64-NEXT: prefetchnta (%rdi) # encoding: [0x0f,0x18,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.prefetch(i8* %a0, i32 0, i32 0, i32 1)
  ret void
}
declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) nounwind readnone

define <4 x float> @test_mm_rcp_ps(<4 x float> %a0) {
; SSE-LABEL: test_mm_rcp_ps:
; SSE: # %bb.0:
; SSE-NEXT: rcpps %xmm0, %xmm0 # encoding: [0x0f,0x53,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_rcp_ps:
; AVX: # %bb.0:
; AVX-NEXT: vrcpps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x53,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone

define <4 x float> @test_mm_rcp_ss(<4 x float> %a0) {
; SSE-LABEL: test_mm_rcp_ss:
; SSE: # %bb.0:
; SSE-NEXT: rcpss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x53,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_rcp_ss:
; AVX: # %bb.0:
; AVX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x53,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %rcp = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0)
  ret <4 x float> %rcp
}
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone

define <4 x float> @test_mm_rsqrt_ps(<4 x float> %a0) {
; SSE-LABEL: test_mm_rsqrt_ps:
; SSE: # %bb.0:
; SSE-NEXT: rsqrtps %xmm0, %xmm0 # encoding: [0x0f,0x52,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_rsqrt_ps:
; AVX: # %bb.0:
; AVX-NEXT: vrsqrtps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x52,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone

define <4 x float> @test_mm_rsqrt_ss(<4 x float> %a0) {
; SSE-LABEL: test_mm_rsqrt_ss:
; SSE: # %bb.0:
; SSE-NEXT: rsqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x52,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_rsqrt_ss:
; AVX: # %bb.0:
; AVX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x52,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %rsqrt = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0)
  ret <4 x float> %rsqrt
}
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone

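; Editorial note: the MXCSR tests below all follow the same read-modify-write
; pattern: stmxcsr to a stack slot, mask off the relevant field, or in the new
; bits, then ldmxcsr. For _MM_SET_EXCEPTION_MASK the mask is -8065
; (0xFFFFE07F), which clears the six exception-mask bits 7..12.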
define void @test_MM_SET_EXCEPTION_MASK(i32 %a0) nounwind {
; X86-SSE-LABEL: test_MM_SET_EXCEPTION_MASK:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19]
; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
; X86-SSE-NEXT: andl $-8065, %edx # encoding: [0x81,0xe2,0x7f,0xe0,0xff,0xff]
; X86-SSE-NEXT: # imm = 0xE07F
; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11]
; X86-SSE-NEXT: popl %eax # encoding: [0x58]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_MM_SET_EXCEPTION_MASK:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19]
; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
; X86-AVX-NEXT: andl $-8065, %edx # encoding: [0x81,0xe2,0x7f,0xe0,0xff,0xff]
; X86-AVX-NEXT: # imm = 0xE07F
; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11]
; X86-AVX-NEXT: popl %eax # encoding: [0x58]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_SET_EXCEPTION_MASK:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
; X64-SSE-NEXT: andl $-8065, %ecx # encoding: [0x81,0xe1,0x7f,0xe0,0xff,0xff]
; X64-SSE-NEXT: # imm = 0xE07F
; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_SET_EXCEPTION_MASK:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
; X64-AVX-NEXT: andl $-8065, %ecx # encoding: [0x81,0xe1,0x7f,0xe0,0xff,0xff]
; X64-AVX-NEXT: # imm = 0xE07F
; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10]
; X64-AVX-NEXT: retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1
  %4 = and i32 %3, -8065
  %5 = or i32 %4, %a0
  store i32 %5, i32* %1
  call void @llvm.x86.sse.ldmxcsr(i8* %2)
  ret void
}
declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind readnone

define void @test_MM_SET_EXCEPTION_STATE(i32 %a0) nounwind {
; X86-SSE-LABEL: test_MM_SET_EXCEPTION_STATE:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19]
; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
; X86-SSE-NEXT: andl $-64, %edx # encoding: [0x83,0xe2,0xc0]
; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11]
; X86-SSE-NEXT: popl %eax # encoding: [0x58]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_MM_SET_EXCEPTION_STATE:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19]
; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
; X86-AVX-NEXT: andl $-64, %edx # encoding: [0x83,0xe2,0xc0]
; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11]
; X86-AVX-NEXT: popl %eax # encoding: [0x58]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_SET_EXCEPTION_STATE:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
; X64-SSE-NEXT: andl $-64, %ecx # encoding: [0x83,0xe1,0xc0]
; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_SET_EXCEPTION_STATE:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
; X64-AVX-NEXT: andl $-64, %ecx # encoding: [0x83,0xe1,0xc0]
; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10]
; X64-AVX-NEXT: retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1
  %4 = and i32 %3, -64
  %5 = or i32 %4, %a0
  store i32 %5, i32* %1
  call void @llvm.x86.sse.ldmxcsr(i8* %2)
  ret void
}

define void @test_MM_SET_FLUSH_ZERO_MODE(i32 %a0) nounwind {
; X86-SSE-LABEL: test_MM_SET_FLUSH_ZERO_MODE:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19]
; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
; X86-SSE-NEXT: andl $-32769, %edx # encoding: [0x81,0xe2,0xff,0x7f,0xff,0xff]
; X86-SSE-NEXT: # imm = 0xFFFF7FFF
; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11]
; X86-SSE-NEXT: popl %eax # encoding: [0x58]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_MM_SET_FLUSH_ZERO_MODE:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19]
; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
; X86-AVX-NEXT: andl $-32769, %edx # encoding: [0x81,0xe2,0xff,0x7f,0xff,0xff]
; X86-AVX-NEXT: # imm = 0xFFFF7FFF
; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11]
; X86-AVX-NEXT: popl %eax # encoding: [0x58]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_SET_FLUSH_ZERO_MODE:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
; X64-SSE-NEXT: andl $-32769, %ecx # encoding: [0x81,0xe1,0xff,0x7f,0xff,0xff]
; X64-SSE-NEXT: # imm = 0xFFFF7FFF
; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_SET_FLUSH_ZERO_MODE:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
; X64-AVX-NEXT: andl $-32769, %ecx # encoding: [0x81,0xe1,0xff,0x7f,0xff,0xff]
; X64-AVX-NEXT: # imm = 0xFFFF7FFF
; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10]
; X64-AVX-NEXT: retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1
  %4 = and i32 %3, -32769
  %5 = or i32 %4, %a0
  store i32 %5, i32* %1
  call void @llvm.x86.sse.ldmxcsr(i8* %2)
  ret void
}

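; Editorial note: test_mm_set_ps stores its arguments in reverse order, as
; the IR below makes explicit: %a3 lands in element 0 and %a0 in element 3.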
define <4 x float> @test_mm_set_ps(float %a0, float %a1, float %a2, float %a3) nounwind {
; X86-SSE-LABEL: test_mm_set_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c]
; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x08]
; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x04]
; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero
; X86-SSE-NEXT: unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca]
; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c]
; X86-AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10]
; X86-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x08]
; X86-AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vinsertps $32, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x20]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x04]
; X86-AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vinsertps $48, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x30]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,1,2],xmm1[0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c]
; X86-AVX512-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10]
; X86-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x08]
; X86-AVX512-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vinsertps $32, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x20]
; X86-AVX512-NEXT: # xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x04]
; X86-AVX512-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vinsertps $48, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x30]
; X86-AVX512-NEXT: # xmm0 = xmm0[0,1,2],xmm1[0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-SSE-NEXT: unpcklps %xmm2, %xmm3 # encoding: [0x0f,0x14,0xda]
; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X64-SSE-NEXT: movlhps %xmm1, %xmm3 # encoding: [0x0f,0x16,0xd9]
; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm1[0]
; X64-SSE-NEXT: movaps %xmm3, %xmm0 # encoding: [0x0f,0x28,0xc3]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10]
; X64-AVX1-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; X64-AVX1-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20]
; X64-AVX1-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; X64-AVX1-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30]
; X64-AVX1-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10]
; X64-AVX512-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; X64-AVX512-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20]
; X64-AVX512-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; X64-AVX512-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30]
; X64-AVX512-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <4 x float> undef, float %a3, i32 0
  %res1 = insertelement <4 x float> %res0, float %a2, i32 1
  %res2 = insertelement <4 x float> %res1, float %a1, i32 2
  %res3 = insertelement <4 x float> %res2, float %a0, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_set_ps1(float %a0) nounwind {
; X86-SSE-LABEL: test_mm_set_ps1:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set_ps1:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set_ps1:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set_ps1:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set_ps1:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set_ps1:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <4 x float> undef, float %a0, i32 0
  %res1 = insertelement <4 x float> %res0, float %a0, i32 1
  %res2 = insertelement <4 x float> %res1, float %a0, i32 2
  %res3 = insertelement <4 x float> %res2, float %a0, i32 3
  ret <4 x float> %res3
}

define void @test_MM_SET_ROUNDING_MODE(i32 %a0) nounwind {
; X86-SSE-LABEL: test_MM_SET_ROUNDING_MODE:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19]
; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
; X86-SSE-NEXT: andl $-24577, %edx # encoding: [0x81,0xe2,0xff,0x9f,0xff,0xff]
; X86-SSE-NEXT: # imm = 0x9FFF
; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11]
; X86-SSE-NEXT: popl %eax # encoding: [0x58]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_MM_SET_ROUNDING_MODE:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19]
; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
; X86-AVX-NEXT: andl $-24577, %edx # encoding: [0x81,0xe2,0xff,0x9f,0xff,0xff]
; X86-AVX-NEXT: # imm = 0x9FFF
; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11]
; X86-AVX-NEXT: popl %eax # encoding: [0x58]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_SET_ROUNDING_MODE:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
; X64-SSE-NEXT: andl $-24577, %ecx # encoding: [0x81,0xe1,0xff,0x9f,0xff,0xff]
; X64-SSE-NEXT: # imm = 0x9FFF
; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_SET_ROUNDING_MODE:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
; X64-AVX-NEXT: andl $-24577, %ecx # encoding: [0x81,0xe1,0xff,0x9f,0xff,0xff]
; X64-AVX-NEXT: # imm = 0x9FFF
; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10]
; X64-AVX-NEXT: retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1
  %4 = and i32 %3, -24577
  %5 = or i32 %4, %a0
  store i32 %5, i32* %1
  call void @llvm.x86.sse.ldmxcsr(i8* %2)
  ret void
}

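; Editorial note: test_mm_set_ss must zero the upper lanes: SSE xorps a zero
; register and movss the scalar into lane 0, while AVX merges with vblendps.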
define <4 x float> @test_mm_set_ss(float %a0) nounwind {
; X86-SSE-LABEL: test_mm_set_ss:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x04]
; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
; X86-SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; X86-SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set_ss:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9]
; X86-AVX1-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01]
; X86-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set_ss:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9]
; X86-AVX512-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01]
; X86-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set_ss:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: xorps %xmm1, %xmm1 # encoding: [0x0f,0x57,0xc9]
; X64-SSE-NEXT: movss %xmm0, %xmm1 # encoding: [0xf3,0x0f,0x10,0xc8]
; X64-SSE-NEXT: # xmm1 = xmm0[0],xmm1[1,2,3]
; X64-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_mm_set_ss:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9]
; X64-AVX-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01]
; X64-AVX-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3]
; X64-AVX-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <4 x float> undef, float %a0, i32 0
  %res1 = insertelement <4 x float> %res0, float 0.0, i32 1
  %res2 = insertelement <4 x float> %res1, float 0.0, i32 2
  %res3 = insertelement <4 x float> %res2, float 0.0, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_set1_ps(float %a0) nounwind {
; X86-SSE-LABEL: test_mm_set1_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set1_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set1_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set1_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set1_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set1_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <4 x float> undef, float %a0, i32 0
  %res1 = insertelement <4 x float> %res0, float %a0, i32 1
  %res2 = insertelement <4 x float> %res1, float %a0, i32 2
  %res3 = insertelement <4 x float> %res2, float %a0, i32 3
  ret <4 x float> %res3
}

define void @test_mm_setcsr(i32 %a0) nounwind {
; X86-SSE-LABEL: test_mm_setcsr:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax # encoding: [0x8d,0x44,0x24,0x04]
; X86-SSE-NEXT: ldmxcsr (%eax) # encoding: [0x0f,0xae,0x10]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_mm_setcsr:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: leal {{[0-9]+}}(%esp), %eax # encoding: [0x8d,0x44,0x24,0x04]
; X86-AVX-NEXT: vldmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x10]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_setcsr:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x7c,0x24,0xfc]
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_mm_setcsr:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x7c,0x24,0xfc]
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10]
; X64-AVX-NEXT: retq # encoding: [0xc3]
  %st = alloca i32, align 4
  store i32 %a0, i32* %st, align 4
  %bc = bitcast i32* %st to i8*
  call void @llvm.x86.sse.ldmxcsr(i8* %bc)
  ret void
}

define <4 x float> @test_mm_setr_ps(float %a0, float %a1, float %a2, float %a3) nounwind {
; X86-SSE-LABEL: test_mm_setr_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c]
; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x08]
; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_setr_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c]
; X86-AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xc5,0xfa,0x10,0x54,0x24,0x08]
; X86-AVX1-NEXT: # xmm2 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm3 # encoding: [0xc5,0xfa,0x10,0x5c,0x24,0x04]
; X86-AVX1-NEXT: # xmm3 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10]
; X86-AVX1-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; X86-AVX1-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20]
; X86-AVX1-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; X86-AVX1-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30]
; X86-AVX1-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_setr_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c]
; X86-AVX512-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x54,0x24,0x08]
; X86-AVX512-NEXT: # xmm2 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x5c,0x24,0x04]
; X86-AVX512-NEXT: # xmm3 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10]
; X86-AVX512-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; X86-AVX512-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20]
; X86-AVX512-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; X86-AVX512-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30]
; X86-AVX512-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_setr_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: unpcklps %xmm3, %xmm2 # encoding: [0x0f,0x14,0xd3]
; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X64-SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1]
; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-SSE-NEXT: movlhps %xmm2, %xmm0 # encoding: [0x0f,0x16,0xc2]
; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_setr_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10]
; X64-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; X64-AVX1-NEXT: vinsertps $32, %xmm2, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc2,0x20]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; X64-AVX1-NEXT: vinsertps $48, %xmm3, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc3,0x30]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,1,2],xmm3[0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_setr_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10]
; X64-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; X64-AVX512-NEXT: vinsertps $32, %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc2,0x20]
; X64-AVX512-NEXT: # xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; X64-AVX512-NEXT: vinsertps $48, %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc3,0x30]
; X64-AVX512-NEXT: # xmm0 = xmm0[0,1,2],xmm3[0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <4 x float> undef, float %a0, i32 0
  %res1 = insertelement <4 x float> %res0, float %a1, i32 1
  %res2 = insertelement <4 x float> %res1, float %a2, i32 2
  %res3 = insertelement <4 x float> %res2, float %a3, i32 3
  ret <4 x float> %res3
}

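; Editorial note: test_mm_setzero_ps lowers zeroinitializer to the usual
; xorps/vxorps pattern, which x86 cores recognize as a dependency-breaking
; zero idiom.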
define <4 x float> @test_mm_setzero_ps() {
; SSE-LABEL: test_mm_setzero_ps:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_setzero_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_setzero_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  ret <4 x float> zeroinitializer
}

define void @test_mm_sfence() nounwind {
; CHECK-LABEL: test_mm_sfence:
; CHECK: # %bb.0:
; CHECK-NEXT: sfence # encoding: [0x0f,0xae,0xf8]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  call void @llvm.x86.sse.sfence()
  ret void
}
declare void @llvm.x86.sse.sfence() nounwind readnone

define <4 x float> @test_mm_shuffle_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_shuffle_ps:
; SSE: # %bb.0:
; SSE-NEXT: shufps $0, %xmm1, %xmm0 # encoding: [0x0f,0xc6,0xc1,0x00]
; SSE-NEXT: # xmm0 = xmm0[0,0],xmm1[0,0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_shuffle_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vshufps $0, %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc1,0x00]
; AVX1-NEXT: # xmm0 = xmm0[0,0],xmm1[0,0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_shuffle_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vshufps $0, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xc1,0x00]
; AVX512-NEXT: # xmm0 = xmm0[0,0],xmm1[0,0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
  ret <4 x float> %res
}

define <4 x float> @test_mm_sqrt_ps(<4 x float> %a0) {
; SSE-LABEL: test_mm_sqrt_ps:
; SSE: # %bb.0:
; SSE-NEXT: sqrtps %xmm0, %xmm0 # encoding: [0x0f,0x51,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sqrt_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vsqrtps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x51,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sqrt_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vsqrtps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x51,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) nounwind readnone

define <4 x float> @test_mm_sqrt_ss(<4 x float> %a0) {
; SSE-LABEL: test_mm_sqrt_ss:
; SSE: # %bb.0:
; SSE-NEXT: sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sqrt_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sqrt_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %ext = extractelement <4 x float> %a0, i32 0
  %sqrt = call float @llvm.sqrt.f32(float %ext)
  %ins = insertelement <4 x float> %a0, float %sqrt, i32 0
  ret <4 x float> %ins
}
declare float @llvm.sqrt.f32(float) nounwind readnone

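; Editorial note: test_mm_sqrt_ss_scalar returns a bare float, so the i386
; variants below bounce the result through the stack (movss + flds) to return
; it in st(0) per the 32-bit calling convention.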
define float @test_mm_sqrt_ss_scalar(float %a0) {
; X86-SSE-LABEL: test_mm_sqrt_ss_scalar:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x08]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0]
; X86-SSE-NEXT: movss %xmm0, (%esp) # encoding: [0xf3,0x0f,0x11,0x04,0x24]
; X86-SSE-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24]
; X86-SSE-NEXT: popl %eax # encoding: [0x58]
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_sqrt_ss_scalar:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX1-NEXT: .cfi_def_cfa_offset 8
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0]
; X86-AVX1-NEXT: vmovss %xmm0, (%esp) # encoding: [0xc5,0xfa,0x11,0x04,0x24]
; X86-AVX1-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24]
; X86-AVX1-NEXT: popl %eax # encoding: [0x58]
; X86-AVX1-NEXT: .cfi_def_cfa_offset 4
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_sqrt_ss_scalar:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX512-NEXT: .cfi_def_cfa_offset 8
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0]
; X86-AVX512-NEXT: vmovss %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x04,0x24]
; X86-AVX512-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24]
; X86-AVX512-NEXT: popl %eax # encoding: [0x58]
; X86-AVX512-NEXT: .cfi_def_cfa_offset 4
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_sqrt_ss_scalar:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_sqrt_ss_scalar:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_sqrt_ss_scalar:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %sqrt = call float @llvm.sqrt.f32(float %a0)
  ret float %sqrt
}

define void @test_mm_store_ps(float *%a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_mm_store_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_store_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_store_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_store_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_store_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_store_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast float* %a0 to <4 x float>*
store <4 x float> %a1, <4 x float>* %arg0, align 16
ret void
}

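; _mm_store_ps1/_mm_store1_ps splat lane 0 before the aligned store: shufps $0
; on plain SSE, vpermilps $0 on AVX1, and vbroadcastss on AVX512VL targets.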
define void @test_mm_store_ps1(float *%a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_mm_store_ps1:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_store_ps1:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_store_ps1:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_store_ps1:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_store_ps1:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_store_ps1:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast float* %a0 to <4 x float>*
%shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> zeroinitializer
store <4 x float> %shuf, <4 x float>* %arg0, align 16
ret void
}

define void @test_mm_store_ss(float *%a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_mm_store_ss:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movss %xmm0, (%eax) # encoding: [0xf3,0x0f,0x11,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_store_ss:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovss %xmm0, (%eax) # encoding: [0xc5,0xfa,0x11,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_store_ss:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovss %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_store_ss:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movss %xmm0, (%rdi) # encoding: [0xf3,0x0f,0x11,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_store_ss:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_store_ss:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%ext = extractelement <4 x float> %a1, i32 0
store float %ext, float* %a0, align 1
ret void
}

define void @test_mm_store1_ps(float *%a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_mm_store1_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_store1_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_store1_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_store1_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_store1_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_store1_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast float* %a0 to <4 x float>*
%shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> zeroinitializer
store <4 x float> %shuf, <4 x float>* %arg0, align 16
ret void
}

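; Without AVX the high/low 64-bit vector halves are extracted through an
; aligned stack spill; AVX targets use vmovhpd/vpextrq and vmovlps/vmovq
; directly.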
define void @test_mm_storeh_ps(x86_mmx *%a0, <4 x float> %a1) nounwind {
; X86-SSE-LABEL: test_mm_storeh_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp # encoding: [0x55]
; X86-SSE-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-SSE-NEXT: andl $-16, %esp # encoding: [0x83,0xe4,0xf0]
; X86-SSE-NEXT: subl $32, %esp # encoding: [0x83,0xec,0x20]
; X86-SSE-NEXT: movl 8(%ebp), %eax # encoding: [0x8b,0x45,0x08]
; X86-SSE-NEXT: movaps %xmm0, (%esp) # encoding: [0x0f,0x29,0x04,0x24]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
; X86-SSE-NEXT: movl %edx, 4(%eax) # encoding: [0x89,0x50,0x04]
; X86-SSE-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08]
; X86-SSE-NEXT: movl %ebp, %esp # encoding: [0x89,0xec]
; X86-SSE-NEXT: popl %ebp # encoding: [0x5d]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_storeh_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovhpd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x17,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_storeh_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovhpd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x17,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_storeh_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x29,0x44,0x24,0xe8]
; X64-SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8b,0x44,0x24,0xf0]
; X64-SSE-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_storeh_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpextrq $1, %xmm0, %rax # encoding: [0xc4,0xe3,0xf9,0x16,0xc0,0x01]
; X64-AVX1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_storeh_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpextrq $1, %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0x16,0xc0,0x01]
; X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%ptr = bitcast x86_mmx* %a0 to i64*
%bc = bitcast <4 x float> %a1 to <2 x i64>
%ext = extractelement <2 x i64> %bc, i32 1
store i64 %ext, i64* %ptr
ret void
}

define void @test_mm_storel_ps(x86_mmx *%a0, <4 x float> %a1) nounwind {
; X86-SSE-LABEL: test_mm_storel_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp # encoding: [0x55]
; X86-SSE-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-SSE-NEXT: andl $-16, %esp # encoding: [0x83,0xe4,0xf0]
; X86-SSE-NEXT: subl $32, %esp # encoding: [0x83,0xec,0x20]
; X86-SSE-NEXT: movl 8(%ebp), %eax # encoding: [0x8b,0x45,0x08]
; X86-SSE-NEXT: movaps %xmm0, (%esp) # encoding: [0x0f,0x29,0x04,0x24]
; X86-SSE-NEXT: movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04]
; X86-SSE-NEXT: movl %edx, 4(%eax) # encoding: [0x89,0x50,0x04]
; X86-SSE-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08]
; X86-SSE-NEXT: movl %ebp, %esp # encoding: [0x89,0xec]
; X86-SSE-NEXT: popl %ebp # encoding: [0x5d]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_storel_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovlps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x13,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_storel_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_storel_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x29,0x44,0x24,0xe8]
; X64-SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8b,0x44,0x24,0xe8]
; X64-SSE-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_storel_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
; X64-AVX1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_storel_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
; X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%ptr = bitcast x86_mmx* %a0 to i64*
%bc = bitcast <4 x float> %a1 to <2 x i64>
%ext = extractelement <2 x i64> %bc, i32 0
store i64 %ext, i64* %ptr
ret void
}

define void @test_mm_storer_ps(float *%a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_mm_storer_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b]
; X86-SSE-NEXT: # xmm0 = xmm0[3,2,1,0]
; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_storer_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vpermilps $27, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b]
; X86-AVX1-NEXT: # xmm0 = xmm0[3,2,1,0]
; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_storer_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vpermilps $27, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b]
; X86-AVX512-NEXT: # xmm0 = xmm0[3,2,1,0]
; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_storer_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b]
; X64-SSE-NEXT: # xmm0 = xmm0[3,2,1,0]
; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_storer_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpermilps $27, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b]
; X64-AVX1-NEXT: # xmm0 = xmm0[3,2,1,0]
; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_storer_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpermilps $27, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b]
; X64-AVX512-NEXT: # xmm0 = xmm0[3,2,1,0]
; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast float* %a0 to <4 x float>*
%shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
store <4 x float> %shuf, <4 x float>* %arg0, align 16
ret void
}

define void @test_mm_storeu_ps(float *%a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_mm_storeu_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movups %xmm0, (%eax) # encoding: [0x0f,0x11,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_storeu_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_storeu_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_storeu_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movups %xmm0, (%rdi) # encoding: [0x0f,0x11,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_storeu_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_storeu_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast float* %a0 to <4 x float>*
store <4 x float> %a1, <4 x float>* %arg0, align 1
ret void
}

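; The !nontemporal metadata below selects movntps/vmovntps, a streaming store
; intended to bypass the cache hierarchy.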
define void @test_mm_stream_ps(float *%a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_mm_stream_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movntps %xmm0, (%eax) # encoding: [0x0f,0x2b,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_stream_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovntps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x2b,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_stream_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovntps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_stream_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movntps %xmm0, (%rdi) # encoding: [0x0f,0x2b,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_stream_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovntps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x2b,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_stream_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast float* %a0 to <4 x float>*
store <4 x float> %a1, <4 x float>* %arg0, align 16, !nontemporal !0
ret void
}

define <4 x float> @test_mm_sub_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_sub_ps:
; SSE: # %bb.0:
; SSE-NEXT: subps %xmm1, %xmm0 # encoding: [0x0f,0x5c,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sub_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vsubps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5c,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sub_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vsubps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5c,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = fsub <4 x float> %a0, %a1
ret <4 x float> %res
}

define <4 x float> @test_mm_sub_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_sub_ss:
; SSE: # %bb.0:
; SSE-NEXT: subss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5c,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sub_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5c,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sub_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vsubss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5c,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%ext0 = extractelement <4 x float> %a0, i32 0
%ext1 = extractelement <4 x float> %a1, i32 0
%fsub = fsub float %ext0, %ext1
%res = insertelement <4 x float> %a0, float %fsub, i32 0
ret <4 x float> %res
}

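; _MM_TRANSPOSE4_PS lowers to an unpcklps/unpckhps pass that interleaves row
; pairs, followed by a pass (movlhps/movhlps on SSE, vmovlhps/vunpckhpd on AVX)
; that recombines the 64-bit halves into the transposed rows.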
define void @test_MM_TRANSPOSE4_PS(<4 x float>* %a0, <4 x float>* %a1, <4 x float>* %a2, <4 x float>* %a3) nounwind {
; X86-SSE-LABEL: test_MM_TRANSPOSE4_PS:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %esi # encoding: [0x56]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08]
; X86-SSE-NEXT: movaps (%esi), %xmm0 # encoding: [0x0f,0x28,0x06]
; X86-SSE-NEXT: movaps (%edx), %xmm1 # encoding: [0x0f,0x28,0x0a]
; X86-SSE-NEXT: movaps (%ecx), %xmm2 # encoding: [0x0f,0x28,0x11]
; X86-SSE-NEXT: movaps (%eax), %xmm3 # encoding: [0x0f,0x28,0x18]
; X86-SSE-NEXT: movaps %xmm0, %xmm4 # encoding: [0x0f,0x28,0xe0]
; X86-SSE-NEXT: unpcklps %xmm1, %xmm4 # encoding: [0x0f,0x14,0xe1]
; X86-SSE-NEXT: # xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
; X86-SSE-NEXT: movaps %xmm2, %xmm5 # encoding: [0x0f,0x28,0xea]
; X86-SSE-NEXT: unpcklps %xmm3, %xmm5 # encoding: [0x0f,0x14,0xeb]
; X86-SSE-NEXT: # xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
; X86-SSE-NEXT: unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1]
; X86-SSE-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-SSE-NEXT: unpckhps %xmm3, %xmm2 # encoding: [0x0f,0x15,0xd3]
; X86-SSE-NEXT: # xmm2 = xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; X86-SSE-NEXT: movaps %xmm4, %xmm1 # encoding: [0x0f,0x28,0xcc]
; X86-SSE-NEXT: movlhps %xmm5, %xmm1 # encoding: [0x0f,0x16,0xcd]
; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm5[0]
; X86-SSE-NEXT: movhlps %xmm4, %xmm5 # encoding: [0x0f,0x12,0xec]
; X86-SSE-NEXT: # xmm5 = xmm4[1],xmm5[1]
; X86-SSE-NEXT: movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8]
; X86-SSE-NEXT: movlhps %xmm2, %xmm3 # encoding: [0x0f,0x16,0xda]
; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0]
; X86-SSE-NEXT: movhlps %xmm0, %xmm2 # encoding: [0x0f,0x12,0xd0]
; X86-SSE-NEXT: # xmm2 = xmm0[1],xmm2[1]
; X86-SSE-NEXT: movaps %xmm1, (%esi) # encoding: [0x0f,0x29,0x0e]
; X86-SSE-NEXT: movaps %xmm5, (%edx) # encoding: [0x0f,0x29,0x2a]
; X86-SSE-NEXT: movaps %xmm3, (%ecx) # encoding: [0x0f,0x29,0x19]
; X86-SSE-NEXT: movaps %xmm2, (%eax) # encoding: [0x0f,0x29,0x10]
; X86-SSE-NEXT: popl %esi # encoding: [0x5e]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_MM_TRANSPOSE4_PS:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: pushl %esi # encoding: [0x56]
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10]
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08]
; X86-AVX1-NEXT: vmovaps (%esi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x06]
; X86-AVX1-NEXT: vmovaps (%edx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0a]
; X86-AVX1-NEXT: vmovaps (%ecx), %xmm2 # encoding: [0xc5,0xf8,0x28,0x11]
; X86-AVX1-NEXT: vmovaps (%eax), %xmm3 # encoding: [0xc5,0xf8,0x28,0x18]
; X86-AVX1-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x14,0xe1]
; X86-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-AVX1-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # encoding: [0xc5,0xe8,0x14,0xeb]
; X86-AVX1-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X86-AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1]
; X86-AVX1-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-AVX1-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # encoding: [0xc5,0xe8,0x15,0xcb]
; X86-AVX1-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; X86-AVX1-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # encoding: [0xc5,0xd8,0x16,0xd5]
; X86-AVX1-NEXT: # xmm2 = xmm4[0],xmm5[0]
; X86-AVX1-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # encoding: [0xc5,0xd9,0x15,0xdd]
; X86-AVX1-NEXT: # xmm3 = xmm4[1],xmm5[1]
; X86-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x16,0xe1]
; X86-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0]
; X86-AVX1-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1]
; X86-AVX1-NEXT: # xmm0 = xmm0[1],xmm1[1]
; X86-AVX1-NEXT: vmovaps %xmm2, (%esi) # encoding: [0xc5,0xf8,0x29,0x16]
; X86-AVX1-NEXT: vmovaps %xmm3, (%edx) # encoding: [0xc5,0xf8,0x29,0x1a]
; X86-AVX1-NEXT: vmovaps %xmm4, (%ecx) # encoding: [0xc5,0xf8,0x29,0x21]
; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX1-NEXT: popl %esi # encoding: [0x5e]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_MM_TRANSPOSE4_PS:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: pushl %esi # encoding: [0x56]
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10]
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08]
; X86-AVX512-NEXT: vmovaps (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x06]
; X86-AVX512-NEXT: vmovaps (%edx), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x0a]
; X86-AVX512-NEXT: vmovaps (%ecx), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x11]
; X86-AVX512-NEXT: vmovaps (%eax), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x18]
; X86-AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xe1]
; X86-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-AVX512-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x14,0xeb]
; X86-AVX512-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X86-AVX512-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1]
; X86-AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-AVX512-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x15,0xcb]
; X86-AVX512-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; X86-AVX512-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xd8,0x16,0xd5]
; X86-AVX512-NEXT: # xmm2 = xmm4[0],xmm5[0]
; X86-AVX512-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0x15,0xdd]
; X86-AVX512-NEXT: # xmm3 = xmm4[1],xmm5[1]
; X86-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xe1]
; X86-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0]
; X86-AVX512-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1]
; X86-AVX512-NEXT: # xmm0 = xmm0[1],xmm1[1]
; X86-AVX512-NEXT: vmovaps %xmm2, (%esi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x16]
; X86-AVX512-NEXT: vmovaps %xmm3, (%edx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x1a]
; X86-AVX512-NEXT: vmovaps %xmm4, (%ecx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x21]
; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX512-NEXT: popl %esi # encoding: [0x5e]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_TRANSPOSE4_PS:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
; X64-SSE-NEXT: movaps (%rsi), %xmm1 # encoding: [0x0f,0x28,0x0e]
; X64-SSE-NEXT: movaps (%rdx), %xmm2 # encoding: [0x0f,0x28,0x12]
; X64-SSE-NEXT: movaps (%rcx), %xmm3 # encoding: [0x0f,0x28,0x19]
; X64-SSE-NEXT: movaps %xmm0, %xmm4 # encoding: [0x0f,0x28,0xe0]
; X64-SSE-NEXT: unpcklps %xmm1, %xmm4 # encoding: [0x0f,0x14,0xe1]
; X64-SSE-NEXT: # xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
; X64-SSE-NEXT: movaps %xmm2, %xmm5 # encoding: [0x0f,0x28,0xea]
; X64-SSE-NEXT: unpcklps %xmm3, %xmm5 # encoding: [0x0f,0x14,0xeb]
; X64-SSE-NEXT: # xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
; X64-SSE-NEXT: unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1]
; X64-SSE-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-SSE-NEXT: unpckhps %xmm3, %xmm2 # encoding: [0x0f,0x15,0xd3]
; X64-SSE-NEXT: # xmm2 = xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; X64-SSE-NEXT: movaps %xmm4, %xmm1 # encoding: [0x0f,0x28,0xcc]
; X64-SSE-NEXT: movlhps %xmm5, %xmm1 # encoding: [0x0f,0x16,0xcd]
; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm5[0]
; X64-SSE-NEXT: movhlps %xmm4, %xmm5 # encoding: [0x0f,0x12,0xec]
; X64-SSE-NEXT: # xmm5 = xmm4[1],xmm5[1]
; X64-SSE-NEXT: movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8]
; X64-SSE-NEXT: movlhps %xmm2, %xmm3 # encoding: [0x0f,0x16,0xda]
; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0]
; X64-SSE-NEXT: movhlps %xmm0, %xmm2 # encoding: [0x0f,0x12,0xd0]
; X64-SSE-NEXT: # xmm2 = xmm0[1],xmm2[1]
; X64-SSE-NEXT: movaps %xmm1, (%rdi) # encoding: [0x0f,0x29,0x0f]
; X64-SSE-NEXT: movaps %xmm5, (%rsi) # encoding: [0x0f,0x29,0x2e]
; X64-SSE-NEXT: movaps %xmm3, (%rdx) # encoding: [0x0f,0x29,0x1a]
; X64-SSE-NEXT: movaps %xmm2, (%rcx) # encoding: [0x0f,0x29,0x11]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_MM_TRANSPOSE4_PS:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX1-NEXT: vmovaps (%rsi), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0e]
; X64-AVX1-NEXT: vmovaps (%rdx), %xmm2 # encoding: [0xc5,0xf8,0x28,0x12]
; X64-AVX1-NEXT: vmovaps (%rcx), %xmm3 # encoding: [0xc5,0xf8,0x28,0x19]
; X64-AVX1-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x14,0xe1]
; X64-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-AVX1-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # encoding: [0xc5,0xe8,0x14,0xeb]
; X64-AVX1-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X64-AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1]
; X64-AVX1-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-AVX1-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # encoding: [0xc5,0xe8,0x15,0xcb]
; X64-AVX1-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; X64-AVX1-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # encoding: [0xc5,0xd8,0x16,0xd5]
; X64-AVX1-NEXT: # xmm2 = xmm4[0],xmm5[0]
; X64-AVX1-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # encoding: [0xc5,0xd9,0x15,0xdd]
; X64-AVX1-NEXT: # xmm3 = xmm4[1],xmm5[1]
; X64-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x16,0xe1]
; X64-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0]
; X64-AVX1-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1]
; X64-AVX1-NEXT: # xmm0 = xmm0[1],xmm1[1]
; X64-AVX1-NEXT: vmovaps %xmm2, (%rdi) # encoding: [0xc5,0xf8,0x29,0x17]
; X64-AVX1-NEXT: vmovaps %xmm3, (%rsi) # encoding: [0xc5,0xf8,0x29,0x1e]
; X64-AVX1-NEXT: vmovaps %xmm4, (%rdx) # encoding: [0xc5,0xf8,0x29,0x22]
; X64-AVX1-NEXT: vmovaps %xmm0, (%rcx) # encoding: [0xc5,0xf8,0x29,0x01]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_MM_TRANSPOSE4_PS:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX512-NEXT: vmovaps (%rsi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x0e]
; X64-AVX512-NEXT: vmovaps (%rdx), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x12]
; X64-AVX512-NEXT: vmovaps (%rcx), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x19]
; X64-AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xe1]
; X64-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-AVX512-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x14,0xeb]
; X64-AVX512-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X64-AVX512-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1]
; X64-AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-AVX512-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x15,0xcb]
; X64-AVX512-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; X64-AVX512-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xd8,0x16,0xd5]
; X64-AVX512-NEXT: # xmm2 = xmm4[0],xmm5[0]
; X64-AVX512-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0x15,0xdd]
; X64-AVX512-NEXT: # xmm3 = xmm4[1],xmm5[1]
; X64-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xe1]
; X64-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0]
; X64-AVX512-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1]
; X64-AVX512-NEXT: # xmm0 = xmm0[1],xmm1[1]
; X64-AVX512-NEXT: vmovaps %xmm2, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x17]
; X64-AVX512-NEXT: vmovaps %xmm3, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x1e]
; X64-AVX512-NEXT: vmovaps %xmm4, (%rdx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x22]
; X64-AVX512-NEXT: vmovaps %xmm0, (%rcx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x01]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%row0 = load <4 x float>, <4 x float>* %a0, align 16
%row1 = load <4 x float>, <4 x float>* %a1, align 16
%row2 = load <4 x float>, <4 x float>* %a2, align 16
%row3 = load <4 x float>, <4 x float>* %a3, align 16
%tmp0 = shufflevector <4 x float> %row0, <4 x float> %row1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
%tmp2 = shufflevector <4 x float> %row2, <4 x float> %row3, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
%tmp1 = shufflevector <4 x float> %row0, <4 x float> %row1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
%tmp3 = shufflevector <4 x float> %row2, <4 x float> %row3, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
%res0 = shufflevector <4 x float> %tmp0, <4 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
%res1 = shufflevector <4 x float> %tmp2, <4 x float> %tmp0, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
%res2 = shufflevector <4 x float> %tmp1, <4 x float> %tmp3, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
%res3 = shufflevector <4 x float> %tmp3, <4 x float> %tmp1, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
store <4 x float> %res0, <4 x float>* %a0, align 16
store <4 x float> %res1, <4 x float>* %a1, align 16
store <4 x float> %res2, <4 x float>* %a2, align 16
store <4 x float> %res3, <4 x float>* %a3, align 16
ret void
}

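; ucomiss sets ZF, PF and CF, and an unordered result (NaN operand) sets all
; three, so equality needs setnp+sete combined with andb and inequality needs
; setp+setne combined with orb; ge/gt read CF directly, while le/lt swap the
; operands first.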
define i32 @test_mm_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_ucomieq_ss:
; SSE: # %bb.0:
; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1]
; SSE-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; SSE-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; SSE-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomieq_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1]
; AVX1-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX1-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX1-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomieq_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
; AVX512-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX512-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX512-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_ucomige_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_ucomige_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1]
; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomige_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1]
; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomige_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_ucomigt_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1]
; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomigt_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1]
; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomigt_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_ucomile_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_ucomile_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: ucomiss %xmm0, %xmm1 # encoding: [0x0f,0x2e,0xc8]
; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomile_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vucomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2e,0xc8]
; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomile_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vucomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc8]
; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_ucomilt_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: ucomiss %xmm0, %xmm1 # encoding: [0x0f,0x2e,0xc8]
; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomilt_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vucomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2e,0xc8]
; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomilt_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vucomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc8]
; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_ucomineq_ss:
; SSE: # %bb.0:
; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1]
; SSE-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; SSE-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; SSE-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomineq_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1]
; AVX1-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX1-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX1-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomineq_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
; AVX512-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX512-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX512-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone

define <4 x float> @test_mm_undefined_ps() {
; CHECK-LABEL: test_mm_undefined_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
ret <4 x float> undef
}

define <4 x float> @test_mm_unpackhi_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_unpackhi_ps:
; SSE: # %bb.0:
; SSE-NEXT: unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1]
; SSE-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpackhi_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1]
; AVX1-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpackhi_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1]
; AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
ret <4 x float> %res
}

define <4 x float> @test_mm_unpacklo_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_unpacklo_ps:
; SSE: # %bb.0:
; SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1]
; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpacklo_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x14,0xc1]
; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpacklo_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xc1]
; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
ret <4 x float> %res
}

define <4 x float> @test_mm_xor_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_xor_ps:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm1, %xmm0 # encoding: [0x0f,0x57,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_xor_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_xor_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <4 x float> %a0 to <4 x i32>
%arg1 = bitcast <4 x float> %a1 to <4 x i32>
%res = xor <4 x i32> %arg0, %arg1
%bc = bitcast <4 x i32> %res to <4 x float>
ret <4 x float> %bc
}

!0 = !{i32 1}