; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -show-mc-encoding < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE,X86-SSE1
; RUN: llc -show-mc-encoding < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE,X86-SSE2
; RUN: llc -show-mc-encoding < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc -show-mc-encoding < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc -show-mc-encoding < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse,-sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE,X64-SSE1
; RUN: llc -show-mc-encoding < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse,+sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE,X64-SSE2
; RUN: llc -show-mc-encoding < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc -show-mc-encoding < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse-builtins.c

define <4 x float> @test_mm_add_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_add_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    addps %xmm1, %xmm0 # encoding: [0x0f,0x58,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x58,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = fadd <4 x float> %a0, %a1
  ret <4 x float> %res
}

define <4 x float> @test_mm_add_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_add_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x58,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_ss:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x58,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x58,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %ext0 = extractelement <4 x float> %a0, i32 0
  %ext1 = extractelement <4 x float> %a1, i32 0
  %fadd = fadd float %ext0, %ext1
  %res = insertelement <4 x float> %a0, float %fadd, i32 0
  ret <4 x float> %res
}

define <4 x float> @test_mm_and_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_and_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_and_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_and_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <4 x float> %a0 to <4 x i32>
  %arg1 = bitcast <4 x float> %a1 to <4 x i32>
  %res = and <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <4 x float>
  ret <4 x float> %bc
}

define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X86-SSE1-LABEL: test_mm_andnot_ps:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    andnps %xmm1, %xmm0 # encoding: [0x0f,0x55,0xc1]
; X86-SSE1-NEXT:    retl # encoding: [0xc3]
;
; X86-SSE2-LABEL: test_mm_andnot_ps:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pcmpeqd %xmm2, %xmm2 # encoding: [0x66,0x0f,0x76,0xd2]
; X86-SSE2-NEXT:    pxor %xmm2, %xmm0 # encoding: [0x66,0x0f,0xef,0xc2]
; X86-SSE2-NEXT:    pand %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc1]
; X86-SSE2-NEXT:    retl # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_andnot_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xef,0xc2]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdb,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_andnot_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x25,0xc0,0x0f]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; X64-SSE1-LABEL: test_mm_andnot_ps:
; X64-SSE1:       # %bb.0:
; X64-SSE1-NEXT:    andnps %xmm1, %xmm0 # encoding: [0x0f,0x55,0xc1]
; X64-SSE1-NEXT:    retq # encoding: [0xc3]
;
; X64-SSE2-LABEL: test_mm_andnot_ps:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pcmpeqd %xmm2, %xmm2 # encoding: [0x66,0x0f,0x76,0xd2]
; X64-SSE2-NEXT:    pxor %xmm2, %xmm0 # encoding: [0x66,0x0f,0xef,0xc2]
; X64-SSE2-NEXT:    pand %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc1]
; X64-SSE2-NEXT:    retq # encoding: [0xc3]
  %arg0 = bitcast <4 x float> %a0 to <4 x i32>
  %arg1 = bitcast <4 x float> %a1 to <4 x i32>
  %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
  %res = and <4 x i32> %not, %arg1
  %bc = bitcast <4 x i32> %res to <4 x float>
  ret <4 x float> %bc
}

define <4 x float> @test_mm_cmpeq_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x00]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x00]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x00]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp oeq <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpeq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x00]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpeq_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x00]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_mm_cmpge_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpge_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpleps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x02]
; SSE-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpge_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpleps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x02]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpge_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpleps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x02]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ole <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpge_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpge_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpless %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x02]
; SSE-NEXT:    movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpge_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpless %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x02]
; AVX-NEXT:    vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 2)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpgt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpltps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x01]
; SSE-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x01]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpltps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x01]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp olt <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpgt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpltss %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x01]
; SSE-NEXT:    movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpgt_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpltss %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x01]
; AVX-NEXT:    vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 1)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmple_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmple_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpleps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x02]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmple_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpleps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x02]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmple_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpleps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ole <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmple_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmple_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpless %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x02]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmple_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpless %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x02]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 2)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmplt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpltps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x01]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpltps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x01]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpltps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x01]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp olt <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmplt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpltss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x01]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmplt_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpltss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x01]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 1)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpneq_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpneq_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x04]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpneq_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpneqps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x04]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpneq_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneqps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x04]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp une <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpneq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpneq_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x04]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpneq_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneqss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x04]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 4)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnge_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnge_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnleps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x06]
; SSE-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnge_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpnleps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x06]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnge_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpnleps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x06]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ugt <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnge_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnge_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnless %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x06]
; SSE-NEXT:    movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnge_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpnless %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x06]
; AVX-NEXT:    vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 6)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpngt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpngt_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnltps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x05]
; SSE-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpngt_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpnltps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x05]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpngt_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpnltps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x05]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp uge <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpngt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpngt_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnltss %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x05]
; SSE-NEXT:    movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpngt_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpnltss %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x05]
; AVX-NEXT:    vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 5)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnle_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnle_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnleps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x06]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnle_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpnleps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x06]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnle_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpnleps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x06]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ugt <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnle_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnle_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnless %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x06]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnle_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpnless %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x06]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 6)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnlt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnlt_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnltps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x05]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnlt_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpnltps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x05]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnlt_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpnltps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x05]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp uge <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnlt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnlt_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnltss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x05]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnlt_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpnltss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x05]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 5)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpord_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpord_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x07]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpord_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpordps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x07]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpord_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpordps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x07]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ord <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpord_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpord_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x07]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpord_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpordss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x07]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpunord_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpunord_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x03]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpunord_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpunordps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x03]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpunord_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpunordps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x03]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp uno <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpunord_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpunord_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x03]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpunord_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpunordss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x03]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 3)
  ret <4 x float> %res
}

define i32 @test_mm_comieq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comieq_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1]
; SSE-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
; SSE-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
; SSE-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
; SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comieq_ss:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX1-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX1-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX1-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
; AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comieq_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX512-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX512-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX512-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
; AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comige_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comige_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1]
; SSE-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comige_ss:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX1-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comige_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX512-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comigt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comigt_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1]
; SSE-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comigt_ss:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX1-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comigt_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX512-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comile_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comile_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    comiss %xmm0, %xmm1 # encoding: [0x0f,0x2f,0xc8]
; SSE-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comile_ss:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vcomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2f,0xc8]
; AVX1-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comile_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vcomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc8]
; AVX512-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comilt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comilt_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    comiss %xmm0, %xmm1 # encoding: [0x0f,0x2f,0xc8]
; SSE-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comilt_ss:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vcomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2f,0xc8]
; AVX1-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comilt_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vcomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc8]
; AVX512-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comineq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comineq_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1]
; SSE-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
; SSE-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
; SSE-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
; SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comineq_ss:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX1-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX1-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX1-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
; AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comineq_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX512-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX512-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX512-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
; AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_cvt_ss2si(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvt_ss2si:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2d,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvt_ss2si:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2d,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvt_ss2si:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2d,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone

define <4 x float> @test_mm_cvtsi32_ss(<4 x float> %a0, i32 %a1) nounwind {
; X86-SSE-LABEL: test_mm_cvtsi32_ss:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    cvtsi2ssl {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x2a,0x44,0x24,0x04]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtsi32_ss:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x2a,0x44,0x24,0x04]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtsi32_ss:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2a,0x44,0x24,0x04]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_cvtsi32_ss:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    cvtsi2ss %edi, %xmm0 # encoding: [0xf3,0x0f,0x2a,0xc7]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_cvtsi32_ss:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vcvtsi2ss %edi, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x2a,0xc7]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_cvtsi32_ss:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vcvtsi2ss %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2a,0xc7]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone

define float @test_mm_cvtss_f32(<4 x float> %a0) nounwind {
; X86-SSE-LABEL: test_mm_cvtss_f32:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %eax # encoding: [0x50]
; X86-SSE-NEXT:    movss %xmm0, (%esp) # encoding: [0xf3,0x0f,0x11,0x04,0x24]
; X86-SSE-NEXT:    flds (%esp) # encoding: [0xd9,0x04,0x24]
; X86-SSE-NEXT:    popl %eax # encoding: [0x58]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtss_f32:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    pushl %eax # encoding: [0x50]
; X86-AVX1-NEXT:    vmovss %xmm0, (%esp) # encoding: [0xc5,0xfa,0x11,0x04,0x24]
; X86-AVX1-NEXT:    flds (%esp) # encoding: [0xd9,0x04,0x24]
; X86-AVX1-NEXT:    popl %eax # encoding: [0x58]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtss_f32:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    pushl %eax # encoding: [0x50]
; X86-AVX512-NEXT:    vmovss %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x04,0x24]
; X86-AVX512-NEXT:    flds (%esp) # encoding: [0xd9,0x04,0x24]
; X86-AVX512-NEXT:    popl %eax # encoding: [0x58]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm_cvtss_f32:
; X64:       # %bb.0:
; X64-NEXT:    retq # encoding: [0xc3]
  %res = extractelement <4 x float> %a0, i32 0
  ret float %res
}

define i32 @test_mm_cvtss_si32(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvtss_si32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2d,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtss_si32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2d,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtss_si32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2d,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %res
}

define i32 @test_mm_cvttss_si(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvttss_si:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2c,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvttss_si:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2c,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvttss_si:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvttss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2c,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone

define i32 @test_mm_cvttss_si32(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvttss_si32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2c,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvttss_si32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2c,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvttss_si32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvttss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2c,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
  ret i32 %res
}

define <4 x float> @test_mm_div_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_div_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    divps %xmm1, %xmm0 # encoding: [0x0f,0x5e,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_div_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5e,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_div_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5e,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = fdiv <4 x float> %a0, %a1
  ret <4 x float> %res
}

define <4 x float> @test_mm_div_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_div_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5e,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_div_ss:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5e,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_div_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5e,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %ext0 = extractelement <4 x float> %a0, i32 0
  %ext1 = extractelement <4 x float> %a1, i32 0
  %fdiv = fdiv float %ext0, %ext1
  %res = insertelement <4 x float> %a0, float %fdiv, i32 0
  ret <4 x float> %res
}

define i32 @test_MM_GET_EXCEPTION_MASK() nounwind {
; X86-SSE-LABEL: test_MM_GET_EXCEPTION_MASK:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %eax # encoding: [0x50]
; X86-SSE-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0]
; X86-SSE-NEXT:    stmxcsr (%eax) # encoding: [0x0f,0xae,0x18]
; X86-SSE-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-SSE-NEXT:    andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00]
; X86-SSE-NEXT:    # imm = 0x1F80
; X86-SSE-NEXT:    popl %ecx # encoding: [0x59]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_MM_GET_EXCEPTION_MASK:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %eax # encoding: [0x50]
; X86-AVX-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0]
; X86-AVX-NEXT:    vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18]
; X86-AVX-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-AVX-NEXT:    andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00]
; X86-AVX-NEXT:    # imm = 0x1F80
; X86-AVX-NEXT:    popl %ecx # encoding: [0x59]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_GET_EXCEPTION_MASK:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT:    stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-SSE-NEXT:    andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00]
; X64-SSE-NEXT:    # imm = 0x1F80
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_GET_EXCEPTION_MASK:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT:    vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-AVX-NEXT:    andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00]
; X64-AVX-NEXT:    # imm = 0x1F80
; X64-AVX-NEXT:    retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast ptr %1 to ptr
  call void @llvm.x86.sse.stmxcsr(ptr %2)
  %3 = load i32, ptr %1, align 4
  %4 = and i32 %3, 8064
  ret i32 %4
}
declare void @llvm.x86.sse.stmxcsr(ptr) nounwind readnone

define i32 @test_MM_GET_EXCEPTION_STATE() nounwind {
; X86-SSE-LABEL: test_MM_GET_EXCEPTION_STATE:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %eax # encoding: [0x50]
; X86-SSE-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0]
; X86-SSE-NEXT:    stmxcsr (%eax) # encoding: [0x0f,0xae,0x18]
; X86-SSE-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-SSE-NEXT:    andl $63, %eax # encoding: [0x83,0xe0,0x3f]
; X86-SSE-NEXT:    popl %ecx # encoding: [0x59]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_MM_GET_EXCEPTION_STATE:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %eax # encoding: [0x50]
; X86-AVX-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0]
; X86-AVX-NEXT:    vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18]
; X86-AVX-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-AVX-NEXT:    andl $63, %eax # encoding: [0x83,0xe0,0x3f]
; X86-AVX-NEXT:    popl %ecx # encoding: [0x59]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_GET_EXCEPTION_STATE:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT:    stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-SSE-NEXT:    andl $63, %eax # encoding: [0x83,0xe0,0x3f]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_GET_EXCEPTION_STATE:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT:    vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-AVX-NEXT:    andl $63, %eax # encoding: [0x83,0xe0,0x3f]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast ptr %1 to ptr
  call void @llvm.x86.sse.stmxcsr(ptr %2)
  %3 = load i32, ptr %1, align 4
  %4 = and i32 %3, 63
  ret i32 %4
}

define i32 @test_MM_GET_FLUSH_ZERO_MODE() nounwind {
; X86-SSE-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %eax # encoding: [0x50]
; X86-SSE-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0]
; X86-SSE-NEXT:    stmxcsr (%eax) # encoding: [0x0f,0xae,0x18]
; X86-SSE-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-SSE-NEXT:    andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
; X86-SSE-NEXT:    # imm = 0x8000
; X86-SSE-NEXT:    popl %ecx # encoding: [0x59]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %eax # encoding: [0x50]
; X86-AVX-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0]
; X86-AVX-NEXT:    vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18]
; X86-AVX-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-AVX-NEXT:    andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
; X86-AVX-NEXT:    # imm = 0x8000
; X86-AVX-NEXT:    popl %ecx # encoding: [0x59]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT:    stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-SSE-NEXT:    andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
; X64-SSE-NEXT:    # imm = 0x8000
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT:    vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-AVX-NEXT:    andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
; X64-AVX-NEXT:    # imm = 0x8000
; X64-AVX-NEXT:    retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast ptr %1 to ptr
  call void @llvm.x86.sse.stmxcsr(ptr %2)
  %3 = load i32, ptr %1, align 4
  %4 = and i32 %3, 32768
  ret i32 %4
}

define i32 @test_MM_GET_ROUNDING_MODE() nounwind {
; X86-SSE-LABEL: test_MM_GET_ROUNDING_MODE:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %eax # encoding: [0x50]
; X86-SSE-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0]
; X86-SSE-NEXT:    stmxcsr (%eax) # encoding: [0x0f,0xae,0x18]
; X86-SSE-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-SSE-NEXT:    andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00]
; X86-SSE-NEXT:    # imm = 0x6000
; X86-SSE-NEXT:    popl %ecx # encoding: [0x59]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_MM_GET_ROUNDING_MODE:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %eax # encoding: [0x50]
; X86-AVX-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0]
; X86-AVX-NEXT:    vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18]
; X86-AVX-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-AVX-NEXT:    andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00]
; X86-AVX-NEXT:    # imm = 0x6000
; X86-AVX-NEXT:    popl %ecx # encoding: [0x59]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_GET_ROUNDING_MODE:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT:    stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-SSE-NEXT:    andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00]
; X64-SSE-NEXT:    # imm = 0x6000
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_GET_ROUNDING_MODE:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT:    vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-AVX-NEXT:    andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00]
; X64-AVX-NEXT:    # imm = 0x6000
; X64-AVX-NEXT:    retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast ptr %1 to ptr
  call void @llvm.x86.sse.stmxcsr(ptr %2)
  %3 = load i32, ptr %1, align 4
  %4 = and i32 %3, 24576
  ret i32 %4
}

define i32 @test_mm_getcsr() nounwind {
; X86-SSE-LABEL: test_mm_getcsr:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %eax # encoding: [0x50]
; X86-SSE-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0]
; X86-SSE-NEXT:    stmxcsr (%eax) # encoding: [0x0f,0xae,0x18]
; X86-SSE-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-SSE-NEXT:    popl %ecx # encoding: [0x59]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_mm_getcsr:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %eax # encoding: [0x50]
; X86-AVX-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0]
; X86-AVX-NEXT:    vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18]
; X86-AVX-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-AVX-NEXT:    popl %ecx # encoding: [0x59]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_getcsr:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT:    stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_mm_getcsr:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT:    vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast ptr %1 to ptr
  call void @llvm.x86.sse.stmxcsr(ptr %2)
  %3 = load i32, ptr %1, align 4
  ret i32 %3
}

define <4 x float> @test_mm_load_ps(ptr %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_ps:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load_ps:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load_ps:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load_ps:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load_ps:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load_ps:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %arg0 = bitcast ptr %a0 to ptr
  %res = load <4 x float>, ptr %arg0, align 16
  ret <4 x float> %res
}

define <4 x float> @test_mm_load_ps1(ptr %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_ps1:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    # encoding: [0xf3,0x0f,0x10,0x00]
; X86-SSE-NEXT:    shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load_ps1:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vbroadcastss (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load_ps1:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vbroadcastss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load_ps1:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    # encoding: [0xf3,0x0f,0x10,0x07]
; X64-SSE-NEXT:    shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load_ps1:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vbroadcastss (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load_ps1:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vbroadcastss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %ld = load float, ptr %a0, align 4
  %res0 = insertelement <4 x float> undef, float %ld, i32 0
  %res1 = insertelement <4 x float> %res0, float %ld, i32 1
  %res2 = insertelement <4 x float> %res1, float %ld, i32 2
  %res3 = insertelement <4 x float> %res2, float %ld, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_load_ss(ptr %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_ss:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    # encoding: [0xf3,0x0f,0x10,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load_ss:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT:    # encoding: [0xc5,0xfa,0x10,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load_ss:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT:    # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load_ss:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    # encoding: [0xf3,0x0f,0x10,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load_ss:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX1-NEXT:    # encoding: [0xc5,0xfa,0x10,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load_ss:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX512-NEXT:    # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %ld = load float, ptr %a0, align 1
  %res0 = insertelement <4 x float> undef, float %ld, i32 0
  %res1 = insertelement <4 x float> %res0, float 0.0, i32 1
  %res2 = insertelement <4 x float> %res1, float 0.0, i32 2
  %res3 = insertelement <4 x float> %res2, float 0.0, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_load1_ps(ptr %a0) nounwind {
; X86-SSE-LABEL: test_mm_load1_ps:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    # encoding: [0xf3,0x0f,0x10,0x00]
; X86-SSE-NEXT:    shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load1_ps:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vbroadcastss (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load1_ps:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vbroadcastss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load1_ps:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    # encoding: [0xf3,0x0f,0x10,0x07]
; X64-SSE-NEXT:    shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load1_ps:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vbroadcastss (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load1_ps:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vbroadcastss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %ld = load float, ptr %a0, align 4
  %res0 = insertelement <4 x float> undef, float %ld, i32 0
  %res1 = insertelement <4 x float> %res0, float %ld, i32 1
  %res2 = insertelement <4 x float> %res1, float %ld, i32 2
  %res3 = insertelement <4 x float> %res2, float %ld, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_loadh_pi(<4 x float> %a0, ptr %a1) {
; X86-SSE-LABEL: test_mm_loadh_pi:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movhps (%eax), %xmm0 # encoding: [0x0f,0x16,0x00]
; X86-SSE-NEXT:    # xmm0 = xmm0[0,1],mem[0,1]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadh_pi:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovhps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0x00]
; X86-AVX1-NEXT:    # xmm0 = xmm0[0,1],mem[0,1]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadh_pi:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovhps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0x00]
; X86-AVX512-NEXT:    # xmm0 = xmm0[0,1],mem[0,1]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadh_pi:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movhps (%rdi), %xmm0 # encoding: [0x0f,0x16,0x07]
; X64-SSE-NEXT:    # xmm0 = xmm0[0,1],mem[0,1]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadh_pi:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovhps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0x07]
; X64-AVX1-NEXT:    # xmm0 = xmm0[0,1],mem[0,1]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadh_pi:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovhps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0x07]
; X64-AVX512-NEXT:    # xmm0 = xmm0[0,1],mem[0,1]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %ptr = bitcast ptr %a1 to ptr
  %ld = load <2 x float>, ptr %ptr
  %ext = shufflevector <2 x float> %ld, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %res = shufflevector <4 x float> %a0, <4 x float> %ext, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x float> %res
}

1385 define <4 x float> @test_mm_loadl_pi(<4 x float> %a0, ptr %a1) {
1386 ; X86-SSE-LABEL: test_mm_loadl_pi:
1388 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1389 ; X86-SSE-NEXT: movlps (%eax), %xmm0 # encoding: [0x0f,0x12,0x00]
1390 ; X86-SSE-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
1391 ; X86-SSE-NEXT: retl # encoding: [0xc3]
1393 ; X86-AVX1-LABEL: test_mm_loadl_pi:
1394 ; X86-AVX1: # %bb.0:
1395 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1396 ; X86-AVX1-NEXT: vmovlps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x12,0x00]
1397 ; X86-AVX1-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
1398 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
1400 ; X86-AVX512-LABEL: test_mm_loadl_pi:
1401 ; X86-AVX512: # %bb.0:
1402 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1403 ; X86-AVX512-NEXT: vmovlps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x12,0x00]
1404 ; X86-AVX512-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
1405 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
1407 ; X64-SSE-LABEL: test_mm_loadl_pi:
1409 ; X64-SSE-NEXT: movlps (%rdi), %xmm0 # encoding: [0x0f,0x12,0x07]
1410 ; X64-SSE-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
1411 ; X64-SSE-NEXT: retq # encoding: [0xc3]
1413 ; X64-AVX1-LABEL: test_mm_loadl_pi:
1414 ; X64-AVX1: # %bb.0:
1415 ; X64-AVX1-NEXT: vmovlps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x12,0x07]
1416 ; X64-AVX1-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
1417 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
1419 ; X64-AVX512-LABEL: test_mm_loadl_pi:
1420 ; X64-AVX512: # %bb.0:
1421 ; X64-AVX512-NEXT: vmovlps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x12,0x07]
1422 ; X64-AVX512-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
1423 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
1424 %ptr = bitcast ptr %a1 to ptr
1425 %ld = load <2 x float>, ptr %ptr
1426 %ext = shufflevector <2 x float> %ld, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1427 %res = shufflevector <4 x float> %a0, <4 x float> %ext, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1428 ret <4 x float> %res
1431 define <4 x float> @test_mm_loadr_ps(ptr %a0) nounwind {
1432 ; X86-SSE-LABEL: test_mm_loadr_ps:
1434 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1435 ; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00]
1436 ; X86-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b]
1437 ; X86-SSE-NEXT: # xmm0 = xmm0[3,2,1,0]
1438 ; X86-SSE-NEXT: retl # encoding: [0xc3]
1440 ; X86-AVX1-LABEL: test_mm_loadr_ps:
1441 ; X86-AVX1: # %bb.0:
1442 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1443 ; X86-AVX1-NEXT: vpermilps $27, (%eax), %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0x00,0x1b]
1444 ; X86-AVX1-NEXT: # xmm0 = mem[3,2,1,0]
1445 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
1447 ; X86-AVX512-LABEL: test_mm_loadr_ps:
1448 ; X86-AVX512: # %bb.0:
1449 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1450 ; X86-AVX512-NEXT: vpermilps $27, (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0x00,0x1b]
1451 ; X86-AVX512-NEXT: # xmm0 = mem[3,2,1,0]
1452 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
1454 ; X64-SSE-LABEL: test_mm_loadr_ps:
1456 ; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
1457 ; X64-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b]
1458 ; X64-SSE-NEXT: # xmm0 = xmm0[3,2,1,0]
1459 ; X64-SSE-NEXT: retq # encoding: [0xc3]
1461 ; X64-AVX1-LABEL: test_mm_loadr_ps:
1462 ; X64-AVX1: # %bb.0:
1463 ; X64-AVX1-NEXT: vpermilps $27, (%rdi), %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0x07,0x1b]
1464 ; X64-AVX1-NEXT: # xmm0 = mem[3,2,1,0]
1465 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
1467 ; X64-AVX512-LABEL: test_mm_loadr_ps:
1468 ; X64-AVX512: # %bb.0:
1469 ; X64-AVX512-NEXT: vpermilps $27, (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0x07,0x1b]
1470 ; X64-AVX512-NEXT: # xmm0 = mem[3,2,1,0]
1471 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
1472 %arg0 = bitcast ptr %a0 to ptr
1473 %ld = load <4 x float>, ptr %arg0, align 16
1474 %res = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1475 ret <4 x float> %res
1478 define <4 x float> @test_mm_loadu_ps(ptr %a0) nounwind {
1479 ; X86-SSE-LABEL: test_mm_loadu_ps:
1481 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1482 ; X86-SSE-NEXT: movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00]
1483 ; X86-SSE-NEXT: retl # encoding: [0xc3]
1485 ; X86-AVX1-LABEL: test_mm_loadu_ps:
1486 ; X86-AVX1: # %bb.0:
1487 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1488 ; X86-AVX1-NEXT: vmovups (%eax), %xmm0 # encoding: [0xc5,0xf8,0x10,0x00]
1489 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
1491 ; X86-AVX512-LABEL: test_mm_loadu_ps:
1492 ; X86-AVX512: # %bb.0:
1493 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1494 ; X86-AVX512-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
1495 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
1497 ; X64-SSE-LABEL: test_mm_loadu_ps:
1499 ; X64-SSE-NEXT: movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07]
1500 ; X64-SSE-NEXT: retq # encoding: [0xc3]
1502 ; X64-AVX1-LABEL: test_mm_loadu_ps:
1503 ; X64-AVX1: # %bb.0:
1504 ; X64-AVX1-NEXT: vmovups (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x10,0x07]
1505 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
1507 ; X64-AVX512-LABEL: test_mm_loadu_ps:
1508 ; X64-AVX512: # %bb.0:
1509 ; X64-AVX512-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
1510 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
1511 %arg0 = bitcast ptr %a0 to ptr
1512 %res = load <4 x float>, ptr %arg0, align 1
1513 ret <4 x float> %res
1516 define <4 x float> @test_mm_max_ps(<4 x float> %a0, <4 x float> %a1) {
1517 ; SSE-LABEL: test_mm_max_ps:
1519 ; SSE-NEXT: maxps %xmm1, %xmm0 # encoding: [0x0f,0x5f,0xc1]
1520 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1522 ; AVX1-LABEL: test_mm_max_ps:
1524 ; AVX1-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5f,0xc1]
1525 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1527 ; AVX512-LABEL: test_mm_max_ps:
1529 ; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5f,0xc1]
1530 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1531 %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
1532 ret <4 x float> %res
1534 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
1536 define <4 x float> @test_mm_max_ss(<4 x float> %a0, <4 x float> %a1) {
1537 ; SSE-LABEL: test_mm_max_ss:
1539 ; SSE-NEXT: maxss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5f,0xc1]
1540 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1542 ; AVX1-LABEL: test_mm_max_ss:
1544 ; AVX1-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5f,0xc1]
1545 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1547 ; AVX512-LABEL: test_mm_max_ss:
1549 ; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5f,0xc1]
1550 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1551 %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
1552 ret <4 x float> %res
1554 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
1556 define <4 x float> @test_mm_min_ps(<4 x float> %a0, <4 x float> %a1) {
1557 ; SSE-LABEL: test_mm_min_ps:
1559 ; SSE-NEXT: minps %xmm1, %xmm0 # encoding: [0x0f,0x5d,0xc1]
1560 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1562 ; AVX1-LABEL: test_mm_min_ps:
1564 ; AVX1-NEXT: vminps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5d,0xc1]
1565 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1567 ; AVX512-LABEL: test_mm_min_ps:
1569 ; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1]
1570 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1571 %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
1572 ret <4 x float> %res
1574 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
1576 define <4 x float> @test_mm_min_ss(<4 x float> %a0, <4 x float> %a1) {
1577 ; SSE-LABEL: test_mm_min_ss:
1579 ; SSE-NEXT: minss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5d,0xc1]
1580 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1582 ; AVX1-LABEL: test_mm_min_ss:
1584 ; AVX1-NEXT: vminss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5d,0xc1]
1585 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1587 ; AVX512-LABEL: test_mm_min_ss:
1589 ; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5d,0xc1]
1590 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1591 %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
1592 ret <4 x float> %res
1594 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
1596 define <4 x float> @test_mm_move_ss(<4 x float> %a0, <4 x float> %a1) {
1597 ; SSE-LABEL: test_mm_move_ss:
1599 ; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
1600 ; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
1601 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1603 ; AVX-LABEL: test_mm_move_ss:
1605 ; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
1606 ; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
1607 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1608 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
1609 ret <4 x float> %res
1612 define <4 x float> @test_mm_movehl_ps(<4 x float> %a0, <4 x float> %a1) {
1613 ; SSE-LABEL: test_mm_movehl_ps:
1615 ; SSE-NEXT: movhlps %xmm1, %xmm0 # encoding: [0x0f,0x12,0xc1]
1616 ; SSE-NEXT: # xmm0 = xmm1[1],xmm0[1]
1617 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1619 ; AVX1-LABEL: test_mm_movehl_ps:
1621 ; AVX1-NEXT: vunpckhpd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x15,0xc0]
1622 ; AVX1-NEXT: # xmm0 = xmm1[1],xmm0[1]
1623 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1625 ; AVX512-LABEL: test_mm_movehl_ps:
1627 ; AVX512-NEXT: vunpckhpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x15,0xc0]
1628 ; AVX512-NEXT: # xmm0 = xmm1[1],xmm0[1]
1629 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1630 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1631 ret <4 x float> %res
1634 define <4 x float> @test_mm_movelh_ps(<4 x float> %a0, <4 x float> %a1) {
1635 ; SSE-LABEL: test_mm_movelh_ps:
1637 ; SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
1638 ; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
1639 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1641 ; AVX1-LABEL: test_mm_movelh_ps:
1643 ; AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
1644 ; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0]
1645 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1647 ; AVX512-LABEL: test_mm_movelh_ps:
1649 ; AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
1650 ; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0]
1651 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1652 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1653 ret <4 x float> %res
1656 define i32 @test_mm_movemask_ps(<4 x float> %a0) nounwind {
1657 ; SSE-LABEL: test_mm_movemask_ps:
1659 ; SSE-NEXT: movmskps %xmm0, %eax # encoding: [0x0f,0x50,0xc0]
1660 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1662 ; AVX-LABEL: test_mm_movemask_ps:
1664 ; AVX-NEXT: vmovmskps %xmm0, %eax # encoding: [0xc5,0xf8,0x50,0xc0]
1665 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1666 %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
1669 declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
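
; Note: (v)movmskps packs the sign bit of each of the four single-precision
; lanes into the low 4 bits of a general-purpose register and zeroes the rest.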

define <4 x float> @test_mm_mul_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_mul_ps:
; SSE: # %bb.0:
; SSE-NEXT: mulps %xmm1, %xmm0 # encoding: [0x0f,0x59,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mul_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x59,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_mul_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x59,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = fmul <4 x float> %a0, %a1
  ret <4 x float> %res
}

define <4 x float> @test_mm_mul_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_mul_ss:
; SSE: # %bb.0:
; SSE-NEXT: mulss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x59,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mul_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vmulss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x59,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_mul_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x59,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %ext0 = extractelement <4 x float> %a0, i32 0
  %ext1 = extractelement <4 x float> %a1, i32 0
  %fmul = fmul float %ext0, %ext1
  %res = insertelement <4 x float> %a0, float %fmul, i32 0
  ret <4 x float> %res
}

define <4 x float> @test_mm_or_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_or_ps:
; SSE: # %bb.0:
; SSE-NEXT: orps %xmm1, %xmm0 # encoding: [0x0f,0x56,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_or_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_or_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <4 x float> %a0 to <4 x i32>
  %arg1 = bitcast <4 x float> %a1 to <4 x i32>
  %res = or <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <4 x float>
  ret <4 x float> %bc
}

define void @test_mm_prefetch(ptr %a0) {
; X86-LABEL: test_mm_prefetch:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: prefetchnta (%eax) # encoding: [0x0f,0x18,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm_prefetch:
; X64: # %bb.0:
; X64-NEXT: prefetchnta (%rdi) # encoding: [0x0f,0x18,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.prefetch(ptr %a0, i32 0, i32 0, i32 1)
  ret void
}
declare void @llvm.prefetch(ptr nocapture, i32, i32, i32) nounwind readnone
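
; Note: llvm.prefetch's operands are (address, rw, locality, cache type);
; rw=0 requests a read prefetch and locality=0 means no expected reuse,
; which is why the lowering selects prefetchnta.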

define <4 x float> @test_mm_rcp_ps(<4 x float> %a0) {
; SSE-LABEL: test_mm_rcp_ps:
; SSE: # %bb.0:
; SSE-NEXT: rcpps %xmm0, %xmm0 # encoding: [0x0f,0x53,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_rcp_ps:
; AVX: # %bb.0:
; AVX-NEXT: vrcpps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x53,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone

define <4 x float> @test_mm_rcp_ss(<4 x float> %a0) {
; SSE-LABEL: test_mm_rcp_ss:
; SSE: # %bb.0:
; SSE-NEXT: rcpss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x53,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_rcp_ss:
; AVX: # %bb.0:
; AVX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x53,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %rcp = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0)
  ret <4 x float> %rcp
}
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone

define <4 x float> @test_mm_rsqrt_ps(<4 x float> %a0) {
; SSE-LABEL: test_mm_rsqrt_ps:
; SSE: # %bb.0:
; SSE-NEXT: rsqrtps %xmm0, %xmm0 # encoding: [0x0f,0x52,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_rsqrt_ps:
; AVX: # %bb.0:
; AVX-NEXT: vrsqrtps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x52,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone

define <4 x float> @test_mm_rsqrt_ss(<4 x float> %a0) {
; SSE-LABEL: test_mm_rsqrt_ss:
; SSE: # %bb.0:
; SSE-NEXT: rsqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x52,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_rsqrt_ss:
; AVX: # %bb.0:
; AVX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x52,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %rsqrt = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0)
  ret <4 x float> %rsqrt
}
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
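
; The _MM_SET_* tests below share one read-modify-write pattern on MXCSR:
; stmxcsr spills the control/status register to a stack slot, the field of
; interest is masked out and replaced, and ldmxcsr reloads the result. The
; AND immediates are the complements of the field masks: -8065 = ~0x1F80
; (exception masks), -64 = ~0x3F (exception flags), -32769 = ~0x8000
; (flush-to-zero) and -24577 = ~0x6000 (rounding control).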

define void @test_MM_SET_EXCEPTION_MASK(i32 %a0) nounwind {
; X86-SSE-LABEL: test_MM_SET_EXCEPTION_MASK:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19]
; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
; X86-SSE-NEXT: andl $-8065, %edx # encoding: [0x81,0xe2,0x7f,0xe0,0xff,0xff]
; X86-SSE-NEXT: # imm = 0xE07F
; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11]
; X86-SSE-NEXT: popl %eax # encoding: [0x58]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_MM_SET_EXCEPTION_MASK:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19]
; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
; X86-AVX-NEXT: andl $-8065, %edx # encoding: [0x81,0xe2,0x7f,0xe0,0xff,0xff]
; X86-AVX-NEXT: # imm = 0xE07F
; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11]
; X86-AVX-NEXT: popl %eax # encoding: [0x58]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_SET_EXCEPTION_MASK:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
; X64-SSE-NEXT: andl $-8065, %ecx # encoding: [0x81,0xe1,0x7f,0xe0,0xff,0xff]
; X64-SSE-NEXT: # imm = 0xE07F
; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_SET_EXCEPTION_MASK:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
; X64-AVX-NEXT: andl $-8065, %ecx # encoding: [0x81,0xe1,0x7f,0xe0,0xff,0xff]
; X64-AVX-NEXT: # imm = 0xE07F
; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10]
; X64-AVX-NEXT: retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast ptr %1 to ptr
  call void @llvm.x86.sse.stmxcsr(ptr %2)
  %3 = load i32, ptr %1
  %4 = and i32 %3, -8065
  %5 = or i32 %4, %a0
  store i32 %5, ptr %1
  call void @llvm.x86.sse.ldmxcsr(ptr %2)
  ret void
}
declare void @llvm.x86.sse.ldmxcsr(ptr) nounwind readnone

define void @test_MM_SET_EXCEPTION_STATE(i32 %a0) nounwind {
; X86-SSE-LABEL: test_MM_SET_EXCEPTION_STATE:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19]
; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
; X86-SSE-NEXT: andl $-64, %edx # encoding: [0x83,0xe2,0xc0]
; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11]
; X86-SSE-NEXT: popl %eax # encoding: [0x58]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_MM_SET_EXCEPTION_STATE:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19]
; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
; X86-AVX-NEXT: andl $-64, %edx # encoding: [0x83,0xe2,0xc0]
; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11]
; X86-AVX-NEXT: popl %eax # encoding: [0x58]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_SET_EXCEPTION_STATE:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
; X64-SSE-NEXT: andl $-64, %ecx # encoding: [0x83,0xe1,0xc0]
; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_SET_EXCEPTION_STATE:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
; X64-AVX-NEXT: andl $-64, %ecx # encoding: [0x83,0xe1,0xc0]
; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10]
; X64-AVX-NEXT: retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast ptr %1 to ptr
  call void @llvm.x86.sse.stmxcsr(ptr %2)
  %3 = load i32, ptr %1
  %4 = and i32 %3, -64
  %5 = or i32 %4, %a0
  store i32 %5, ptr %1
  call void @llvm.x86.sse.ldmxcsr(ptr %2)
  ret void
}

define void @test_MM_SET_FLUSH_ZERO_MODE(i32 %a0) nounwind {
; X86-SSE-LABEL: test_MM_SET_FLUSH_ZERO_MODE:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19]
; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
; X86-SSE-NEXT: andl $-32769, %edx # encoding: [0x81,0xe2,0xff,0x7f,0xff,0xff]
; X86-SSE-NEXT: # imm = 0xFFFF7FFF
; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11]
; X86-SSE-NEXT: popl %eax # encoding: [0x58]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_MM_SET_FLUSH_ZERO_MODE:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19]
; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
; X86-AVX-NEXT: andl $-32769, %edx # encoding: [0x81,0xe2,0xff,0x7f,0xff,0xff]
; X86-AVX-NEXT: # imm = 0xFFFF7FFF
; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11]
; X86-AVX-NEXT: popl %eax # encoding: [0x58]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_SET_FLUSH_ZERO_MODE:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
; X64-SSE-NEXT: andl $-32769, %ecx # encoding: [0x81,0xe1,0xff,0x7f,0xff,0xff]
; X64-SSE-NEXT: # imm = 0xFFFF7FFF
; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_SET_FLUSH_ZERO_MODE:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
; X64-AVX-NEXT: andl $-32769, %ecx # encoding: [0x81,0xe1,0xff,0x7f,0xff,0xff]
; X64-AVX-NEXT: # imm = 0xFFFF7FFF
; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10]
; X64-AVX-NEXT: retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast ptr %1 to ptr
  call void @llvm.x86.sse.stmxcsr(ptr %2)
  %3 = load i32, ptr %1
  %4 = and i32 %3, -32769
  %5 = or i32 %4, %a0
  store i32 %5, ptr %1
  call void @llvm.x86.sse.ldmxcsr(ptr %2)
  ret void
}

define <4 x float> @test_mm_set_ps(float %a0, float %a1, float %a2, float %a3) nounwind {
; X86-SSE-LABEL: test_mm_set_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10]
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c]
; X86-SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x08]
; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x04]
; X86-SSE-NEXT: unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca]
; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10]
; X86-AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c]
; X86-AVX1-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10]
; X86-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; X86-AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x08]
; X86-AVX1-NEXT: vinsertps $32, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x20]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; X86-AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x04]
; X86-AVX1-NEXT: vinsertps $48, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x30]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,1,2],xmm1[0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10]
; X86-AVX512-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c]
; X86-AVX512-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10]
; X86-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; X86-AVX512-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x08]
; X86-AVX512-NEXT: vinsertps $32, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x20]
; X86-AVX512-NEXT: # xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; X86-AVX512-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x04]
; X86-AVX512-NEXT: vinsertps $48, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x30]
; X86-AVX512-NEXT: # xmm0 = xmm0[0,1,2],xmm1[0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-SSE-NEXT: unpcklps %xmm2, %xmm3 # encoding: [0x0f,0x14,0xda]
; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X64-SSE-NEXT: movlhps %xmm1, %xmm3 # encoding: [0x0f,0x16,0xd9]
; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm1[0]
; X64-SSE-NEXT: movaps %xmm3, %xmm0 # encoding: [0x0f,0x28,0xc3]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10]
; X64-AVX1-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; X64-AVX1-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20]
; X64-AVX1-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; X64-AVX1-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30]
; X64-AVX1-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10]
; X64-AVX512-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; X64-AVX512-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20]
; X64-AVX512-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; X64-AVX512-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30]
; X64-AVX512-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <4 x float> undef, float %a3, i32 0
  %res1 = insertelement <4 x float> %res0, float %a2, i32 1
  %res2 = insertelement <4 x float> %res1, float %a1, i32 2
  %res3 = insertelement <4 x float> %res2, float %a0, i32 3
  ret <4 x float> %res3
}
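
; Note: _mm_set_ps takes its arguments in high-to-low lane order, so %a3
; lands in lane 0 and %a0 in lane 3; test_mm_setr_ps below is the
; low-to-high variant.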

define <4 x float> @test_mm_set_ps1(float %a0) nounwind {
; X86-SSE-LABEL: test_mm_set_ps1:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set_ps1:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX1-NEXT: vshufps $0, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc0,0x00]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set_ps1:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set_ps1:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set_ps1:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vshufps $0, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc0,0x00]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set_ps1:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <4 x float> undef, float %a0, i32 0
  %res1 = insertelement <4 x float> %res0, float %a0, i32 1
  %res2 = insertelement <4 x float> %res1, float %a0, i32 2
  %res3 = insertelement <4 x float> %res2, float %a0, i32 3
  ret <4 x float> %res3
}

define void @test_MM_SET_ROUNDING_MODE(i32 %a0) nounwind {
; X86-SSE-LABEL: test_MM_SET_ROUNDING_MODE:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19]
; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
; X86-SSE-NEXT: andl $-24577, %edx # encoding: [0x81,0xe2,0xff,0x9f,0xff,0xff]
; X86-SSE-NEXT: # imm = 0x9FFF
; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11]
; X86-SSE-NEXT: popl %eax # encoding: [0x58]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_MM_SET_ROUNDING_MODE:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19]
; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
; X86-AVX-NEXT: andl $-24577, %edx # encoding: [0x81,0xe2,0xff,0x9f,0xff,0xff]
; X86-AVX-NEXT: # imm = 0x9FFF
; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11]
; X86-AVX-NEXT: popl %eax # encoding: [0x58]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_SET_ROUNDING_MODE:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
; X64-SSE-NEXT: andl $-24577, %ecx # encoding: [0x81,0xe1,0xff,0x9f,0xff,0xff]
; X64-SSE-NEXT: # imm = 0x9FFF
; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_SET_ROUNDING_MODE:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
; X64-AVX-NEXT: andl $-24577, %ecx # encoding: [0x81,0xe1,0xff,0x9f,0xff,0xff]
; X64-AVX-NEXT: # imm = 0x9FFF
; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10]
; X64-AVX-NEXT: retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast ptr %1 to ptr
  call void @llvm.x86.sse.stmxcsr(ptr %2)
  %3 = load i32, ptr %1
  %4 = and i32 %3, -24577
  %5 = or i32 %4, %a0
  store i32 %5, ptr %1
  call void @llvm.x86.sse.ldmxcsr(ptr %2)
  ret void
}

define <4 x float> @test_mm_set_ss(float %a0) nounwind {
; X86-SSE-LABEL: test_mm_set_ss:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x04]
; X86-SSE-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
; X86-SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; X86-SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set_ss:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9]
; X86-AVX1-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01]
; X86-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set_ss:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9]
; X86-AVX512-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01]
; X86-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set_ss:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: xorps %xmm1, %xmm1 # encoding: [0x0f,0x57,0xc9]
; X64-SSE-NEXT: movss %xmm0, %xmm1 # encoding: [0xf3,0x0f,0x10,0xc8]
; X64-SSE-NEXT: # xmm1 = xmm0[0],xmm1[1,2,3]
; X64-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_mm_set_ss:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9]
; X64-AVX-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01]
; X64-AVX-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3]
; X64-AVX-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <4 x float> undef, float %a0, i32 0
  %res1 = insertelement <4 x float> %res0, float 0.0, i32 1
  %res2 = insertelement <4 x float> %res1, float 0.0, i32 2
  %res3 = insertelement <4 x float> %res2, float 0.0, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_set1_ps(float %a0) nounwind {
; X86-SSE-LABEL: test_mm_set1_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set1_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX1-NEXT: vshufps $0, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc0,0x00]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set1_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set1_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set1_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vshufps $0, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc0,0x00]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set1_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <4 x float> undef, float %a0, i32 0
  %res1 = insertelement <4 x float> %res0, float %a0, i32 1
  %res2 = insertelement <4 x float> %res1, float %a0, i32 2
  %res3 = insertelement <4 x float> %res2, float %a0, i32 3
  ret <4 x float> %res3
}

define void @test_mm_setcsr(i32 %a0) nounwind {
; X86-SSE-LABEL: test_mm_setcsr:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax # encoding: [0x8d,0x44,0x24,0x04]
; X86-SSE-NEXT: ldmxcsr (%eax) # encoding: [0x0f,0xae,0x10]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_mm_setcsr:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: leal {{[0-9]+}}(%esp), %eax # encoding: [0x8d,0x44,0x24,0x04]
; X86-AVX-NEXT: vldmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x10]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_setcsr:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x7c,0x24,0xfc]
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_mm_setcsr:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x7c,0x24,0xfc]
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10]
; X64-AVX-NEXT: retq # encoding: [0xc3]
  %st = alloca i32, align 4
  store i32 %a0, ptr %st, align 4
  %bc = bitcast ptr %st to ptr
  call void @llvm.x86.sse.ldmxcsr(ptr %bc)
  ret void
}

define <4 x float> @test_mm_setr_ps(float %a0, float %a1, float %a2, float %a3) nounwind {
; X86-SSE-LABEL: test_mm_setr_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10]
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c]
; X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x08]
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_setr_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10]
; X86-AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c]
; X86-AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: # encoding: [0xc5,0xfa,0x10,0x54,0x24,0x08]
; X86-AVX1-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: # encoding: [0xc5,0xfa,0x10,0x5c,0x24,0x04]
; X86-AVX1-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10]
; X86-AVX1-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; X86-AVX1-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20]
; X86-AVX1-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; X86-AVX1-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30]
; X86-AVX1-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_setr_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10]
; X86-AVX512-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c]
; X86-AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x54,0x24,0x08]
; X86-AVX512-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x5c,0x24,0x04]
; X86-AVX512-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10]
; X86-AVX512-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; X86-AVX512-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20]
; X86-AVX512-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; X86-AVX512-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30]
; X86-AVX512-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_setr_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: unpcklps %xmm3, %xmm2 # encoding: [0x0f,0x14,0xd3]
; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X64-SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1]
; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-SSE-NEXT: movlhps %xmm2, %xmm0 # encoding: [0x0f,0x16,0xc2]
; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_setr_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10]
; X64-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; X64-AVX1-NEXT: vinsertps $32, %xmm2, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc2,0x20]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; X64-AVX1-NEXT: vinsertps $48, %xmm3, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc3,0x30]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,1,2],xmm3[0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_setr_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10]
; X64-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; X64-AVX512-NEXT: vinsertps $32, %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc2,0x20]
; X64-AVX512-NEXT: # xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; X64-AVX512-NEXT: vinsertps $48, %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc3,0x30]
; X64-AVX512-NEXT: # xmm0 = xmm0[0,1,2],xmm3[0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <4 x float> undef, float %a0, i32 0
  %res1 = insertelement <4 x float> %res0, float %a1, i32 1
  %res2 = insertelement <4 x float> %res1, float %a2, i32 2
  %res3 = insertelement <4 x float> %res2, float %a3, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_setzero_ps() {
; SSE-LABEL: test_mm_setzero_ps:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_setzero_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_setzero_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  ret <4 x float> zeroinitializer
}

define void @test_mm_sfence() nounwind {
; CHECK-LABEL: test_mm_sfence:
; CHECK: # %bb.0:
; CHECK-NEXT: sfence # encoding: [0x0f,0xae,0xf8]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  call void @llvm.x86.sse.sfence()
  ret void
}
declare void @llvm.x86.sse.sfence() nounwind readnone
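
; Note: sfence orders stores only: it guarantees every store issued before it
; is globally visible before any store issued after it, which matters chiefly
; for weakly-ordered non-temporal stores.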

define <4 x float> @test_mm_shuffle_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_shuffle_ps:
; SSE: # %bb.0:
; SSE-NEXT: shufps $0, %xmm1, %xmm0 # encoding: [0x0f,0xc6,0xc1,0x00]
; SSE-NEXT: # xmm0 = xmm0[0,0],xmm1[0,0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_shuffle_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vshufps $0, %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc1,0x00]
; AVX1-NEXT: # xmm0 = xmm0[0,0],xmm1[0,0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_shuffle_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vshufps $0, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xc1,0x00]
; AVX512-NEXT: # xmm0 = xmm0[0,0],xmm1[0,0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
  ret <4 x float> %res
}

define <4 x float> @test_mm_sqrt_ps(<4 x float> %a0) {
; SSE-LABEL: test_mm_sqrt_ps:
; SSE: # %bb.0:
; SSE-NEXT: sqrtps %xmm0, %xmm0 # encoding: [0x0f,0x51,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sqrt_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vsqrtps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x51,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sqrt_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vsqrtps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x51,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) nounwind readnone
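
; Note: a full-precision square root goes through the generic llvm.sqrt
; intrinsic rather than an x86-specific one; sqrtps is exactly rounded,
; unlike the rcpps/rsqrtps approximations tested above.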
2496 define <4 x float> @test_mm_sqrt_ss(<4 x float> %a0) {
2497 ; SSE-LABEL: test_mm_sqrt_ss:
2499 ; SSE-NEXT: sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0]
2500 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2502 ; AVX1-LABEL: test_mm_sqrt_ss:
2504 ; AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0]
2505 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2507 ; AVX512-LABEL: test_mm_sqrt_ss:
2509 ; AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0]
2510 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2511 %ext = extractelement <4 x float> %a0, i32 0
2512 %sqrt = call float @llvm.sqrt.f32(float %ext)
2513 %ins = insertelement <4 x float> %a0, float %sqrt, i32 0
2514 ret <4 x float> %ins
2516 declare float @llvm.sqrt.f32(float) nounwind readnone
2518 define float @test_mm_sqrt_ss_scalar(float %a0) {
2519 ; X86-SSE-LABEL: test_mm_sqrt_ss_scalar:
2521 ; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
2522 ; X86-SSE-NEXT: .cfi_def_cfa_offset 8
2523 ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2524 ; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x08]
2525 ; X86-SSE-NEXT: sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0]
2526 ; X86-SSE-NEXT: movss %xmm0, (%esp) # encoding: [0xf3,0x0f,0x11,0x04,0x24]
2527 ; X86-SSE-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24]
2528 ; X86-SSE-NEXT: popl %eax # encoding: [0x58]
2529 ; X86-SSE-NEXT: .cfi_def_cfa_offset 4
2530 ; X86-SSE-NEXT: retl # encoding: [0xc3]
2532 ; X86-AVX1-LABEL: test_mm_sqrt_ss_scalar:
2533 ; X86-AVX1: # %bb.0:
2534 ; X86-AVX1-NEXT: pushl %eax # encoding: [0x50]
2535 ; X86-AVX1-NEXT: .cfi_def_cfa_offset 8
2536 ; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2537 ; X86-AVX1-NEXT: # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08]
2538 ; X86-AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0]
2539 ; X86-AVX1-NEXT: vmovss %xmm0, (%esp) # encoding: [0xc5,0xfa,0x11,0x04,0x24]
2540 ; X86-AVX1-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24]
2541 ; X86-AVX1-NEXT: popl %eax # encoding: [0x58]
2542 ; X86-AVX1-NEXT: .cfi_def_cfa_offset 4
2543 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
2545 ; X86-AVX512-LABEL: test_mm_sqrt_ss_scalar:
2546 ; X86-AVX512: # %bb.0:
2547 ; X86-AVX512-NEXT: pushl %eax # encoding: [0x50]
2548 ; X86-AVX512-NEXT: .cfi_def_cfa_offset 8
2549 ; X86-AVX512-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2550 ; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08]
2551 ; X86-AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0]
2552 ; X86-AVX512-NEXT: vmovss %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x04,0x24]
2553 ; X86-AVX512-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24]
2554 ; X86-AVX512-NEXT: popl %eax # encoding: [0x58]
2555 ; X86-AVX512-NEXT: .cfi_def_cfa_offset 4
2556 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
2558 ; X64-SSE-LABEL: test_mm_sqrt_ss_scalar:
2560 ; X64-SSE-NEXT: sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0]
2561 ; X64-SSE-NEXT: retq # encoding: [0xc3]
2563 ; X64-AVX1-LABEL: test_mm_sqrt_ss_scalar:
2564 ; X64-AVX1: # %bb.0:
2565 ; X64-AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0]
2566 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
2568 ; X64-AVX512-LABEL: test_mm_sqrt_ss_scalar:
2569 ; X64-AVX512: # %bb.0:
2570 ; X64-AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0]
2571 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
2572 %sqrt = call float @llvm.sqrt.f32(float %a0)
2576 define void @test_mm_store_ps(ptr %a0, <4 x float> %a1) {
2577 ; X86-SSE-LABEL: test_mm_store_ps:
2579 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2580 ; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
2581 ; X86-SSE-NEXT: retl # encoding: [0xc3]
2583 ; X86-AVX1-LABEL: test_mm_store_ps:
2584 ; X86-AVX1: # %bb.0:
2585 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2586 ; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
2587 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
2589 ; X86-AVX512-LABEL: test_mm_store_ps:
2590 ; X86-AVX512: # %bb.0:
2591 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2592 ; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
2593 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
2595 ; X64-SSE-LABEL: test_mm_store_ps:
2597 ; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
2598 ; X64-SSE-NEXT: retq # encoding: [0xc3]
2600 ; X64-AVX1-LABEL: test_mm_store_ps:
2601 ; X64-AVX1: # %bb.0:
2602 ; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
2603 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
2605 ; X64-AVX512-LABEL: test_mm_store_ps:
2606 ; X64-AVX512: # %bb.0:
2607 ; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
2608 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
2609 %arg0 = bitcast ptr %a0 to ptr
2610 store <4 x float> %a1, ptr %arg0, align 16
define void @test_mm_store_ps1(ptr %a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_mm_store_ps1:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_store_ps1:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vshufps $0, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc0,0x00]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_store_ps1:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_store_ps1:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_store_ps1:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vshufps $0, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc0,0x00]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_store_ps1:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast ptr %a0 to ptr
%shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> zeroinitializer
store <4 x float> %shuf, ptr %arg0, align 16
ret void
}

define void @test_mm_store_ss(ptr %a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_mm_store_ss:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movss %xmm0, (%eax) # encoding: [0xf3,0x0f,0x11,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_store_ss:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovss %xmm0, (%eax) # encoding: [0xc5,0xfa,0x11,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_store_ss:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovss %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_store_ss:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movss %xmm0, (%rdi) # encoding: [0xf3,0x0f,0x11,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_store_ss:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_store_ss:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%ext = extractelement <4 x float> %a1, i32 0
store float %ext, ptr %a0, align 1
ret void
}

define void @test_mm_store1_ps(ptr %a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_mm_store1_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_store1_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vshufps $0, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc0,0x00]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_store1_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_store1_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_store1_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vshufps $0, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc0,0x00]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_store1_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast ptr %a0 to ptr
%shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> zeroinitializer
store <4 x float> %shuf, ptr %arg0, align 16
ret void
}
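; NOTE: _mm_storeh_pi stores the upper 64 bits of the vector. SSE1 has no
; 64-bit element moves, so the SSE1 blocks below round-trip the value through
; a stack slot; SSE2 lowers to movhps (32-bit) or punpckhqdq+movq (64-bit),
; and 64-bit AVX targets use vpextrq $1.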
define void @test_mm_storeh_pi(ptr %a0, <4 x float> %a1) nounwind {
; X86-SSE1-LABEL: test_mm_storeh_pi:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: pushl %ebp # encoding: [0x55]
; X86-SSE1-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-SSE1-NEXT: andl $-16, %esp # encoding: [0x83,0xe4,0xf0]
; X86-SSE1-NEXT: subl $32, %esp # encoding: [0x83,0xec,0x20]
; X86-SSE1-NEXT: movl 8(%ebp), %eax # encoding: [0x8b,0x45,0x08]
; X86-SSE1-NEXT: movaps %xmm0, (%esp) # encoding: [0x0f,0x29,0x04,0x24]
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08]
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
; X86-SSE1-NEXT: movl %edx, 4(%eax) # encoding: [0x89,0x50,0x04]
; X86-SSE1-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08]
; X86-SSE1-NEXT: movl %ebp, %esp # encoding: [0x89,0xec]
; X86-SSE1-NEXT: popl %ebp # encoding: [0x5d]
; X86-SSE1-NEXT: retl # encoding: [0xc3]
;
; X86-SSE2-LABEL: test_mm_storeh_pi:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE2-NEXT: movhps %xmm0, (%eax) # encoding: [0x0f,0x17,0x00]
; X86-SSE2-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_storeh_pi:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovhps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x17,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_storeh_pi:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovhps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x17,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE1-LABEL: test_mm_storeh_pi:
; X64-SSE1: # %bb.0:
; X64-SSE1-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x29,0x44,0x24,0xe8]
; X64-SSE1-NEXT: movq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8b,0x44,0x24,0xf0]
; X64-SSE1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
; X64-SSE1-NEXT: retq # encoding: [0xc3]
;
; X64-SSE2-LABEL: test_mm_storeh_pi:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: punpckhqdq %xmm0, %xmm0 # encoding: [0x66,0x0f,0x6d,0xc0]
; X64-SSE2-NEXT: # xmm0 = xmm0[1,1]
; X64-SSE2-NEXT: movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0]
; X64-SSE2-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
; X64-SSE2-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_storeh_pi:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpextrq $1, %xmm0, %rax # encoding: [0xc4,0xe3,0xf9,0x16,0xc0,0x01]
; X64-AVX1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_storeh_pi:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpextrq $1, %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0x16,0xc0,0x01]
; X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%ptr = bitcast ptr %a0 to ptr
%bc = bitcast <4 x float> %a1 to <2 x i64>
%ext = extractelement <2 x i64> %bc, i32 1
store i64 %ext, ptr %ptr
ret void
}
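; NOTE: test_mm_storeh_pi2 exercises the <2 x float> shuffle form of the same
; high-half store, which selects movhps/vmovhps directly on every subtarget
; shown, including SSE1.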
define void @test_mm_storeh_pi2(ptr %a0, <4 x float> %a1) nounwind {
; X86-SSE-LABEL: test_mm_storeh_pi2:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movhps %xmm0, (%eax) # encoding: [0x0f,0x17,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_storeh_pi2:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovhps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x17,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_storeh_pi2:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovhps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x17,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_storeh_pi2:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movhps %xmm0, (%rdi) # encoding: [0x0f,0x17,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_storeh_pi2:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovhps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x17,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_storeh_pi2:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovhps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x17,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%ptr = bitcast ptr %a0 to ptr
%ext = shufflevector <4 x float> %a1, <4 x float> undef, <2 x i32> <i32 2, i32 3>
store <2 x float> %ext, ptr %ptr
ret void
}

define void @test_mm_storel_pi(ptr %a0, <4 x float> %a1) nounwind {
; X86-SSE1-LABEL: test_mm_storel_pi:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: pushl %ebp # encoding: [0x55]
; X86-SSE1-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-SSE1-NEXT: andl $-16, %esp # encoding: [0x83,0xe4,0xf0]
; X86-SSE1-NEXT: subl $32, %esp # encoding: [0x83,0xec,0x20]
; X86-SSE1-NEXT: movl 8(%ebp), %eax # encoding: [0x8b,0x45,0x08]
; X86-SSE1-NEXT: movaps %xmm0, (%esp) # encoding: [0x0f,0x29,0x04,0x24]
; X86-SSE1-NEXT: movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24]
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04]
; X86-SSE1-NEXT: movl %edx, 4(%eax) # encoding: [0x89,0x50,0x04]
; X86-SSE1-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08]
; X86-SSE1-NEXT: movl %ebp, %esp # encoding: [0x89,0xec]
; X86-SSE1-NEXT: popl %ebp # encoding: [0x5d]
; X86-SSE1-NEXT: retl # encoding: [0xc3]
;
; X86-SSE2-LABEL: test_mm_storel_pi:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE2-NEXT: movlps %xmm0, (%eax) # encoding: [0x0f,0x13,0x00]
; X86-SSE2-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_storel_pi:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovlps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x13,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_storel_pi:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE1-LABEL: test_mm_storel_pi:
; X64-SSE1: # %bb.0:
; X64-SSE1-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x29,0x44,0x24,0xe8]
; X64-SSE1-NEXT: movq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8b,0x44,0x24,0xe8]
; X64-SSE1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
; X64-SSE1-NEXT: retq # encoding: [0xc3]
;
; X64-SSE2-LABEL: test_mm_storel_pi:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0]
; X64-SSE2-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
; X64-SSE2-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_storel_pi:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
; X64-AVX1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_storel_pi:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
; X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%ptr = bitcast ptr %a0 to ptr
%bc = bitcast <4 x float> %a1 to <2 x i64>
%ext = extractelement <2 x i64> %bc, i32 0
store i64 %ext, ptr %ptr
ret void
}

; FIXME: Switch the frontend to use this code.
define void @test_mm_storel_pi2(ptr %a0, <4 x float> %a1) nounwind {
; X86-SSE-LABEL: test_mm_storel_pi2:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movlps %xmm0, (%eax) # encoding: [0x0f,0x13,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_storel_pi2:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovlps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x13,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_storel_pi2:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_storel_pi2:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movlps %xmm0, (%rdi) # encoding: [0x0f,0x13,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_storel_pi2:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovlps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x13,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_storel_pi2:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovlps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%ptr = bitcast ptr %a0 to ptr
%ext = shufflevector <4 x float> %a1, <4 x float> undef, <2 x i32> <i32 0, i32 1>
store <2 x float> %ext, ptr %ptr
ret void
}
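; NOTE: _mm_storer_ps reverses the element order before the aligned store;
; shufps/vshufps $27 encodes the permutation mask 0b00011011, i.e. [3,2,1,0].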
define void @test_mm_storer_ps(ptr %a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_mm_storer_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b]
; X86-SSE-NEXT: # xmm0 = xmm0[3,2,1,0]
; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_storer_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vshufps $27, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc0,0x1b]
; X86-AVX1-NEXT: # xmm0 = xmm0[3,2,1,0]
; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_storer_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vshufps $27, %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xc0,0x1b]
; X86-AVX512-NEXT: # xmm0 = xmm0[3,2,1,0]
; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_storer_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b]
; X64-SSE-NEXT: # xmm0 = xmm0[3,2,1,0]
; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_storer_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vshufps $27, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc0,0x1b]
; X64-AVX1-NEXT: # xmm0 = xmm0[3,2,1,0]
; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_storer_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vshufps $27, %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xc0,0x1b]
; X64-AVX512-NEXT: # xmm0 = xmm0[3,2,1,0]
; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast ptr %a0 to ptr
%shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
store <4 x float> %shuf, ptr %arg0, align 16
ret void
}
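; NOTE: the 'align 1' store below is what distinguishes _mm_storeu_ps from
; _mm_store_ps: the underaligned store must select movups/vmovups instead of
; the alignment-checking movaps/vmovaps.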
define void @test_mm_storeu_ps(ptr %a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_mm_storeu_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movups %xmm0, (%eax) # encoding: [0x0f,0x11,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_storeu_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_storeu_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_storeu_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movups %xmm0, (%rdi) # encoding: [0x0f,0x11,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_storeu_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_storeu_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast ptr %a0 to ptr
store <4 x float> %a1, ptr %arg0, align 1
ret void
}
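; NOTE: the !nontemporal metadata on the store below selects the streaming
; movntps/vmovntps, which writes around the cache hierarchy.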
define void @test_mm_stream_ps(ptr %a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_mm_stream_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movntps %xmm0, (%eax) # encoding: [0x0f,0x2b,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_stream_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovntps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x2b,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_stream_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovntps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_stream_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movntps %xmm0, (%rdi) # encoding: [0x0f,0x2b,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_stream_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovntps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x2b,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_stream_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast ptr %a0 to ptr
store <4 x float> %a1, ptr %arg0, align 16, !nontemporal !0
ret void
}

define <4 x float> @test_mm_sub_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_sub_ps:
; SSE: # %bb.0:
; SSE-NEXT: subps %xmm1, %xmm0 # encoding: [0x0f,0x5c,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sub_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vsubps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5c,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sub_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vsubps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5c,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = fsub <4 x float> %a0, %a1
ret <4 x float> %res
}

define <4 x float> @test_mm_sub_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_sub_ss:
; SSE: # %bb.0:
; SSE-NEXT: subss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5c,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sub_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5c,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sub_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vsubss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5c,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%ext0 = extractelement <4 x float> %a0, i32 0
%ext1 = extractelement <4 x float> %a1, i32 0
%fsub = fsub float %ext0, %ext1
%res = insertelement <4 x float> %a0, float %fsub, i32 0
ret <4 x float> %res
}
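; NOTE: _MM_TRANSPOSE4_PS below is a 4x4 transpose built from two interleave
; stages: unpcklps/unpckhps interleave row pairs, then movlhps/movhlps (SSE)
; or vmovlhps/vunpckhpd (AVX) recombine the 64-bit halves into the transposed
; rows that are stored back through the four pointers.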
define void @test_MM_TRANSPOSE4_PS(ptr %a0, ptr %a1, ptr %a2, ptr %a3) nounwind {
; X86-SSE-LABEL: test_MM_TRANSPOSE4_PS:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %esi # encoding: [0x56]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08]
; X86-SSE-NEXT: movaps (%esi), %xmm1 # encoding: [0x0f,0x28,0x0e]
; X86-SSE-NEXT: movaps (%edx), %xmm2 # encoding: [0x0f,0x28,0x12]
; X86-SSE-NEXT: movaps (%ecx), %xmm0 # encoding: [0x0f,0x28,0x01]
; X86-SSE-NEXT: movaps (%eax), %xmm3 # encoding: [0x0f,0x28,0x18]
; X86-SSE-NEXT: movaps %xmm1, %xmm4 # encoding: [0x0f,0x28,0xe1]
; X86-SSE-NEXT: unpcklps %xmm2, %xmm4 # encoding: [0x0f,0x14,0xe2]
; X86-SSE-NEXT: # xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; X86-SSE-NEXT: movaps %xmm0, %xmm5 # encoding: [0x0f,0x28,0xe8]
; X86-SSE-NEXT: unpcklps %xmm3, %xmm5 # encoding: [0x0f,0x14,0xeb]
; X86-SSE-NEXT: # xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
; X86-SSE-NEXT: unpckhps %xmm2, %xmm1 # encoding: [0x0f,0x15,0xca]
; X86-SSE-NEXT: # xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; X86-SSE-NEXT: unpckhps %xmm3, %xmm0 # encoding: [0x0f,0x15,0xc3]
; X86-SSE-NEXT: # xmm0 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
; X86-SSE-NEXT: movaps %xmm4, %xmm2 # encoding: [0x0f,0x28,0xd4]
; X86-SSE-NEXT: movlhps %xmm5, %xmm2 # encoding: [0x0f,0x16,0xd5]
; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm5[0]
; X86-SSE-NEXT: movhlps %xmm4, %xmm5 # encoding: [0x0f,0x12,0xec]
; X86-SSE-NEXT: # xmm5 = xmm4[1],xmm5[1]
; X86-SSE-NEXT: movaps %xmm1, %xmm3 # encoding: [0x0f,0x28,0xd9]
; X86-SSE-NEXT: movlhps %xmm0, %xmm3 # encoding: [0x0f,0x16,0xd8]
; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0]
; X86-SSE-NEXT: movhlps %xmm1, %xmm0 # encoding: [0x0f,0x12,0xc1]
; X86-SSE-NEXT: # xmm0 = xmm1[1],xmm0[1]
; X86-SSE-NEXT: movaps %xmm2, (%esi) # encoding: [0x0f,0x29,0x16]
; X86-SSE-NEXT: movaps %xmm5, (%edx) # encoding: [0x0f,0x29,0x2a]
; X86-SSE-NEXT: movaps %xmm3, (%ecx) # encoding: [0x0f,0x29,0x19]
; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
; X86-SSE-NEXT: popl %esi # encoding: [0x5e]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_MM_TRANSPOSE4_PS:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: pushl %esi # encoding: [0x56]
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10]
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08]
; X86-AVX1-NEXT: vmovaps (%esi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x06]
; X86-AVX1-NEXT: vmovaps (%edx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0a]
; X86-AVX1-NEXT: vmovaps (%ecx), %xmm2 # encoding: [0xc5,0xf8,0x28,0x11]
; X86-AVX1-NEXT: vmovaps (%eax), %xmm3 # encoding: [0xc5,0xf8,0x28,0x18]
; X86-AVX1-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x14,0xe1]
; X86-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-AVX1-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # encoding: [0xc5,0xe8,0x14,0xeb]
; X86-AVX1-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X86-AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1]
; X86-AVX1-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-AVX1-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # encoding: [0xc5,0xe8,0x15,0xcb]
; X86-AVX1-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; X86-AVX1-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # encoding: [0xc5,0xd8,0x16,0xd5]
; X86-AVX1-NEXT: # xmm2 = xmm4[0],xmm5[0]
; X86-AVX1-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # encoding: [0xc5,0xd9,0x15,0xdd]
; X86-AVX1-NEXT: # xmm3 = xmm4[1],xmm5[1]
; X86-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x16,0xe1]
; X86-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0]
; X86-AVX1-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1]
; X86-AVX1-NEXT: # xmm0 = xmm0[1],xmm1[1]
; X86-AVX1-NEXT: vmovaps %xmm2, (%esi) # encoding: [0xc5,0xf8,0x29,0x16]
; X86-AVX1-NEXT: vmovaps %xmm3, (%edx) # encoding: [0xc5,0xf8,0x29,0x1a]
; X86-AVX1-NEXT: vmovaps %xmm4, (%ecx) # encoding: [0xc5,0xf8,0x29,0x21]
; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX1-NEXT: popl %esi # encoding: [0x5e]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_MM_TRANSPOSE4_PS:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: pushl %esi # encoding: [0x56]
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10]
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08]
; X86-AVX512-NEXT: vmovaps (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x06]
; X86-AVX512-NEXT: vmovaps (%edx), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x0a]
; X86-AVX512-NEXT: vmovaps (%ecx), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x11]
; X86-AVX512-NEXT: vmovaps (%eax), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x18]
; X86-AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xe1]
; X86-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-AVX512-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x14,0xeb]
; X86-AVX512-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X86-AVX512-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1]
; X86-AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-AVX512-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x15,0xcb]
; X86-AVX512-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; X86-AVX512-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xd8,0x16,0xd5]
; X86-AVX512-NEXT: # xmm2 = xmm4[0],xmm5[0]
; X86-AVX512-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0x15,0xdd]
; X86-AVX512-NEXT: # xmm3 = xmm4[1],xmm5[1]
; X86-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xe1]
; X86-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0]
; X86-AVX512-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1]
; X86-AVX512-NEXT: # xmm0 = xmm0[1],xmm1[1]
; X86-AVX512-NEXT: vmovaps %xmm2, (%esi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x16]
; X86-AVX512-NEXT: vmovaps %xmm3, (%edx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x1a]
; X86-AVX512-NEXT: vmovaps %xmm4, (%ecx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x21]
; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX512-NEXT: popl %esi # encoding: [0x5e]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_TRANSPOSE4_PS:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
; X64-SSE-NEXT: movaps (%rsi), %xmm1 # encoding: [0x0f,0x28,0x0e]
; X64-SSE-NEXT: movaps (%rdx), %xmm2 # encoding: [0x0f,0x28,0x12]
; X64-SSE-NEXT: movaps (%rcx), %xmm3 # encoding: [0x0f,0x28,0x19]
; X64-SSE-NEXT: movaps %xmm0, %xmm4 # encoding: [0x0f,0x28,0xe0]
; X64-SSE-NEXT: unpcklps %xmm1, %xmm4 # encoding: [0x0f,0x14,0xe1]
; X64-SSE-NEXT: # xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
; X64-SSE-NEXT: movaps %xmm2, %xmm5 # encoding: [0x0f,0x28,0xea]
; X64-SSE-NEXT: unpcklps %xmm3, %xmm5 # encoding: [0x0f,0x14,0xeb]
; X64-SSE-NEXT: # xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
; X64-SSE-NEXT: unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1]
; X64-SSE-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-SSE-NEXT: unpckhps %xmm3, %xmm2 # encoding: [0x0f,0x15,0xd3]
; X64-SSE-NEXT: # xmm2 = xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; X64-SSE-NEXT: movaps %xmm4, %xmm1 # encoding: [0x0f,0x28,0xcc]
; X64-SSE-NEXT: movlhps %xmm5, %xmm1 # encoding: [0x0f,0x16,0xcd]
; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm5[0]
; X64-SSE-NEXT: movhlps %xmm4, %xmm5 # encoding: [0x0f,0x12,0xec]
; X64-SSE-NEXT: # xmm5 = xmm4[1],xmm5[1]
; X64-SSE-NEXT: movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8]
; X64-SSE-NEXT: movlhps %xmm2, %xmm3 # encoding: [0x0f,0x16,0xda]
; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0]
; X64-SSE-NEXT: movhlps %xmm0, %xmm2 # encoding: [0x0f,0x12,0xd0]
; X64-SSE-NEXT: # xmm2 = xmm0[1],xmm2[1]
; X64-SSE-NEXT: movaps %xmm1, (%rdi) # encoding: [0x0f,0x29,0x0f]
; X64-SSE-NEXT: movaps %xmm5, (%rsi) # encoding: [0x0f,0x29,0x2e]
; X64-SSE-NEXT: movaps %xmm3, (%rdx) # encoding: [0x0f,0x29,0x1a]
; X64-SSE-NEXT: movaps %xmm2, (%rcx) # encoding: [0x0f,0x29,0x11]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_MM_TRANSPOSE4_PS:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX1-NEXT: vmovaps (%rsi), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0e]
; X64-AVX1-NEXT: vmovaps (%rdx), %xmm2 # encoding: [0xc5,0xf8,0x28,0x12]
; X64-AVX1-NEXT: vmovaps (%rcx), %xmm3 # encoding: [0xc5,0xf8,0x28,0x19]
; X64-AVX1-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x14,0xe1]
; X64-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-AVX1-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # encoding: [0xc5,0xe8,0x14,0xeb]
; X64-AVX1-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X64-AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1]
; X64-AVX1-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-AVX1-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # encoding: [0xc5,0xe8,0x15,0xcb]
; X64-AVX1-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; X64-AVX1-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # encoding: [0xc5,0xd8,0x16,0xd5]
; X64-AVX1-NEXT: # xmm2 = xmm4[0],xmm5[0]
; X64-AVX1-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # encoding: [0xc5,0xd9,0x15,0xdd]
; X64-AVX1-NEXT: # xmm3 = xmm4[1],xmm5[1]
; X64-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x16,0xe1]
; X64-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0]
; X64-AVX1-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1]
; X64-AVX1-NEXT: # xmm0 = xmm0[1],xmm1[1]
; X64-AVX1-NEXT: vmovaps %xmm2, (%rdi) # encoding: [0xc5,0xf8,0x29,0x17]
; X64-AVX1-NEXT: vmovaps %xmm3, (%rsi) # encoding: [0xc5,0xf8,0x29,0x1e]
; X64-AVX1-NEXT: vmovaps %xmm4, (%rdx) # encoding: [0xc5,0xf8,0x29,0x22]
; X64-AVX1-NEXT: vmovaps %xmm0, (%rcx) # encoding: [0xc5,0xf8,0x29,0x01]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_MM_TRANSPOSE4_PS:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX512-NEXT: vmovaps (%rsi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x0e]
; X64-AVX512-NEXT: vmovaps (%rdx), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x12]
; X64-AVX512-NEXT: vmovaps (%rcx), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x19]
; X64-AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xe1]
; X64-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-AVX512-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x14,0xeb]
; X64-AVX512-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X64-AVX512-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1]
; X64-AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-AVX512-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x15,0xcb]
; X64-AVX512-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; X64-AVX512-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xd8,0x16,0xd5]
; X64-AVX512-NEXT: # xmm2 = xmm4[0],xmm5[0]
; X64-AVX512-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0x15,0xdd]
; X64-AVX512-NEXT: # xmm3 = xmm4[1],xmm5[1]
; X64-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xe1]
; X64-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0]
; X64-AVX512-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1]
; X64-AVX512-NEXT: # xmm0 = xmm0[1],xmm1[1]
; X64-AVX512-NEXT: vmovaps %xmm2, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x17]
; X64-AVX512-NEXT: vmovaps %xmm3, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x1e]
; X64-AVX512-NEXT: vmovaps %xmm4, (%rdx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x22]
; X64-AVX512-NEXT: vmovaps %xmm0, (%rcx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x01]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%row0 = load <4 x float>, ptr %a0, align 16
%row1 = load <4 x float>, ptr %a1, align 16
%row2 = load <4 x float>, ptr %a2, align 16
%row3 = load <4 x float>, ptr %a3, align 16
%tmp0 = shufflevector <4 x float> %row0, <4 x float> %row1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
%tmp2 = shufflevector <4 x float> %row2, <4 x float> %row3, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
%tmp1 = shufflevector <4 x float> %row0, <4 x float> %row1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
%tmp3 = shufflevector <4 x float> %row2, <4 x float> %row3, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
%res0 = shufflevector <4 x float> %tmp0, <4 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
%res1 = shufflevector <4 x float> %tmp2, <4 x float> %tmp0, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
%res2 = shufflevector <4 x float> %tmp1, <4 x float> %tmp3, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
%res3 = shufflevector <4 x float> %tmp3, <4 x float> %tmp1, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
store <4 x float> %res0, ptr %a0, align 16
store <4 x float> %res1, ptr %a1, align 16
store <4 x float> %res2, ptr %a2, align 16
store <4 x float> %res3, ptr %a3, align 16
ret void
}
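; NOTE: ucomiss/vucomiss sets ZF, PF and CF, and an unordered (NaN) operand
; sets all three. The equality test below therefore needs sete AND setnp
; (equal and ordered), while test_mm_ucomineq_ss needs setne OR setp
; (not-equal or unordered); the remaining ucomi tests read a single flag.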
define i32 @test_mm_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_ucomieq_ss:
; SSE: # %bb.0:
; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1]
; SSE-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; SSE-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; SSE-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomieq_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1]
; AVX1-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX1-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX1-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomieq_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
; AVX512-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX512-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX512-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_ucomige_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_ucomige_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1]
; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomige_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1]
; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomige_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_ucomigt_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1]
; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomigt_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1]
; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomigt_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_ucomile_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_ucomile_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: ucomiss %xmm0, %xmm1 # encoding: [0x0f,0x2e,0xc8]
; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomile_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vucomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2e,0xc8]
; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomile_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vucomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc8]
; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_ucomilt_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: ucomiss %xmm0, %xmm1 # encoding: [0x0f,0x2e,0xc8]
; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomilt_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vucomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2e,0xc8]
; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomilt_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vucomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc8]
; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_ucomineq_ss:
; SSE: # %bb.0:
; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1]
; SSE-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; SSE-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; SSE-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomineq_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1]
; AVX1-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX1-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX1-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomineq_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
; AVX512-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX512-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX512-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1)
ret i32 %res
}
declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone

define <4 x float> @test_mm_undefined_ps() {
; CHECK-LABEL: test_mm_undefined_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
ret <4 x float> undef
}
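; NOTE: the shuffle masks <2, 6, 3, 7> and <0, 4, 1, 5> in the two tests below
; are the canonical IR forms of unpckhps and unpcklps respectively.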
define <4 x float> @test_mm_unpackhi_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_unpackhi_ps:
; SSE: # %bb.0:
; SSE-NEXT: unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1]
; SSE-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpackhi_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1]
; AVX1-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpackhi_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1]
; AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
ret <4 x float> %res
}

define <4 x float> @test_mm_unpacklo_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_unpacklo_ps:
; SSE: # %bb.0:
; SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1]
; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpacklo_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x14,0xc1]
; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpacklo_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xc1]
; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
ret <4 x float> %res
}
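; NOTE: IR has no xor on floating-point vectors, so _mm_xor_ps bitcasts to
; <4 x i32>, xors, and bitcasts back; the sequence still selects the single
; FP-domain xorps/vxorps instruction.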
define <4 x float> @test_mm_xor_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_xor_ps:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm1, %xmm0 # encoding: [0x0f,0x57,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_xor_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_xor_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <4 x float> %a0 to <4 x i32>
%arg1 = bitcast <4 x float> %a1 to <4 x i32>
%res = xor <4 x i32> %arg0, %arg1
%bc = bitcast <4 x i32> %res to <4 x float>
ret <4 x float> %bc
}
; Nontemporal metadata node referenced by the test_mm_stream_ps store above.
!0 = !{i32 1}