1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
3 ; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
4 ; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
5 ; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
6 ; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
7 ; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512
8 ; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown-gnux32 -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X32,SSE,X32-SSE
9 ; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown-gnux32 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X32,AVX,X32-AVX,AVX1,X32-AVX1
10 ; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown-gnux32 -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X32,AVX,X32-AVX,AVX512,X32-AVX512
12 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse2-builtins.c
14 define <2 x i64> @test_mm_add_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
15 ; SSE-LABEL: test_mm_add_epi8:
17 ; SSE-NEXT: paddb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfc,0xc1]
18 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
20 ; AVX1-LABEL: test_mm_add_epi8:
22 ; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
23 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
25 ; AVX512-LABEL: test_mm_add_epi8:
27 ; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1]
28 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
29 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
30 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
31 %res = add <16 x i8> %arg0, %arg1
32 %bc = bitcast <16 x i8> %res to <2 x i64>
36 define <2 x i64> @test_mm_add_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
37 ; SSE-LABEL: test_mm_add_epi16:
39 ; SSE-NEXT: paddw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfd,0xc1]
40 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
42 ; AVX1-LABEL: test_mm_add_epi16:
44 ; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
45 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
47 ; AVX512-LABEL: test_mm_add_epi16:
49 ; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1]
50 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
51 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
52 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
53 %res = add <8 x i16> %arg0, %arg1
54 %bc = bitcast <8 x i16> %res to <2 x i64>
58 define <2 x i64> @test_mm_add_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
59 ; SSE-LABEL: test_mm_add_epi32:
61 ; SSE-NEXT: paddd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfe,0xc1]
62 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
64 ; AVX1-LABEL: test_mm_add_epi32:
66 ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
67 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
69 ; AVX512-LABEL: test_mm_add_epi32:
71 ; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
72 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
73 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
74 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
75 %res = add <4 x i32> %arg0, %arg1
76 %bc = bitcast <4 x i32> %res to <2 x i64>
80 define <2 x i64> @test_mm_add_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
81 ; SSE-LABEL: test_mm_add_epi64:
83 ; SSE-NEXT: paddq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd4,0xc1]
84 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
86 ; AVX1-LABEL: test_mm_add_epi64:
88 ; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd4,0xc1]
89 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
91 ; AVX512-LABEL: test_mm_add_epi64:
93 ; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc1]
94 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
95 %res = add <2 x i64> %a0, %a1
99 define <2 x double> @test_mm_add_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
100 ; SSE-LABEL: test_mm_add_pd:
102 ; SSE-NEXT: addpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x58,0xc1]
103 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
105 ; AVX1-LABEL: test_mm_add_pd:
107 ; AVX1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1]
108 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
110 ; AVX512-LABEL: test_mm_add_pd:
112 ; AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
113 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
114 %res = fadd <2 x double> %a0, %a1
115 ret <2 x double> %res
118 define <2 x double> @test_mm_add_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
119 ; SSE-LABEL: test_mm_add_sd:
121 ; SSE-NEXT: addsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x58,0xc1]
122 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
124 ; AVX1-LABEL: test_mm_add_sd:
126 ; AVX1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x58,0xc1]
127 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
129 ; AVX512-LABEL: test_mm_add_sd:
131 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x58,0xc1]
132 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
133 %ext0 = extractelement <2 x double> %a0, i32 0
134 %ext1 = extractelement <2 x double> %a1, i32 0
135 %fadd = fadd double %ext0, %ext1
136 %res = insertelement <2 x double> %a0, double %fadd, i32 0
137 ret <2 x double> %res
140 define <2 x i64> @test_mm_adds_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
141 ; SSE-LABEL: test_mm_adds_epi8:
143 ; SSE-NEXT: paddsb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xec,0xc1]
144 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
146 ; AVX1-LABEL: test_mm_adds_epi8:
148 ; AVX1-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xec,0xc1]
149 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
151 ; AVX512-LABEL: test_mm_adds_epi8:
153 ; AVX512-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1]
154 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
155 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
156 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
157 %res = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %arg0, <16 x i8> %arg1)
158 %bc = bitcast <16 x i8> %res to <2 x i64>
161 ; Signed saturating <16 x i8> add intrinsic, used by test_mm_adds_epi8 above.
161 declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
163 define <2 x i64> @test_mm_adds_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
164 ; SSE-LABEL: test_mm_adds_epi16:
166 ; SSE-NEXT: paddsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xed,0xc1]
167 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
169 ; AVX1-LABEL: test_mm_adds_epi16:
171 ; AVX1-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xed,0xc1]
172 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
174 ; AVX512-LABEL: test_mm_adds_epi16:
176 ; AVX512-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1]
177 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
178 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
179 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
180 %res = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %arg0, <8 x i16> %arg1)
181 %bc = bitcast <8 x i16> %res to <2 x i64>
184 ; Signed saturating <8 x i16> add intrinsic, used by test_mm_adds_epi16 above.
184 declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
186 define <2 x i64> @test_mm_adds_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
187 ; SSE-LABEL: test_mm_adds_epu8:
189 ; SSE-NEXT: paddusb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdc,0xc1]
190 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
192 ; AVX1-LABEL: test_mm_adds_epu8:
194 ; AVX1-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdc,0xc1]
195 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
197 ; AVX512-LABEL: test_mm_adds_epu8:
199 ; AVX512-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1]
200 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
201 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
202 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
203 %res = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %arg0, <16 x i8> %arg1)
204 %bc = bitcast <16 x i8> %res to <2 x i64>
207 ; Unsigned saturating <16 x i8> add intrinsic, used by test_mm_adds_epu8 above.
207 ; Attributes added to match the sibling llvm.sadd.sat declarations in this
207 ; file; the IR parser derives the authoritative attributes for intrinsics,
207 ; so this is a purely cosmetic consistency fix.
207 declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
209 define <2 x i64> @test_mm_adds_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
210 ; SSE-LABEL: test_mm_adds_epu16:
212 ; SSE-NEXT: paddusw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdd,0xc1]
213 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
215 ; AVX1-LABEL: test_mm_adds_epu16:
217 ; AVX1-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdd,0xc1]
218 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
220 ; AVX512-LABEL: test_mm_adds_epu16:
222 ; AVX512-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
223 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
224 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
225 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
226 %res = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %arg0, <8 x i16> %arg1)
227 %bc = bitcast <8 x i16> %res to <2 x i64>
230 ; Unsigned saturating <8 x i16> add intrinsic, used by test_mm_adds_epu16 above.
230 ; Attributes added to match the sibling llvm.sadd.sat declarations in this
230 ; file; the IR parser derives the authoritative attributes for intrinsics,
230 ; so this is a purely cosmetic consistency fix.
230 declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
232 define <2 x double> @test_mm_and_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
233 ; SSE-LABEL: test_mm_and_pd:
235 ; SSE-NEXT: andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1]
236 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
238 ; AVX1-LABEL: test_mm_and_pd:
240 ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
241 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
243 ; AVX512-LABEL: test_mm_and_pd:
245 ; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
246 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
247 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
248 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
249 %res = and <4 x i32> %arg0, %arg1
250 %bc = bitcast <4 x i32> %res to <2 x double>
254 define <2 x i64> @test_mm_and_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
255 ; SSE-LABEL: test_mm_and_si128:
257 ; SSE-NEXT: andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1]
258 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
260 ; AVX1-LABEL: test_mm_and_si128:
262 ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
263 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
265 ; AVX512-LABEL: test_mm_and_si128:
267 ; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
268 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
269 %res = and <2 x i64> %a0, %a1
273 define <2 x double> @test_mm_andnot_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
274 ; SSE-LABEL: test_mm_andnot_pd:
276 ; SSE-NEXT: pcmpeqd %xmm2, %xmm2 # encoding: [0x66,0x0f,0x76,0xd2]
277 ; SSE-NEXT: pxor %xmm2, %xmm0 # encoding: [0x66,0x0f,0xef,0xc2]
278 ; SSE-NEXT: pand %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc1]
279 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
281 ; AVX1-LABEL: test_mm_andnot_pd:
283 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
284 ; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xef,0xc2]
285 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdb,0xc1]
286 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
288 ; AVX512-LABEL: test_mm_andnot_pd:
290 ; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x25,0xc0,0x0f]
291 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0xc1]
292 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
293 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
294 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
295 %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
296 %res = and <4 x i32> %not, %arg1
297 %bc = bitcast <4 x i32> %res to <2 x double>
301 define <2 x i64> @test_mm_andnot_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
302 ; SSE-LABEL: test_mm_andnot_si128:
304 ; SSE-NEXT: pcmpeqd %xmm2, %xmm2 # encoding: [0x66,0x0f,0x76,0xd2]
305 ; SSE-NEXT: pxor %xmm2, %xmm0 # encoding: [0x66,0x0f,0xef,0xc2]
306 ; SSE-NEXT: pand %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc1]
307 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
309 ; AVX1-LABEL: test_mm_andnot_si128:
311 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
312 ; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xef,0xc2]
313 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdb,0xc1]
314 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
316 ; AVX512-LABEL: test_mm_andnot_si128:
318 ; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x25,0xc0,0x0f]
319 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0xc1]
320 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
321 %not = xor <2 x i64> %a0, <i64 -1, i64 -1>
322 %res = and <2 x i64> %not, %a1
326 define <2 x i64> @test_mm_avg_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
327 ; SSE-LABEL: test_mm_avg_epu8:
329 ; SSE-NEXT: pavgb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe0,0xc1]
330 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
332 ; AVX1-LABEL: test_mm_avg_epu8:
334 ; AVX1-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe0,0xc1]
335 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
337 ; AVX512-LABEL: test_mm_avg_epu8:
339 ; AVX512-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xc1]
340 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
341 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
342 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
343 %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1)
344 %bc = bitcast <16 x i8> %res to <2 x i64>
347 ; Unsigned <16 x i8> average intrinsic (PAVGB), used by test_mm_avg_epu8 above.
347 ; Parameter names (%arg0, %arg1) dropped for consistency with every other
347 ; declaration in this file (e.g. llvm.x86.sse2.pavg.w); names on function
347 ; declarations carry no meaning and are ignored by the parser.
347 declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
349 define <2 x i64> @test_mm_avg_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
350 ; SSE-LABEL: test_mm_avg_epu16:
352 ; SSE-NEXT: pavgw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe3,0xc1]
353 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
355 ; AVX1-LABEL: test_mm_avg_epu16:
357 ; AVX1-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe3,0xc1]
358 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
360 ; AVX512-LABEL: test_mm_avg_epu16:
362 ; AVX512-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xc1]
363 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
364 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
365 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
366 %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %arg0, <8 x i16> %arg1)
367 %bc = bitcast <8 x i16> %res to <2 x i64>
370 ; Unsigned <8 x i16> average intrinsic (PAVGW), used by test_mm_avg_epu16 above.
370 declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
372 define <2 x i64> @test_mm_bslli_si128(<2 x i64> %a0) nounwind {
373 ; SSE-LABEL: test_mm_bslli_si128:
375 ; SSE-NEXT: pslldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xf8,0x05]
376 ; SSE-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
377 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
379 ; AVX1-LABEL: test_mm_bslli_si128:
381 ; AVX1-NEXT: vpslldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf8,0x05]
382 ; AVX1-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
383 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
385 ; AVX512-LABEL: test_mm_bslli_si128:
387 ; AVX512-NEXT: vpslldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x05]
388 ; AVX512-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
389 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
390 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
391 %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
392 %bc = bitcast <16 x i8> %res to <2 x i64>
396 define <2 x i64> @test_mm_bsrli_si128(<2 x i64> %a0) nounwind {
397 ; SSE-LABEL: test_mm_bsrli_si128:
399 ; SSE-NEXT: psrldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xd8,0x05]
400 ; SSE-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
401 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
403 ; AVX1-LABEL: test_mm_bsrli_si128:
405 ; AVX1-NEXT: vpsrldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd8,0x05]
406 ; AVX1-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
407 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
409 ; AVX512-LABEL: test_mm_bsrli_si128:
411 ; AVX512-NEXT: vpsrldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x05]
412 ; AVX512-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
413 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
414 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
415 %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
416 %bc = bitcast <16 x i8> %res to <2 x i64>
420 define <4 x float> @test_mm_castpd_ps(<2 x double> %a0) nounwind {
421 ; CHECK-LABEL: test_mm_castpd_ps:
423 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
424 %res = bitcast <2 x double> %a0 to <4 x float>
428 define <2 x i64> @test_mm_castpd_si128(<2 x double> %a0) nounwind {
429 ; CHECK-LABEL: test_mm_castpd_si128:
431 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
432 %res = bitcast <2 x double> %a0 to <2 x i64>
436 define <2 x double> @test_mm_castps_pd(<4 x float> %a0) nounwind {
437 ; CHECK-LABEL: test_mm_castps_pd:
439 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
440 %res = bitcast <4 x float> %a0 to <2 x double>
441 ret <2 x double> %res
444 define <2 x i64> @test_mm_castps_si128(<4 x float> %a0) nounwind {
445 ; CHECK-LABEL: test_mm_castps_si128:
447 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
448 %res = bitcast <4 x float> %a0 to <2 x i64>
452 define <2 x double> @test_mm_castsi128_pd(<2 x i64> %a0) nounwind {
453 ; CHECK-LABEL: test_mm_castsi128_pd:
455 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
456 %res = bitcast <2 x i64> %a0 to <2 x double>
457 ret <2 x double> %res
460 define <4 x float> @test_mm_castsi128_ps(<2 x i64> %a0) nounwind {
461 ; CHECK-LABEL: test_mm_castsi128_ps:
463 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
464 %res = bitcast <2 x i64> %a0 to <4 x float>
468 define void @test_mm_clflush(ptr %a0) nounwind {
469 ; X86-LABEL: test_mm_clflush:
471 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
472 ; X86-NEXT: clflush (%eax) # encoding: [0x0f,0xae,0x38]
473 ; X86-NEXT: retl # encoding: [0xc3]
475 ; X64-LABEL: test_mm_clflush:
477 ; X64-NEXT: clflush (%rdi) # encoding: [0x0f,0xae,0x3f]
478 ; X64-NEXT: retq # encoding: [0xc3]
480 ; X32-LABEL: test_mm_clflush:
482 ; X32-NEXT: clflush (%edi) # encoding: [0x67,0x0f,0xae,0x3f]
483 ; X32-NEXT: retq # encoding: [0xc3]
484 call void @llvm.x86.sse2.clflush(ptr %a0)
487 ; Cache-line flush intrinsic (CLFLUSH), used by test_mm_clflush above.
487 ; NOTE(review): readnone looks odd for an instruction with memory side
487 ; effects, but the IR parser normalizes intrinsic attributes, so it is
487 ; harmless here — confirm against Intrinsics.td if this is ever touched.
487 declare void @llvm.x86.sse2.clflush(ptr) nounwind readnone
489 define <2 x i64> @test_mm_cmpeq_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
490 ; SSE-LABEL: test_mm_cmpeq_epi8:
492 ; SSE-NEXT: pcmpeqb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x74,0xc1]
493 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
495 ; AVX1-LABEL: test_mm_cmpeq_epi8:
497 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x74,0xc1]
498 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
500 ; AVX512-LABEL: test_mm_cmpeq_epi8:
502 ; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
503 ; AVX512-NEXT: vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
504 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
505 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
506 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
507 %cmp = icmp eq <16 x i8> %arg0, %arg1
508 %res = sext <16 x i1> %cmp to <16 x i8>
509 %bc = bitcast <16 x i8> %res to <2 x i64>
513 define <2 x i64> @test_mm_cmpeq_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
514 ; SSE-LABEL: test_mm_cmpeq_epi16:
516 ; SSE-NEXT: pcmpeqw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x75,0xc1]
517 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
519 ; AVX1-LABEL: test_mm_cmpeq_epi16:
521 ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x75,0xc1]
522 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
524 ; AVX512-LABEL: test_mm_cmpeq_epi16:
526 ; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
527 ; AVX512-NEXT: vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
528 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
529 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
530 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
531 %cmp = icmp eq <8 x i16> %arg0, %arg1
532 %res = sext <8 x i1> %cmp to <8 x i16>
533 %bc = bitcast <8 x i16> %res to <2 x i64>
537 define <2 x i64> @test_mm_cmpeq_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
538 ; SSE-LABEL: test_mm_cmpeq_epi32:
540 ; SSE-NEXT: pcmpeqd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x76,0xc1]
541 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
543 ; AVX1-LABEL: test_mm_cmpeq_epi32:
545 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x76,0xc1]
546 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
548 ; AVX512-LABEL: test_mm_cmpeq_epi32:
550 ; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
551 ; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
552 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
553 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
554 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
555 %cmp = icmp eq <4 x i32> %arg0, %arg1
556 %res = sext <4 x i1> %cmp to <4 x i32>
557 %bc = bitcast <4 x i32> %res to <2 x i64>
561 define <2 x double> @test_mm_cmpeq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
562 ; SSE-LABEL: test_mm_cmpeq_pd:
564 ; SSE-NEXT: cmpeqpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x00]
565 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
567 ; AVX1-LABEL: test_mm_cmpeq_pd:
569 ; AVX1-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x00]
570 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
572 ; AVX512-LABEL: test_mm_cmpeq_pd:
574 ; AVX512-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x00]
575 ; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
576 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
577 %fcmp = fcmp oeq <2 x double> %a0, %a1
578 %sext = sext <2 x i1> %fcmp to <2 x i64>
579 %res = bitcast <2 x i64> %sext to <2 x double>
580 ret <2 x double> %res
583 define <2 x double> @test_mm_cmpeq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
584 ; SSE-LABEL: test_mm_cmpeq_sd:
586 ; SSE-NEXT: cmpeqsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x00]
587 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
589 ; AVX-LABEL: test_mm_cmpeq_sd:
591 ; AVX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x00]
592 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
593 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
594 ret <2 x double> %res
596 ; Scalar double compare intrinsic (CMPSD); the trailing i8 immediate selects
596 ; the predicate (0=eq, 1=lt, 2=le, ...). Used by the test_mm_cmp*_sd tests.
596 declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
598 define <2 x double> @test_mm_cmpge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
599 ; SSE-LABEL: test_mm_cmpge_pd:
601 ; SSE-NEXT: cmplepd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x02]
602 ; SSE-NEXT: movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
603 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
605 ; AVX1-LABEL: test_mm_cmpge_pd:
607 ; AVX1-NEXT: vcmplepd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x02]
608 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
610 ; AVX512-LABEL: test_mm_cmpge_pd:
612 ; AVX512-NEXT: vcmplepd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x02]
613 ; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
614 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
615 %fcmp = fcmp ole <2 x double> %a1, %a0
616 %sext = sext <2 x i1> %fcmp to <2 x i64>
617 %res = bitcast <2 x i64> %sext to <2 x double>
618 ret <2 x double> %res
621 define <2 x double> @test_mm_cmpge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
622 ; SSE-LABEL: test_mm_cmpge_sd:
624 ; SSE-NEXT: cmplesd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x02]
625 ; SSE-NEXT: movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
626 ; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1]
627 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
629 ; AVX-LABEL: test_mm_cmpge_sd:
631 ; AVX-NEXT: vcmplesd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x02]
632 ; AVX-NEXT: vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
633 ; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1]
634 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
635 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 2)
636 %ext0 = extractelement <2 x double> %cmp, i32 0
637 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
638 %ext1 = extractelement <2 x double> %a0, i32 1
639 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
640 ret <2 x double> %ins1
643 define <2 x i64> @test_mm_cmpgt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
644 ; SSE-LABEL: test_mm_cmpgt_epi8:
646 ; SSE-NEXT: pcmpgtb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x64,0xc1]
647 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
649 ; AVX1-LABEL: test_mm_cmpgt_epi8:
651 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x64,0xc1]
652 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
654 ; AVX512-LABEL: test_mm_cmpgt_epi8:
656 ; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
657 ; AVX512-NEXT: vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
658 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
659 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
660 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
661 %cmp = icmp sgt <16 x i8> %arg0, %arg1
662 %res = sext <16 x i1> %cmp to <16 x i8>
663 %bc = bitcast <16 x i8> %res to <2 x i64>
667 define <2 x i64> @test_mm_cmpgt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
668 ; SSE-LABEL: test_mm_cmpgt_epi16:
670 ; SSE-NEXT: pcmpgtw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x65,0xc1]
671 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
673 ; AVX1-LABEL: test_mm_cmpgt_epi16:
675 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x65,0xc1]
676 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
678 ; AVX512-LABEL: test_mm_cmpgt_epi16:
680 ; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
681 ; AVX512-NEXT: vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
682 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
683 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
684 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
685 %cmp = icmp sgt <8 x i16> %arg0, %arg1
686 %res = sext <8 x i1> %cmp to <8 x i16>
687 %bc = bitcast <8 x i16> %res to <2 x i64>
691 define <2 x i64> @test_mm_cmpgt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
692 ; SSE-LABEL: test_mm_cmpgt_epi32:
694 ; SSE-NEXT: pcmpgtd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x66,0xc1]
695 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
697 ; AVX1-LABEL: test_mm_cmpgt_epi32:
699 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x66,0xc1]
700 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
702 ; AVX512-LABEL: test_mm_cmpgt_epi32:
704 ; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1]
705 ; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
706 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
707 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
708 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
709 %cmp = icmp sgt <4 x i32> %arg0, %arg1
710 %res = sext <4 x i1> %cmp to <4 x i32>
711 %bc = bitcast <4 x i32> %res to <2 x i64>
715 define <2 x double> @test_mm_cmpgt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
716 ; SSE-LABEL: test_mm_cmpgt_pd:
718 ; SSE-NEXT: cmpltpd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x01]
719 ; SSE-NEXT: movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
720 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
722 ; AVX1-LABEL: test_mm_cmpgt_pd:
724 ; AVX1-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x01]
725 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
727 ; AVX512-LABEL: test_mm_cmpgt_pd:
729 ; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x01]
730 ; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
731 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
732 %fcmp = fcmp olt <2 x double> %a1, %a0
733 %sext = sext <2 x i1> %fcmp to <2 x i64>
734 %res = bitcast <2 x i64> %sext to <2 x double>
735 ret <2 x double> %res
738 define <2 x double> @test_mm_cmpgt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
739 ; SSE-LABEL: test_mm_cmpgt_sd:
741 ; SSE-NEXT: cmpltsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x01]
742 ; SSE-NEXT: movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
743 ; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1]
744 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
746 ; AVX-LABEL: test_mm_cmpgt_sd:
748 ; AVX-NEXT: vcmpltsd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x01]
749 ; AVX-NEXT: vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
750 ; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1]
751 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
752 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 1)
753 %ext0 = extractelement <2 x double> %cmp, i32 0
754 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
755 %ext1 = extractelement <2 x double> %a0, i32 1
756 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
757 ret <2 x double> %ins1
760 define <2 x double> @test_mm_cmple_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
761 ; SSE-LABEL: test_mm_cmple_pd:
763 ; SSE-NEXT: cmplepd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x02]
764 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
766 ; AVX1-LABEL: test_mm_cmple_pd:
768 ; AVX1-NEXT: vcmplepd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x02]
769 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
771 ; AVX512-LABEL: test_mm_cmple_pd:
773 ; AVX512-NEXT: vcmplepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02]
774 ; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
775 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
776 %fcmp = fcmp ole <2 x double> %a0, %a1
777 %sext = sext <2 x i1> %fcmp to <2 x i64>
778 %res = bitcast <2 x i64> %sext to <2 x double>
779 ret <2 x double> %res
782 define <2 x double> @test_mm_cmple_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
783 ; SSE-LABEL: test_mm_cmple_sd:
785 ; SSE-NEXT: cmplesd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x02]
786 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
788 ; AVX-LABEL: test_mm_cmple_sd:
790 ; AVX-NEXT: vcmplesd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x02]
791 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
792 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 2)
793 ret <2 x double> %res
define <2 x i64> @test_mm_cmplt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_epi8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtb %xmm0, %xmm1 # encoding: [0x66,0x0f,0x64,0xc8]
; SSE-NEXT:    movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_epi8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x64,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_epi8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpgtb %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x64,0xc0]
; AVX512-NEXT:    vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp sgt <16 x i8> %arg1, %arg0
  %res = sext <16 x i1> %cmp to <16 x i8>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
define <2 x i64> @test_mm_cmplt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_epi16:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x65,0xc8]
; SSE-NEXT:    movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_epi16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x65,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_epi16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpgtw %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x65,0xc0]
; AVX512-NEXT:    vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp sgt <8 x i16> %arg1, %arg0
  %res = sext <8 x i1> %cmp to <8 x i16>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
define <2 x i64> @test_mm_cmplt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x66,0xc8]
; SSE-NEXT:    movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x66,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpgtd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x66,0xc0]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp sgt <4 x i32> %arg1, %arg0
  %res = sext <4 x i1> %cmp to <4 x i32>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
define <2 x double> @test_mm_cmplt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpltpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x01]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpltpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x01]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpltpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x01]
; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp olt <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}
define <2 x double> @test_mm_cmplt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpltsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x01]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmplt_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpltsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x01]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 1)
  ret <2 x double> %res
}
define <2 x double> @test_mm_cmpneq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpneq_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x04]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpneq_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpneqpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x04]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpneq_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneqpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x04]
; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp une <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}
define <2 x double> @test_mm_cmpneq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpneq_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x04]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpneq_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneqsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x04]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 4)
  ret <2 x double> %res
}
define <2 x double> @test_mm_cmpnge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnge_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnlepd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x06]
; SSE-NEXT:    movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnge_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpnlepd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x06]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnge_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpnlepd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x06]
; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp ugt <2 x double> %a1, %a0
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}
define <2 x double> @test_mm_cmpnge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnge_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnlesd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x06]
; SSE-NEXT:    movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnge_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpnlesd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x06]
; AVX-NEXT:    vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 6)
  %ext0 = extractelement <2 x double> %cmp, i32 0
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  ret <2 x double> %ins1
}
define <2 x double> @test_mm_cmpngt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpngt_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnltpd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x05]
; SSE-NEXT:    movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpngt_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpnltpd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x05]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpngt_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpnltpd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x05]
; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp uge <2 x double> %a1, %a0
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}
define <2 x double> @test_mm_cmpngt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpngt_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnltsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x05]
; SSE-NEXT:    movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpngt_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpnltsd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x05]
; AVX-NEXT:    vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 5)
  %ext0 = extractelement <2 x double> %cmp, i32 0
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  ret <2 x double> %ins1
}
define <2 x double> @test_mm_cmpnle_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnle_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnlepd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x06]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnle_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpnlepd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x06]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnle_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpnlepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x06]
; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp ugt <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}
define <2 x double> @test_mm_cmpnle_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnle_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnlesd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x06]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnle_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpnlesd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x06]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 6)
  ret <2 x double> %res
}
define <2 x double> @test_mm_cmpnlt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnlt_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnltpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x05]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnlt_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpnltpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x05]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnlt_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpnltpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x05]
; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp uge <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}
define <2 x double> @test_mm_cmpnlt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnlt_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnltsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x05]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnlt_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpnltsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x05]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 5)
  ret <2 x double> %res
}
define <2 x double> @test_mm_cmpord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpord_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x07]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpord_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpordpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x07]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpord_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpordpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x07]
; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp ord <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}
define <2 x double> @test_mm_cmpord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpord_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x07]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpord_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpordsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x07]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}
define <2 x double> @test_mm_cmpunord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpunord_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x03]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpunord_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpunordpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x03]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpunord_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpunordpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x03]
; AVX512-NEXT:    vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp uno <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}
define <2 x double> @test_mm_cmpunord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpunord_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x03]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpunord_sd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpunordsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x03]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 3)
  ret <2 x double> %res
}
define i32 @test_mm_comieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comieq_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
; SSE-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
; SSE-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
; SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comieq_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX1-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX1-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
; AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comieq_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX512-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX512-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
; AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
1207 declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
define i32 @test_mm_comige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comige_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comige_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comige_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
1233 declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
define i32 @test_mm_comigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comigt_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comigt_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comigt_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
1259 declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
define i32 @test_mm_comile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comile_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    comisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2f,0xc8]
; SSE-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comile_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vcomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX1-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comile_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vcomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX512-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
1285 declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
define i32 @test_mm_comilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comilt_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    comisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2f,0xc8]
; SSE-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comilt_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vcomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX1-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comilt_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vcomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX512-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
1311 declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
define i32 @test_mm_comineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comineq_sd:
; SSE:       # %bb.0:
; SSE-NEXT:    comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
; SSE-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
; SSE-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
; SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comineq_sd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX1-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX1-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
; AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comineq_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX512-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX512-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
; AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
1343 declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_mm_cvtepi32_pd(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_cvtepi32_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0 # encoding: [0xf3,0x0f,0xe6,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtepi32_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtepi32_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtdq2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %ext = shufflevector <4 x i32> %arg0, <4 x i32> %arg0, <2 x i32> <i32 0, i32 1>
  %res = sitofp <2 x i32> %ext to <2 x double>
  ret <2 x double> %res
}
define <4 x float> @test_mm_cvtepi32_ps(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_cvtepi32_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0 # encoding: [0x0f,0x5b,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtepi32_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5b,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtepi32_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtdq2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = sitofp <4 x i32> %arg0 to <4 x float>
  ret <4 x float> %res
}
define <2 x i64> @test_mm_cvtpd_epi32(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvtpd_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtpd2dq %xmm0, %xmm0 # encoding: [0xf2,0x0f,0xe6,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtpd_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtpd2dq %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xe6,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtpd_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
1405 declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
define <4 x float> @test_mm_cvtpd_ps(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvtpd_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtpd2ps %xmm0, %xmm0 # encoding: [0x66,0x0f,0x5a,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtpd_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtpd2ps %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5a,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtpd_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtpd2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
  ret <4 x float> %res
}
1425 declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
define <2 x i64> @test_mm_cvtps_epi32(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvtps_epi32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtps2dq %xmm0, %xmm0 # encoding: [0x66,0x0f,0x5b,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtps_epi32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtps2dq %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5b,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtps_epi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5b,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
1446 declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
define <2 x double> @test_mm_cvtps_pd(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvtps_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtps2pd %xmm0, %xmm0 # encoding: [0x0f,0x5a,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtps_pd:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtps2pd %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtps_pd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %ext = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1>
  %res = fpext <2 x float> %ext to <2 x double>
  ret <2 x double> %res
}
define double @test_mm_cvtsd_f64(<2 x double> %a0) nounwind {
; X86-SSE-LABEL: test_mm_cvtsd_f64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %ebp # encoding: [0x55]
; X86-SSE-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-SSE-NEXT:    andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
; X86-SSE-NEXT:    subl $8, %esp # encoding: [0x83,0xec,0x08]
; X86-SSE-NEXT:    movlps %xmm0, (%esp) # encoding: [0x0f,0x13,0x04,0x24]
; X86-SSE-NEXT:    fldl (%esp) # encoding: [0xdd,0x04,0x24]
; X86-SSE-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
; X86-SSE-NEXT:    popl %ebp # encoding: [0x5d]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtsd_f64:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    pushl %ebp # encoding: [0x55]
; X86-AVX1-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-AVX1-NEXT:    andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
; X86-AVX1-NEXT:    subl $8, %esp # encoding: [0x83,0xec,0x08]
; X86-AVX1-NEXT:    vmovlps %xmm0, (%esp) # encoding: [0xc5,0xf8,0x13,0x04,0x24]
; X86-AVX1-NEXT:    fldl (%esp) # encoding: [0xdd,0x04,0x24]
; X86-AVX1-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
; X86-AVX1-NEXT:    popl %ebp # encoding: [0x5d]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtsd_f64:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    pushl %ebp # encoding: [0x55]
; X86-AVX512-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-AVX512-NEXT:    andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
; X86-AVX512-NEXT:    subl $8, %esp # encoding: [0x83,0xec,0x08]
; X86-AVX512-NEXT:    vmovlps %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x04,0x24]
; X86-AVX512-NEXT:    fldl (%esp) # encoding: [0xdd,0x04,0x24]
; X86-AVX512-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
; X86-AVX512-NEXT:    popl %ebp # encoding: [0x5d]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm_cvtsd_f64:
; X64:       # %bb.0:
; X64-NEXT:    retq # encoding: [0xc3]
;
; X32-LABEL: test_mm_cvtsd_f64:
; X32:       # %bb.0:
; X32-NEXT:    retq # encoding: [0xc3]
  %res = extractelement <2 x double> %a0, i32 0
  ret double %res
}
define i32 @test_mm_cvtsd_si32(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvtsd_si32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtsd2si %xmm0, %eax # encoding: [0xf2,0x0f,0x2d,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtsd_si32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtsd2si %xmm0, %eax # encoding: [0xc5,0xfb,0x2d,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtsd_si32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtsd2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2d,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %res
}
1534 declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
define <4 x float> @test_mm_cvtsd_ss(<4 x float> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_cvtsd_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtsd2ss %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5a,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtsd_ss:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtsd2ss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtsd_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtsd2ss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
  ret <4 x float> %res
}
1554 declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, ptr %p1) {
; X86-SSE-LABEL: test_mm_cvtsd_ss_load:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    cvtsd2ss (%eax), %xmm0 # encoding: [0xf2,0x0f,0x5a,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtsd_ss_load:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vcvtsd2ss (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtsd_ss_load:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vcvtsd2ss (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_cvtsd_ss_load:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    cvtsd2ss (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x5a,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_cvtsd_ss_load:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vcvtsd2ss (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_cvtsd_ss_load:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vcvtsd2ss (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
;
; X32-SSE-LABEL: test_mm_cvtsd_ss_load:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    cvtsd2ss (%edi), %xmm0 # encoding: [0x67,0xf2,0x0f,0x5a,0x07]
; X32-SSE-NEXT:    retq # encoding: [0xc3]
;
; X32-AVX1-LABEL: test_mm_cvtsd_ss_load:
; X32-AVX1:       # %bb.0:
; X32-AVX1-NEXT:    vcvtsd2ss (%edi), %xmm0, %xmm0 # encoding: [0x67,0xc5,0xfb,0x5a,0x07]
; X32-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X32-AVX512-LABEL: test_mm_cvtsd_ss_load:
; X32-AVX512:       # %bb.0:
; X32-AVX512-NEXT:    vcvtsd2ss (%edi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x5a,0x07]
; X32-AVX512-NEXT:    retq # encoding: [0xc3]
  %a1 = load <2 x double>, ptr %p1
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
  ret <4 x float> %res
}
1609 define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind {
1610 ; SSE-LABEL: test_mm_cvtsi128_si32:
1612 ; SSE-NEXT: movd %xmm0, %eax # encoding: [0x66,0x0f,0x7e,0xc0]
1613 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1615 ; AVX1-LABEL: test_mm_cvtsi128_si32:
1617 ; AVX1-NEXT: vmovd %xmm0, %eax # encoding: [0xc5,0xf9,0x7e,0xc0]
1618 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1620 ; AVX512-LABEL: test_mm_cvtsi128_si32:
1622 ; AVX512-NEXT: vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0]
1623 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1624 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1625 %res = extractelement <4 x i32> %arg0, i32 0
1629 define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind {
1630 ; X86-SSE-LABEL: test_mm_cvtsi32_sd:
1632 ; X86-SSE-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x2a,0x44,0x24,0x04]
1633 ; X86-SSE-NEXT: retl # encoding: [0xc3]
1635 ; X86-AVX1-LABEL: test_mm_cvtsi32_sd:
1636 ; X86-AVX1: # %bb.0:
1637 ; X86-AVX1-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
1638 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
1640 ; X86-AVX512-LABEL: test_mm_cvtsi32_sd:
1641 ; X86-AVX512: # %bb.0:
1642 ; X86-AVX512-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
1643 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
1645 ; X64-SSE-LABEL: test_mm_cvtsi32_sd:
1647 ; X64-SSE-NEXT: cvtsi2sd %edi, %xmm0 # encoding: [0xf2,0x0f,0x2a,0xc7]
1648 ; X64-SSE-NEXT: retq # encoding: [0xc3]
1650 ; X64-AVX1-LABEL: test_mm_cvtsi32_sd:
1651 ; X64-AVX1: # %bb.0:
1652 ; X64-AVX1-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x2a,0xc7]
1653 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
1655 ; X64-AVX512-LABEL: test_mm_cvtsi32_sd:
1656 ; X64-AVX512: # %bb.0:
1657 ; X64-AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7]
1658 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
1660 ; X32-SSE-LABEL: test_mm_cvtsi32_sd:
1662 ; X32-SSE-NEXT: cvtsi2sd %edi, %xmm0 # encoding: [0xf2,0x0f,0x2a,0xc7]
1663 ; X32-SSE-NEXT: retq # encoding: [0xc3]
1665 ; X32-AVX1-LABEL: test_mm_cvtsi32_sd:
1666 ; X32-AVX1: # %bb.0:
1667 ; X32-AVX1-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x2a,0xc7]
1668 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
1670 ; X32-AVX512-LABEL: test_mm_cvtsi32_sd:
1671 ; X32-AVX512: # %bb.0:
1672 ; X32-AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7]
1673 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
1674 %cvt = sitofp i32 %a1 to double
1675 %res = insertelement <2 x double> %a0, double %cvt, i32 0
1676 ret <2 x double> %res
1679 define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind {
1680 ; X86-SSE-LABEL: test_mm_cvtsi32_si128:
1682 ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1683 ; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
1684 ; X86-SSE-NEXT: retl # encoding: [0xc3]
1686 ; X86-AVX1-LABEL: test_mm_cvtsi32_si128:
1687 ; X86-AVX1: # %bb.0:
1688 ; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1689 ; X86-AVX1-NEXT: # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
1690 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
1692 ; X86-AVX512-LABEL: test_mm_cvtsi32_si128:
1693 ; X86-AVX512: # %bb.0:
1694 ; X86-AVX512-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1695 ; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
1696 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
1698 ; X64-SSE-LABEL: test_mm_cvtsi32_si128:
1700 ; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
1701 ; X64-SSE-NEXT: retq # encoding: [0xc3]
1703 ; X64-AVX1-LABEL: test_mm_cvtsi32_si128:
1704 ; X64-AVX1: # %bb.0:
1705 ; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
1706 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
1708 ; X64-AVX512-LABEL: test_mm_cvtsi32_si128:
1709 ; X64-AVX512: # %bb.0:
1710 ; X64-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
1711 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
1713 ; X32-SSE-LABEL: test_mm_cvtsi32_si128:
1715 ; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
1716 ; X32-SSE-NEXT: retq # encoding: [0xc3]
1718 ; X32-AVX1-LABEL: test_mm_cvtsi32_si128:
1719 ; X32-AVX1: # %bb.0:
1720 ; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
1721 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
1723 ; X32-AVX512-LABEL: test_mm_cvtsi32_si128:
1724 ; X32-AVX512: # %bb.0:
1725 ; X32-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
1726 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
1727 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
1728 %res1 = insertelement <4 x i32> %res0, i32 0, i32 1
1729 %res2 = insertelement <4 x i32> %res1, i32 0, i32 2
1730 %res3 = insertelement <4 x i32> %res2, i32 0, i32 3
1731 %res = bitcast <4 x i32> %res3 to <2 x i64>
1735 define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwind {
1736 ; SSE-LABEL: test_mm_cvtss_sd:
1738 ; SSE-NEXT: cvtss2sd %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5a,0xc1]
1739 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1741 ; AVX1-LABEL: test_mm_cvtss_sd:
1743 ; AVX1-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5a,0xc1]
1744 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1746 ; AVX512-LABEL: test_mm_cvtss_sd:
1748 ; AVX512-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0xc1]
1749 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1750 %ext = extractelement <4 x float> %a1, i32 0
1751 %cvt = fpext float %ext to double
1752 %res = insertelement <2 x double> %a0, double %cvt, i32 0
1753 ret <2 x double> %res
1756 define <2 x i64> @test_mm_cvttpd_epi32(<2 x double> %a0) nounwind {
1757 ; SSE-LABEL: test_mm_cvttpd_epi32:
1759 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 # encoding: [0x66,0x0f,0xe6,0xc0]
1760 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1762 ; AVX1-LABEL: test_mm_cvttpd_epi32:
1764 ; AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe6,0xc0]
1765 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1767 ; AVX512-LABEL: test_mm_cvttpd_epi32:
1769 ; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
1770 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1771 %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
1772 %bc = bitcast <4 x i32> %res to <2 x i64>
1775 declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
1777 define <2 x i64> @test_mm_cvttps_epi32(<4 x float> %a0) nounwind {
1778 ; SSE-LABEL: test_mm_cvttps_epi32:
1780 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x5b,0xc0]
1781 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1783 ; AVX1-LABEL: test_mm_cvttps_epi32:
1785 ; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5b,0xc0]
1786 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1788 ; AVX512-LABEL: test_mm_cvttps_epi32:
1790 ; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xc0]
1791 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1792 %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
1793 %bc = bitcast <4 x i32> %res to <2 x i64>
1796 declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
1798 define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind {
1799 ; SSE-LABEL: test_mm_cvttsd_si32:
1801 ; SSE-NEXT: cvttsd2si %xmm0, %eax # encoding: [0xf2,0x0f,0x2c,0xc0]
1802 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1804 ; AVX1-LABEL: test_mm_cvttsd_si32:
1806 ; AVX1-NEXT: vcvttsd2si %xmm0, %eax # encoding: [0xc5,0xfb,0x2c,0xc0]
1807 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1809 ; AVX512-LABEL: test_mm_cvttsd_si32:
1811 ; AVX512-NEXT: vcvttsd2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2c,0xc0]
1812 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1813 %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
1816 declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
1818 define <2 x double> @test_mm_div_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1819 ; SSE-LABEL: test_mm_div_pd:
1821 ; SSE-NEXT: divpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5e,0xc1]
1822 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1824 ; AVX1-LABEL: test_mm_div_pd:
1826 ; AVX1-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5e,0xc1]
1827 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1829 ; AVX512-LABEL: test_mm_div_pd:
1831 ; AVX512-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5e,0xc1]
1832 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1833 %res = fdiv <2 x double> %a0, %a1
1834 ret <2 x double> %res
1837 define <2 x double> @test_mm_div_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1838 ; SSE-LABEL: test_mm_div_sd:
1840 ; SSE-NEXT: divsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5e,0xc1]
1841 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1843 ; AVX1-LABEL: test_mm_div_sd:
1845 ; AVX1-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5e,0xc1]
1846 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1848 ; AVX512-LABEL: test_mm_div_sd:
1850 ; AVX512-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5e,0xc1]
1851 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1852 %ext0 = extractelement <2 x double> %a0, i32 0
1853 %ext1 = extractelement <2 x double> %a1, i32 0
1854 %fdiv = fdiv double %ext0, %ext1
1855 %res = insertelement <2 x double> %a0, double %fdiv, i32 0
1856 ret <2 x double> %res
1859 define i32 @test_mm_extract_epi16(<2 x i64> %a0) nounwind {
1860 ; SSE-LABEL: test_mm_extract_epi16:
1862 ; SSE-NEXT: pextrw $1, %xmm0, %eax # encoding: [0x66,0x0f,0xc5,0xc0,0x01]
1863 ; SSE-NEXT: movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
1864 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1866 ; AVX1-LABEL: test_mm_extract_epi16:
1868 ; AVX1-NEXT: vpextrw $1, %xmm0, %eax # encoding: [0xc5,0xf9,0xc5,0xc0,0x01]
1869 ; AVX1-NEXT: movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
1870 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1872 ; AVX512-LABEL: test_mm_extract_epi16:
1874 ; AVX512-NEXT: vpextrw $1, %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc5,0xc0,0x01]
1875 ; AVX512-NEXT: movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
1876 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1877 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1878 %ext = extractelement <8 x i16> %arg0, i32 1
1879 %res = zext i16 %ext to i32
1883 define <2 x i64> @test_mm_insert_epi16(<2 x i64> %a0, i16 %a1) nounwind {
1884 ; X86-SSE-LABEL: test_mm_insert_epi16:
1886 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
1887 ; X86-SSE-NEXT: pinsrw $1, %eax, %xmm0 # encoding: [0x66,0x0f,0xc4,0xc0,0x01]
1888 ; X86-SSE-NEXT: retl # encoding: [0xc3]
1890 ; X86-AVX1-LABEL: test_mm_insert_epi16:
1891 ; X86-AVX1: # %bb.0:
1892 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
1893 ; X86-AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
1894 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
1896 ; X86-AVX512-LABEL: test_mm_insert_epi16:
1897 ; X86-AVX512: # %bb.0:
1898 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
1899 ; X86-AVX512-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
1900 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
1902 ; X64-SSE-LABEL: test_mm_insert_epi16:
1904 ; X64-SSE-NEXT: pinsrw $1, %edi, %xmm0 # encoding: [0x66,0x0f,0xc4,0xc7,0x01]
1905 ; X64-SSE-NEXT: retq # encoding: [0xc3]
1907 ; X64-AVX1-LABEL: test_mm_insert_epi16:
1908 ; X64-AVX1: # %bb.0:
1909 ; X64-AVX1-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x01]
1910 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
1912 ; X64-AVX512-LABEL: test_mm_insert_epi16:
1913 ; X64-AVX512: # %bb.0:
1914 ; X64-AVX512-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x01]
1915 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
1917 ; X32-SSE-LABEL: test_mm_insert_epi16:
1919 ; X32-SSE-NEXT: pinsrw $1, %edi, %xmm0 # encoding: [0x66,0x0f,0xc4,0xc7,0x01]
1920 ; X32-SSE-NEXT: retq # encoding: [0xc3]
1922 ; X32-AVX1-LABEL: test_mm_insert_epi16:
1923 ; X32-AVX1: # %bb.0:
1924 ; X32-AVX1-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x01]
1925 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
1927 ; X32-AVX512-LABEL: test_mm_insert_epi16:
1928 ; X32-AVX512: # %bb.0:
1929 ; X32-AVX512-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x01]
1930 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
1931 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1932 %res = insertelement <8 x i16> %arg0, i16 %a1,i32 1
1933 %bc = bitcast <8 x i16> %res to <2 x i64>
1937 define void @test_mm_lfence() nounwind {
1938 ; CHECK-LABEL: test_mm_lfence:
1940 ; CHECK-NEXT: lfence # encoding: [0x0f,0xae,0xe8]
1941 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1942 call void @llvm.x86.sse2.lfence()
1945 declare void @llvm.x86.sse2.lfence() nounwind readnone
1947 define <2 x double> @test_mm_load_pd(ptr %a0) nounwind {
1948 ; X86-SSE-LABEL: test_mm_load_pd:
1950 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1951 ; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00]
1952 ; X86-SSE-NEXT: retl # encoding: [0xc3]
1954 ; X86-AVX1-LABEL: test_mm_load_pd:
1955 ; X86-AVX1: # %bb.0:
1956 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1957 ; X86-AVX1-NEXT: vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00]
1958 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
1960 ; X86-AVX512-LABEL: test_mm_load_pd:
1961 ; X86-AVX512: # %bb.0:
1962 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1963 ; X86-AVX512-NEXT: vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
1964 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
1966 ; X64-SSE-LABEL: test_mm_load_pd:
1968 ; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
1969 ; X64-SSE-NEXT: retq # encoding: [0xc3]
1971 ; X64-AVX1-LABEL: test_mm_load_pd:
1972 ; X64-AVX1: # %bb.0:
1973 ; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07]
1974 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
1976 ; X64-AVX512-LABEL: test_mm_load_pd:
1977 ; X64-AVX512: # %bb.0:
1978 ; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
1979 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
1981 ; X32-SSE-LABEL: test_mm_load_pd:
1983 ; X32-SSE-NEXT: movaps (%edi), %xmm0 # encoding: [0x67,0x0f,0x28,0x07]
1984 ; X32-SSE-NEXT: retq # encoding: [0xc3]
1986 ; X32-AVX1-LABEL: test_mm_load_pd:
1987 ; X32-AVX1: # %bb.0:
1988 ; X32-AVX1-NEXT: vmovaps (%edi), %xmm0 # encoding: [0x67,0xc5,0xf8,0x28,0x07]
1989 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
1991 ; X32-AVX512-LABEL: test_mm_load_pd:
1992 ; X32-AVX512: # %bb.0:
1993 ; X32-AVX512-NEXT: vmovaps (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x28,0x07]
1994 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
1995 %res = load <2 x double>, ptr %a0, align 16
1996 ret <2 x double> %res
1999 define <2 x double> @test_mm_load_sd(ptr %a0) nounwind {
2000 ; X86-SSE-LABEL: test_mm_load_sd:
2002 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2003 ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2004 ; X86-SSE-NEXT: # encoding: [0xf2,0x0f,0x10,0x00]
2005 ; X86-SSE-NEXT: retl # encoding: [0xc3]
2007 ; X86-AVX1-LABEL: test_mm_load_sd:
2008 ; X86-AVX1: # %bb.0:
2009 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2010 ; X86-AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2011 ; X86-AVX1-NEXT: # encoding: [0xc5,0xfb,0x10,0x00]
2012 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
2014 ; X86-AVX512-LABEL: test_mm_load_sd:
2015 ; X86-AVX512: # %bb.0:
2016 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2017 ; X86-AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2018 ; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
2019 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
2021 ; X64-SSE-LABEL: test_mm_load_sd:
2023 ; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2024 ; X64-SSE-NEXT: # encoding: [0xf2,0x0f,0x10,0x07]
2025 ; X64-SSE-NEXT: retq # encoding: [0xc3]
2027 ; X64-AVX1-LABEL: test_mm_load_sd:
2028 ; X64-AVX1: # %bb.0:
2029 ; X64-AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2030 ; X64-AVX1-NEXT: # encoding: [0xc5,0xfb,0x10,0x07]
2031 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
2033 ; X64-AVX512-LABEL: test_mm_load_sd:
2034 ; X64-AVX512: # %bb.0:
2035 ; X64-AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2036 ; X64-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
2037 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
2039 ; X32-SSE-LABEL: test_mm_load_sd:
2041 ; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2042 ; X32-SSE-NEXT: # encoding: [0x67,0xf2,0x0f,0x10,0x07]
2043 ; X32-SSE-NEXT: retq # encoding: [0xc3]
2045 ; X32-AVX1-LABEL: test_mm_load_sd:
2046 ; X32-AVX1: # %bb.0:
2047 ; X32-AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2048 ; X32-AVX1-NEXT: # encoding: [0x67,0xc5,0xfb,0x10,0x07]
2049 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
2051 ; X32-AVX512-LABEL: test_mm_load_sd:
2052 ; X32-AVX512: # %bb.0:
2053 ; X32-AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2054 ; X32-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x10,0x07]
2055 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
2056 %ld = load double, ptr %a0, align 1
2057 %res0 = insertelement <2 x double> undef, double %ld, i32 0
2058 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
2059 ret <2 x double> %res1
2062 define <2 x i64> @test_mm_load_si128(ptr %a0) nounwind {
2063 ; X86-SSE-LABEL: test_mm_load_si128:
2065 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2066 ; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00]
2067 ; X86-SSE-NEXT: retl # encoding: [0xc3]
2069 ; X86-AVX1-LABEL: test_mm_load_si128:
2070 ; X86-AVX1: # %bb.0:
2071 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2072 ; X86-AVX1-NEXT: vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00]
2073 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
2075 ; X86-AVX512-LABEL: test_mm_load_si128:
2076 ; X86-AVX512: # %bb.0:
2077 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2078 ; X86-AVX512-NEXT: vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
2079 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
2081 ; X64-SSE-LABEL: test_mm_load_si128:
2083 ; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
2084 ; X64-SSE-NEXT: retq # encoding: [0xc3]
2086 ; X64-AVX1-LABEL: test_mm_load_si128:
2087 ; X64-AVX1: # %bb.0:
2088 ; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07]
2089 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
2091 ; X64-AVX512-LABEL: test_mm_load_si128:
2092 ; X64-AVX512: # %bb.0:
2093 ; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
2094 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
2096 ; X32-SSE-LABEL: test_mm_load_si128:
2098 ; X32-SSE-NEXT: movaps (%edi), %xmm0 # encoding: [0x67,0x0f,0x28,0x07]
2099 ; X32-SSE-NEXT: retq # encoding: [0xc3]
2101 ; X32-AVX1-LABEL: test_mm_load_si128:
2102 ; X32-AVX1: # %bb.0:
2103 ; X32-AVX1-NEXT: vmovaps (%edi), %xmm0 # encoding: [0x67,0xc5,0xf8,0x28,0x07]
2104 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
2106 ; X32-AVX512-LABEL: test_mm_load_si128:
2107 ; X32-AVX512: # %bb.0:
2108 ; X32-AVX512-NEXT: vmovaps (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x28,0x07]
2109 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
2110 %res = load <2 x i64>, ptr %a0, align 16
2114 define <2 x double> @test_mm_load1_pd(ptr %a0) nounwind {
2115 ; X86-SSE-LABEL: test_mm_load1_pd:
2117 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2118 ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2119 ; X86-SSE-NEXT: # encoding: [0xf2,0x0f,0x10,0x00]
2120 ; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
2121 ; X86-SSE-NEXT: # xmm0 = xmm0[0,0]
2122 ; X86-SSE-NEXT: retl # encoding: [0xc3]
2124 ; X86-AVX1-LABEL: test_mm_load1_pd:
2125 ; X86-AVX1: # %bb.0:
2126 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2127 ; X86-AVX1-NEXT: vmovddup (%eax), %xmm0 # encoding: [0xc5,0xfb,0x12,0x00]
2128 ; X86-AVX1-NEXT: # xmm0 = mem[0,0]
2129 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
2131 ; X86-AVX512-LABEL: test_mm_load1_pd:
2132 ; X86-AVX512: # %bb.0:
2133 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2134 ; X86-AVX512-NEXT: vmovddup (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x00]
2135 ; X86-AVX512-NEXT: # xmm0 = mem[0,0]
2136 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
2138 ; X64-SSE-LABEL: test_mm_load1_pd:
2140 ; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2141 ; X64-SSE-NEXT: # encoding: [0xf2,0x0f,0x10,0x07]
2142 ; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
2143 ; X64-SSE-NEXT: # xmm0 = xmm0[0,0]
2144 ; X64-SSE-NEXT: retq # encoding: [0xc3]
2146 ; X64-AVX1-LABEL: test_mm_load1_pd:
2147 ; X64-AVX1: # %bb.0:
2148 ; X64-AVX1-NEXT: vmovddup (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x12,0x07]
2149 ; X64-AVX1-NEXT: # xmm0 = mem[0,0]
2150 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
2152 ; X64-AVX512-LABEL: test_mm_load1_pd:
2153 ; X64-AVX512: # %bb.0:
2154 ; X64-AVX512-NEXT: vmovddup (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x07]
2155 ; X64-AVX512-NEXT: # xmm0 = mem[0,0]
2156 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
2158 ; X32-SSE-LABEL: test_mm_load1_pd:
2160 ; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2161 ; X32-SSE-NEXT: # encoding: [0x67,0xf2,0x0f,0x10,0x07]
2162 ; X32-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
2163 ; X32-SSE-NEXT: # xmm0 = xmm0[0,0]
2164 ; X32-SSE-NEXT: retq # encoding: [0xc3]
2166 ; X32-AVX1-LABEL: test_mm_load1_pd:
2167 ; X32-AVX1: # %bb.0:
2168 ; X32-AVX1-NEXT: vmovddup (%edi), %xmm0 # encoding: [0x67,0xc5,0xfb,0x12,0x07]
2169 ; X32-AVX1-NEXT: # xmm0 = mem[0,0]
2170 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
2172 ; X32-AVX512-LABEL: test_mm_load1_pd:
2173 ; X32-AVX512: # %bb.0:
2174 ; X32-AVX512-NEXT: vmovddup (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x12,0x07]
2175 ; X32-AVX512-NEXT: # xmm0 = mem[0,0]
2176 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
2177 %ld = load double, ptr %a0, align 8
2178 %res0 = insertelement <2 x double> undef, double %ld, i32 0
2179 %res1 = insertelement <2 x double> %res0, double %ld, i32 1
2180 ret <2 x double> %res1
2183 define <2 x double> @test_mm_loadh_pd(<2 x double> %a0, ptr %a1) nounwind {
2184 ; X86-SSE-LABEL: test_mm_loadh_pd:
2186 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2187 ; X86-SSE-NEXT: movhps (%eax), %xmm0 # encoding: [0x0f,0x16,0x00]
2188 ; X86-SSE-NEXT: # xmm0 = xmm0[0,1],mem[0,1]
2189 ; X86-SSE-NEXT: retl # encoding: [0xc3]
2191 ; X86-AVX1-LABEL: test_mm_loadh_pd:
2192 ; X86-AVX1: # %bb.0:
2193 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2194 ; X86-AVX1-NEXT: vmovhps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0x00]
2195 ; X86-AVX1-NEXT: # xmm0 = xmm0[0,1],mem[0,1]
2196 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
2198 ; X86-AVX512-LABEL: test_mm_loadh_pd:
2199 ; X86-AVX512: # %bb.0:
2200 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2201 ; X86-AVX512-NEXT: vmovhps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0x00]
2202 ; X86-AVX512-NEXT: # xmm0 = xmm0[0,1],mem[0,1]
2203 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
2205 ; X64-SSE-LABEL: test_mm_loadh_pd:
2207 ; X64-SSE-NEXT: movhps (%rdi), %xmm0 # encoding: [0x0f,0x16,0x07]
2208 ; X64-SSE-NEXT: # xmm0 = xmm0[0,1],mem[0,1]
2209 ; X64-SSE-NEXT: retq # encoding: [0xc3]
2211 ; X64-AVX1-LABEL: test_mm_loadh_pd:
2212 ; X64-AVX1: # %bb.0:
2213 ; X64-AVX1-NEXT: vmovhps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0x07]
2214 ; X64-AVX1-NEXT: # xmm0 = xmm0[0,1],mem[0,1]
2215 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
2217 ; X64-AVX512-LABEL: test_mm_loadh_pd:
2218 ; X64-AVX512: # %bb.0:
2219 ; X64-AVX512-NEXT: vmovhps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0x07]
2220 ; X64-AVX512-NEXT: # xmm0 = xmm0[0,1],mem[0,1]
2221 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
2223 ; X32-SSE-LABEL: test_mm_loadh_pd:
2225 ; X32-SSE-NEXT: movhps (%edi), %xmm0 # encoding: [0x67,0x0f,0x16,0x07]
2226 ; X32-SSE-NEXT: # xmm0 = xmm0[0,1],mem[0,1]
2227 ; X32-SSE-NEXT: retq # encoding: [0xc3]
2229 ; X32-AVX1-LABEL: test_mm_loadh_pd:
2230 ; X32-AVX1: # %bb.0:
2231 ; X32-AVX1-NEXT: vmovhps (%edi), %xmm0, %xmm0 # encoding: [0x67,0xc5,0xf8,0x16,0x07]
2232 ; X32-AVX1-NEXT: # xmm0 = xmm0[0,1],mem[0,1]
2233 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
2235 ; X32-AVX512-LABEL: test_mm_loadh_pd:
2236 ; X32-AVX512: # %bb.0:
2237 ; X32-AVX512-NEXT: vmovhps (%edi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x16,0x07]
2238 ; X32-AVX512-NEXT: # xmm0 = xmm0[0,1],mem[0,1]
2239 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
2240 %ld = load double, ptr %a1, align 8
2241 %res = insertelement <2 x double> %a0, double %ld, i32 1
2242 ret <2 x double> %res
2245 define <2 x i64> @test_mm_loadl_epi64(<2 x i64> %a0, ptr %a1) nounwind {
2246 ; X86-SSE-LABEL: test_mm_loadl_epi64:
2248 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2249 ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2250 ; X86-SSE-NEXT: # encoding: [0xf2,0x0f,0x10,0x00]
2251 ; X86-SSE-NEXT: retl # encoding: [0xc3]
2253 ; X86-AVX1-LABEL: test_mm_loadl_epi64:
2254 ; X86-AVX1: # %bb.0:
2255 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2256 ; X86-AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2257 ; X86-AVX1-NEXT: # encoding: [0xc5,0xfb,0x10,0x00]
2258 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
2260 ; X86-AVX512-LABEL: test_mm_loadl_epi64:
2261 ; X86-AVX512: # %bb.0:
2262 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2263 ; X86-AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2264 ; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
2265 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
2267 ; X64-SSE-LABEL: test_mm_loadl_epi64:
2269 ; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2270 ; X64-SSE-NEXT: # encoding: [0xf2,0x0f,0x10,0x07]
2271 ; X64-SSE-NEXT: retq # encoding: [0xc3]
2273 ; X64-AVX1-LABEL: test_mm_loadl_epi64:
2274 ; X64-AVX1: # %bb.0:
2275 ; X64-AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2276 ; X64-AVX1-NEXT: # encoding: [0xc5,0xfb,0x10,0x07]
2277 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
2279 ; X64-AVX512-LABEL: test_mm_loadl_epi64:
2280 ; X64-AVX512: # %bb.0:
2281 ; X64-AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2282 ; X64-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
2283 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
2285 ; X32-SSE-LABEL: test_mm_loadl_epi64:
2287 ; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2288 ; X32-SSE-NEXT: # encoding: [0x67,0xf2,0x0f,0x10,0x07]
2289 ; X32-SSE-NEXT: retq # encoding: [0xc3]
2291 ; X32-AVX1-LABEL: test_mm_loadl_epi64:
2292 ; X32-AVX1: # %bb.0:
2293 ; X32-AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2294 ; X32-AVX1-NEXT: # encoding: [0x67,0xc5,0xfb,0x10,0x07]
2295 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
2297 ; X32-AVX512-LABEL: test_mm_loadl_epi64:
2298 ; X32-AVX512: # %bb.0:
2299 ; X32-AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2300 ; X32-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x10,0x07]
2301 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
2302 %ld = load i64, ptr %a1, align 1
2303 %res0 = insertelement <2 x i64> undef, i64 %ld, i32 0
2304 %res1 = insertelement <2 x i64> %res0, i64 0, i32 1
2308 define <2 x double> @test_mm_loadl_pd(<2 x double> %a0, ptr %a1) nounwind {
2309 ; X86-SSE-LABEL: test_mm_loadl_pd:
2311 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2312 ; X86-SSE-NEXT: movlps (%eax), %xmm0 # encoding: [0x0f,0x12,0x00]
2313 ; X86-SSE-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
2314 ; X86-SSE-NEXT: retl # encoding: [0xc3]
2316 ; X86-AVX1-LABEL: test_mm_loadl_pd:
2317 ; X86-AVX1: # %bb.0:
2318 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2319 ; X86-AVX1-NEXT: vmovlps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x12,0x00]
2320 ; X86-AVX1-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
2321 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
2323 ; X86-AVX512-LABEL: test_mm_loadl_pd:
2324 ; X86-AVX512: # %bb.0:
2325 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2326 ; X86-AVX512-NEXT: vmovlps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x12,0x00]
2327 ; X86-AVX512-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
2328 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
2330 ; X64-SSE-LABEL: test_mm_loadl_pd:
2332 ; X64-SSE-NEXT: movlps (%rdi), %xmm0 # encoding: [0x0f,0x12,0x07]
2333 ; X64-SSE-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
2334 ; X64-SSE-NEXT: retq # encoding: [0xc3]
2336 ; X64-AVX1-LABEL: test_mm_loadl_pd:
2337 ; X64-AVX1: # %bb.0:
2338 ; X64-AVX1-NEXT: vmovlps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x12,0x07]
2339 ; X64-AVX1-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
2340 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
2342 ; X64-AVX512-LABEL: test_mm_loadl_pd:
2343 ; X64-AVX512: # %bb.0:
2344 ; X64-AVX512-NEXT: vmovlps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x12,0x07]
2345 ; X64-AVX512-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
2346 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
2348 ; X32-SSE-LABEL: test_mm_loadl_pd:
2350 ; X32-SSE-NEXT: movlps (%edi), %xmm0 # encoding: [0x67,0x0f,0x12,0x07]
2351 ; X32-SSE-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
2352 ; X32-SSE-NEXT: retq # encoding: [0xc3]
2354 ; X32-AVX1-LABEL: test_mm_loadl_pd:
2355 ; X32-AVX1: # %bb.0:
2356 ; X32-AVX1-NEXT: vmovlps (%edi), %xmm0, %xmm0 # encoding: [0x67,0xc5,0xf8,0x12,0x07]
2357 ; X32-AVX1-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
2358 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
2360 ; X32-AVX512-LABEL: test_mm_loadl_pd:
2361 ; X32-AVX512: # %bb.0:
2362 ; X32-AVX512-NEXT: vmovlps (%edi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x12,0x07]
2363 ; X32-AVX512-NEXT: # xmm0 = mem[0,1],xmm0[2,3]
2364 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
2365 %ld = load double, ptr %a1, align 8
2366 %res = insertelement <2 x double> %a0, double %ld, i32 0
2367 ret <2 x double> %res
; _mm_loadr_pd: aligned 16-byte load with the two doubles reversed.
; SSE lowers to movaps + shufps $78 (lane swap); AVX/AVX512 fold the load
; into vpermilpd $1. Assertions autogenerated; regenerate rather than hand-edit.
2370 define <2 x double> @test_mm_loadr_pd(ptr %a0) nounwind {
2371 ; X86-SSE-LABEL: test_mm_loadr_pd:
2373 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2374 ; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00]
2375 ; X86-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e]
2376 ; X86-SSE-NEXT: # xmm0 = xmm0[2,3,0,1]
2377 ; X86-SSE-NEXT: retl # encoding: [0xc3]
2379 ; X86-AVX1-LABEL: test_mm_loadr_pd:
2380 ; X86-AVX1: # %bb.0:
2381 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2382 ; X86-AVX1-NEXT: vpermilpd $1, (%eax), %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0x00,0x01]
2383 ; X86-AVX1-NEXT: # xmm0 = mem[1,0]
2384 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
2386 ; X86-AVX512-LABEL: test_mm_loadr_pd:
2387 ; X86-AVX512: # %bb.0:
2388 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2389 ; X86-AVX512-NEXT: vpermilpd $1, (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0x00,0x01]
2390 ; X86-AVX512-NEXT: # xmm0 = mem[1,0]
2391 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
2393 ; X64-SSE-LABEL: test_mm_loadr_pd:
2395 ; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
2396 ; X64-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e]
2397 ; X64-SSE-NEXT: # xmm0 = xmm0[2,3,0,1]
2398 ; X64-SSE-NEXT: retq # encoding: [0xc3]
2400 ; X64-AVX1-LABEL: test_mm_loadr_pd:
2401 ; X64-AVX1: # %bb.0:
2402 ; X64-AVX1-NEXT: vpermilpd $1, (%rdi), %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0x07,0x01]
2403 ; X64-AVX1-NEXT: # xmm0 = mem[1,0]
2404 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
2406 ; X64-AVX512-LABEL: test_mm_loadr_pd:
2407 ; X64-AVX512: # %bb.0:
2408 ; X64-AVX512-NEXT: vpermilpd $1, (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0x07,0x01]
2409 ; X64-AVX512-NEXT: # xmm0 = mem[1,0]
2410 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
2412 ; X32-SSE-LABEL: test_mm_loadr_pd:
2414 ; X32-SSE-NEXT: movaps (%edi), %xmm0 # encoding: [0x67,0x0f,0x28,0x07]
2415 ; X32-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e]
2416 ; X32-SSE-NEXT: # xmm0 = xmm0[2,3,0,1]
2417 ; X32-SSE-NEXT: retq # encoding: [0xc3]
2419 ; X32-AVX1-LABEL: test_mm_loadr_pd:
2420 ; X32-AVX1: # %bb.0:
2421 ; X32-AVX1-NEXT: vpermilpd $1, (%edi), %xmm0 # encoding: [0x67,0xc4,0xe3,0x79,0x05,0x07,0x01]
2422 ; X32-AVX1-NEXT: # xmm0 = mem[1,0]
2423 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
2425 ; X32-AVX512-LABEL: test_mm_loadr_pd:
2426 ; X32-AVX512: # %bb.0:
2427 ; X32-AVX512-NEXT: vpermilpd $1, (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc4,0xe3,0x79,0x05,0x07,0x01]
2428 ; X32-AVX512-NEXT: # xmm0 = mem[1,0]
2429 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
2430 %ld = load <2 x double>, ptr %a0, align 16
2431 %res = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 0>
2432 ret <2 x double> %res
; _mm_loadu_pd: unaligned (align 1) 128-bit load of two doubles.
; Lowers to movups / vmovups on every configuration; gnux32 variants carry
; the 0x67 address-size prefix. Assertions autogenerated — do not hand-edit.
2435 define <2 x double> @test_mm_loadu_pd(ptr %a0) nounwind {
2436 ; X86-SSE-LABEL: test_mm_loadu_pd:
2438 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2439 ; X86-SSE-NEXT: movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00]
2440 ; X86-SSE-NEXT: retl # encoding: [0xc3]
2442 ; X86-AVX1-LABEL: test_mm_loadu_pd:
2443 ; X86-AVX1: # %bb.0:
2444 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2445 ; X86-AVX1-NEXT: vmovups (%eax), %xmm0 # encoding: [0xc5,0xf8,0x10,0x00]
2446 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
2448 ; X86-AVX512-LABEL: test_mm_loadu_pd:
2449 ; X86-AVX512: # %bb.0:
2450 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2451 ; X86-AVX512-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
2452 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
2454 ; X64-SSE-LABEL: test_mm_loadu_pd:
2456 ; X64-SSE-NEXT: movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07]
2457 ; X64-SSE-NEXT: retq # encoding: [0xc3]
2459 ; X64-AVX1-LABEL: test_mm_loadu_pd:
2460 ; X64-AVX1: # %bb.0:
2461 ; X64-AVX1-NEXT: vmovups (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x10,0x07]
2462 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
2464 ; X64-AVX512-LABEL: test_mm_loadu_pd:
2465 ; X64-AVX512: # %bb.0:
2466 ; X64-AVX512-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
2467 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
2469 ; X32-SSE-LABEL: test_mm_loadu_pd:
2471 ; X32-SSE-NEXT: movups (%edi), %xmm0 # encoding: [0x67,0x0f,0x10,0x07]
2472 ; X32-SSE-NEXT: retq # encoding: [0xc3]
2474 ; X32-AVX1-LABEL: test_mm_loadu_pd:
2475 ; X32-AVX1: # %bb.0:
2476 ; X32-AVX1-NEXT: vmovups (%edi), %xmm0 # encoding: [0x67,0xc5,0xf8,0x10,0x07]
2477 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
2479 ; X32-AVX512-LABEL: test_mm_loadu_pd:
2480 ; X32-AVX512: # %bb.0:
2481 ; X32-AVX512-NEXT: vmovups (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x10,0x07]
2482 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
2483 %res = load <2 x double>, ptr %a0, align 1
2484 ret <2 x double> %res
; _mm_loadu_si128: unaligned (align 1) 128-bit integer load; lowered with the
; same movups/vmovups as the FP case. Assertions autogenerated — do not hand-edit.
2487 define <2 x i64> @test_mm_loadu_si128(ptr %a0) nounwind {
2488 ; X86-SSE-LABEL: test_mm_loadu_si128:
2490 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2491 ; X86-SSE-NEXT: movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00]
2492 ; X86-SSE-NEXT: retl # encoding: [0xc3]
2494 ; X86-AVX1-LABEL: test_mm_loadu_si128:
2495 ; X86-AVX1: # %bb.0:
2496 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2497 ; X86-AVX1-NEXT: vmovups (%eax), %xmm0 # encoding: [0xc5,0xf8,0x10,0x00]
2498 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
2500 ; X86-AVX512-LABEL: test_mm_loadu_si128:
2501 ; X86-AVX512: # %bb.0:
2502 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2503 ; X86-AVX512-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
2504 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
2506 ; X64-SSE-LABEL: test_mm_loadu_si128:
2508 ; X64-SSE-NEXT: movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07]
2509 ; X64-SSE-NEXT: retq # encoding: [0xc3]
2511 ; X64-AVX1-LABEL: test_mm_loadu_si128:
2512 ; X64-AVX1: # %bb.0:
2513 ; X64-AVX1-NEXT: vmovups (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x10,0x07]
2514 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
2516 ; X64-AVX512-LABEL: test_mm_loadu_si128:
2517 ; X64-AVX512: # %bb.0:
2518 ; X64-AVX512-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
2519 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
2521 ; X32-SSE-LABEL: test_mm_loadu_si128:
2523 ; X32-SSE-NEXT: movups (%edi), %xmm0 # encoding: [0x67,0x0f,0x10,0x07]
2524 ; X32-SSE-NEXT: retq # encoding: [0xc3]
2526 ; X32-AVX1-LABEL: test_mm_loadu_si128:
2527 ; X32-AVX1: # %bb.0:
2528 ; X32-AVX1-NEXT: vmovups (%edi), %xmm0 # encoding: [0x67,0xc5,0xf8,0x10,0x07]
2529 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
2531 ; X32-AVX512-LABEL: test_mm_loadu_si128:
2532 ; X32-AVX512: # %bb.0:
2533 ; X32-AVX512-NEXT: vmovups (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x10,0x07]
2534 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
2535 %res = load <2 x i64>, ptr %a0, align 1
; _mm_loadu_si64: unaligned i64 load zero-extended into the low lane,
; lowered as movsd/vmovsd (xmm0 = mem[0],zero). Assertions autogenerated.
2539 define <2 x i64> @test_mm_loadu_si64(ptr nocapture readonly %A) {
2540 ; X86-SSE-LABEL: test_mm_loadu_si64:
2541 ; X86-SSE: # %bb.0: # %entry
2542 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2543 ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2544 ; X86-SSE-NEXT: # encoding: [0xf2,0x0f,0x10,0x00]
2545 ; X86-SSE-NEXT: retl # encoding: [0xc3]
2547 ; X86-AVX1-LABEL: test_mm_loadu_si64:
2548 ; X86-AVX1: # %bb.0: # %entry
2549 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2550 ; X86-AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2551 ; X86-AVX1-NEXT: # encoding: [0xc5,0xfb,0x10,0x00]
2552 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
2554 ; X86-AVX512-LABEL: test_mm_loadu_si64:
2555 ; X86-AVX512: # %bb.0: # %entry
2556 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2557 ; X86-AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2558 ; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
2559 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
2561 ; X64-SSE-LABEL: test_mm_loadu_si64:
2562 ; X64-SSE: # %bb.0: # %entry
2563 ; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2564 ; X64-SSE-NEXT: # encoding: [0xf2,0x0f,0x10,0x07]
2565 ; X64-SSE-NEXT: retq # encoding: [0xc3]
2567 ; X64-AVX1-LABEL: test_mm_loadu_si64:
2568 ; X64-AVX1: # %bb.0: # %entry
2569 ; X64-AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2570 ; X64-AVX1-NEXT: # encoding: [0xc5,0xfb,0x10,0x07]
2571 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
2573 ; X64-AVX512-LABEL: test_mm_loadu_si64:
2574 ; X64-AVX512: # %bb.0: # %entry
2575 ; X64-AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2576 ; X64-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
2577 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
2579 ; X32-SSE-LABEL: test_mm_loadu_si64:
2580 ; X32-SSE: # %bb.0: # %entry
2581 ; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2582 ; X32-SSE-NEXT: # encoding: [0x67,0xf2,0x0f,0x10,0x07]
2583 ; X32-SSE-NEXT: retq # encoding: [0xc3]
2585 ; X32-AVX1-LABEL: test_mm_loadu_si64:
2586 ; X32-AVX1: # %bb.0: # %entry
2587 ; X32-AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2588 ; X32-AVX1-NEXT: # encoding: [0x67,0xc5,0xfb,0x10,0x07]
2589 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
2591 ; X32-AVX512-LABEL: test_mm_loadu_si64:
2592 ; X32-AVX512: # %bb.0: # %entry
2593 ; X32-AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2594 ; X32-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x10,0x07]
2595 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
2597 %0 = load i64, ptr %A, align 1
2598 %vecinit1.i = insertelement <2 x i64> <i64 undef, i64 0>, i64 %0, i32 0
2599 ret <2 x i64> %vecinit1.i
; _mm_loadu_si32: unaligned i32 load zero-extended into the low element,
; lowered as movss/vmovss (xmm0 = mem[0],zero,zero,zero). Assertions autogenerated.
2602 define <2 x i64> @test_mm_loadu_si32(ptr nocapture readonly %A) {
2603 ; X86-SSE-LABEL: test_mm_loadu_si32:
2604 ; X86-SSE: # %bb.0: # %entry
2605 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2606 ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2607 ; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x00]
2608 ; X86-SSE-NEXT: retl # encoding: [0xc3]
2610 ; X86-AVX1-LABEL: test_mm_loadu_si32:
2611 ; X86-AVX1: # %bb.0: # %entry
2612 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2613 ; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2614 ; X86-AVX1-NEXT: # encoding: [0xc5,0xfa,0x10,0x00]
2615 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
2617 ; X86-AVX512-LABEL: test_mm_loadu_si32:
2618 ; X86-AVX512: # %bb.0: # %entry
2619 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2620 ; X86-AVX512-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2621 ; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00]
2622 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
2624 ; X64-SSE-LABEL: test_mm_loadu_si32:
2625 ; X64-SSE: # %bb.0: # %entry
2626 ; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2627 ; X64-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x07]
2628 ; X64-SSE-NEXT: retq # encoding: [0xc3]
2630 ; X64-AVX1-LABEL: test_mm_loadu_si32:
2631 ; X64-AVX1: # %bb.0: # %entry
2632 ; X64-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2633 ; X64-AVX1-NEXT: # encoding: [0xc5,0xfa,0x10,0x07]
2634 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
2636 ; X64-AVX512-LABEL: test_mm_loadu_si32:
2637 ; X64-AVX512: # %bb.0: # %entry
2638 ; X64-AVX512-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2639 ; X64-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
2640 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
2642 ; X32-SSE-LABEL: test_mm_loadu_si32:
2643 ; X32-SSE: # %bb.0: # %entry
2644 ; X32-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2645 ; X32-SSE-NEXT: # encoding: [0x67,0xf3,0x0f,0x10,0x07]
2646 ; X32-SSE-NEXT: retq # encoding: [0xc3]
2648 ; X32-AVX1-LABEL: test_mm_loadu_si32:
2649 ; X32-AVX1: # %bb.0: # %entry
2650 ; X32-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2651 ; X32-AVX1-NEXT: # encoding: [0x67,0xc5,0xfa,0x10,0x07]
2652 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
2654 ; X32-AVX512-LABEL: test_mm_loadu_si32:
2655 ; X32-AVX512: # %bb.0: # %entry
2656 ; X32-AVX512-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2657 ; X32-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfa,0x10,0x07]
2658 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
2660 %0 = load i32, ptr %A, align 1
2661 %vecinit3.i = insertelement <4 x i32> <i32 undef, i32 0, i32 0, i32 0>, i32 %0, i32 0
2662 %1 = bitcast <4 x i32> %vecinit3.i to <2 x i64>
; _mm_loadu_si16: unaligned i16 load zero-extended into the low element,
; lowered as movzwl into a GPR followed by movd/vmovd. Assertions autogenerated.
2666 define <2 x i64> @test_mm_loadu_si16(ptr nocapture readonly %A) {
2667 ; X86-SSE-LABEL: test_mm_loadu_si16:
2668 ; X86-SSE: # %bb.0: # %entry
2669 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2670 ; X86-SSE-NEXT: movzwl (%eax), %eax # encoding: [0x0f,0xb7,0x00]
2671 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
2672 ; X86-SSE-NEXT: retl # encoding: [0xc3]
2674 ; X86-AVX1-LABEL: test_mm_loadu_si16:
2675 ; X86-AVX1: # %bb.0: # %entry
2676 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2677 ; X86-AVX1-NEXT: movzwl (%eax), %eax # encoding: [0x0f,0xb7,0x00]
2678 ; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
2679 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
2681 ; X86-AVX512-LABEL: test_mm_loadu_si16:
2682 ; X86-AVX512: # %bb.0: # %entry
2683 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2684 ; X86-AVX512-NEXT: movzwl (%eax), %eax # encoding: [0x0f,0xb7,0x00]
2685 ; X86-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
2686 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
2688 ; X64-SSE-LABEL: test_mm_loadu_si16:
2689 ; X64-SSE: # %bb.0: # %entry
2690 ; X64-SSE-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07]
2691 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
2692 ; X64-SSE-NEXT: retq # encoding: [0xc3]
2694 ; X64-AVX1-LABEL: test_mm_loadu_si16:
2695 ; X64-AVX1: # %bb.0: # %entry
2696 ; X64-AVX1-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07]
2697 ; X64-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
2698 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
2700 ; X64-AVX512-LABEL: test_mm_loadu_si16:
2701 ; X64-AVX512: # %bb.0: # %entry
2702 ; X64-AVX512-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07]
2703 ; X64-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
2704 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
2706 ; X32-SSE-LABEL: test_mm_loadu_si16:
2707 ; X32-SSE: # %bb.0: # %entry
2708 ; X32-SSE-NEXT: movzwl (%edi), %eax # encoding: [0x67,0x0f,0xb7,0x07]
2709 ; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
2710 ; X32-SSE-NEXT: retq # encoding: [0xc3]
2712 ; X32-AVX1-LABEL: test_mm_loadu_si16:
2713 ; X32-AVX1: # %bb.0: # %entry
2714 ; X32-AVX1-NEXT: movzwl (%edi), %eax # encoding: [0x67,0x0f,0xb7,0x07]
2715 ; X32-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
2716 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
2718 ; X32-AVX512-LABEL: test_mm_loadu_si16:
2719 ; X32-AVX512: # %bb.0: # %entry
2720 ; X32-AVX512-NEXT: movzwl (%edi), %eax # encoding: [0x67,0x0f,0xb7,0x07]
2721 ; X32-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
2722 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
2724 %0 = load i16, ptr %A, align 1
2725 %vecinit7.i = insertelement <8 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %0, i32 0
2726 %1 = bitcast <8 x i16> %vecinit7.i to <2 x i64>
; _mm_madd_epi16 via llvm.x86.sse2.pmadd.wd: lowers to pmaddwd / vpmaddwd.
; Assertions autogenerated — regenerate rather than hand-edit.
2730 define <2 x i64> @test_mm_madd_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2731 ; SSE-LABEL: test_mm_madd_epi16:
2733 ; SSE-NEXT: pmaddwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf5,0xc1]
2734 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2736 ; AVX1-LABEL: test_mm_madd_epi16:
2738 ; AVX1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf5,0xc1]
2739 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2741 ; AVX512-LABEL: test_mm_madd_epi16:
2743 ; AVX512-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xc1]
2744 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2745 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2746 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2747 %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %arg0, <8 x i16> %arg1)
2748 %bc = bitcast <4 x i32> %res to <2 x i64>
2751 declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
; _mm_maskmoveu_si128 via llvm.x86.sse2.maskmov.dqu: maskmovdqu implicitly
; addresses through (e)di, so i386 spills/loads %edi and gnux32 emits the
; addr32 (0x67) prefix. Assertions autogenerated — do not hand-edit.
2753 define void @test_mm_maskmoveu_si128(<2 x i64> %a0, <2 x i64> %a1, ptr %a2) nounwind {
2754 ; X86-SSE-LABEL: test_mm_maskmoveu_si128:
2756 ; X86-SSE-NEXT: pushl %edi # encoding: [0x57]
2757 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi # encoding: [0x8b,0x7c,0x24,0x08]
2758 ; X86-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf7,0xc1]
2759 ; X86-SSE-NEXT: popl %edi # encoding: [0x5f]
2760 ; X86-SSE-NEXT: retl # encoding: [0xc3]
2762 ; X86-AVX-LABEL: test_mm_maskmoveu_si128:
2764 ; X86-AVX-NEXT: pushl %edi # encoding: [0x57]
2765 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edi # encoding: [0x8b,0x7c,0x24,0x08]
2766 ; X86-AVX-NEXT: vmaskmovdqu %xmm1, %xmm0 # encoding: [0xc5,0xf9,0xf7,0xc1]
2767 ; X86-AVX-NEXT: popl %edi # encoding: [0x5f]
2768 ; X86-AVX-NEXT: retl # encoding: [0xc3]
2770 ; X64-SSE-LABEL: test_mm_maskmoveu_si128:
2772 ; X64-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf7,0xc1]
2773 ; X64-SSE-NEXT: retq # encoding: [0xc3]
2775 ; X64-AVX-LABEL: test_mm_maskmoveu_si128:
2777 ; X64-AVX-NEXT: vmaskmovdqu %xmm1, %xmm0 # encoding: [0xc5,0xf9,0xf7,0xc1]
2778 ; X64-AVX-NEXT: retq # encoding: [0xc3]
2780 ; X32-SSE-LABEL: test_mm_maskmoveu_si128:
2782 ; X32-SSE-NEXT: # kill: def $edi killed $edi killed $rdi
2783 ; X32-SSE-NEXT: addr32 maskmovdqu %xmm1, %xmm0 # encoding: [0x67,0x66,0x0f,0xf7,0xc1]
2784 ; X32-SSE-NEXT: retq # encoding: [0xc3]
2786 ; X32-AVX-LABEL: test_mm_maskmoveu_si128:
2788 ; X32-AVX-NEXT: # kill: def $edi killed $edi killed $rdi
2789 ; X32-AVX-NEXT: addr32 vmaskmovdqu %xmm1, %xmm0 # encoding: [0x67,0xc5,0xf9,0xf7,0xc1]
2790 ; X32-AVX-NEXT: retq # encoding: [0xc3]
2791 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2792 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
2793 call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %arg0, <16 x i8> %arg1, ptr %a2)
2796 declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, ptr) nounwind
; _mm_max_epi16 via llvm.smax.v8i16: lowers to pmaxsw / vpmaxsw.
; Assertions autogenerated — regenerate rather than hand-edit.
2798 define <2 x i64> @test_mm_max_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2799 ; SSE-LABEL: test_mm_max_epi16:
2801 ; SSE-NEXT: pmaxsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xee,0xc1]
2802 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2804 ; AVX1-LABEL: test_mm_max_epi16:
2806 ; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xee,0xc1]
2807 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2809 ; AVX512-LABEL: test_mm_max_epi16:
2811 ; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xc1]
2812 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2813 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2814 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2815 %sel = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %arg0, <8 x i16> %arg1)
2816 %bc = bitcast <8 x i16> %sel to <2 x i64>
2819 declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
; _mm_max_epu8 via llvm.umax.v16i8: lowers to pmaxub / vpmaxub.
; Assertions autogenerated — regenerate rather than hand-edit.
2821 define <2 x i64> @test_mm_max_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2822 ; SSE-LABEL: test_mm_max_epu8:
2824 ; SSE-NEXT: pmaxub %xmm1, %xmm0 # encoding: [0x66,0x0f,0xde,0xc1]
2825 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2827 ; AVX1-LABEL: test_mm_max_epu8:
2829 ; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xde,0xc1]
2830 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2832 ; AVX512-LABEL: test_mm_max_epu8:
2834 ; AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xde,0xc1]
2835 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2836 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2837 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
2838 %sel = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %arg0, <16 x i8> %arg1)
2839 %bc = bitcast <16 x i8> %sel to <2 x i64>
2842 declare <16 x i8> @llvm.umax.v16i8(<16 x i8>, <16 x i8>)
; _mm_max_pd via llvm.x86.sse2.max.pd: lowers to maxpd / vmaxpd.
; Assertions autogenerated — regenerate rather than hand-edit.
2844 define <2 x double> @test_mm_max_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
2845 ; SSE-LABEL: test_mm_max_pd:
2847 ; SSE-NEXT: maxpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5f,0xc1]
2848 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2850 ; AVX1-LABEL: test_mm_max_pd:
2852 ; AVX1-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5f,0xc1]
2853 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2855 ; AVX512-LABEL: test_mm_max_pd:
2857 ; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5f,0xc1]
2858 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2859 %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
2860 ret <2 x double> %res
2862 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
; _mm_max_sd via llvm.x86.sse2.max.sd: lowers to maxsd / vmaxsd.
; Assertions autogenerated — regenerate rather than hand-edit.
2864 define <2 x double> @test_mm_max_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
2865 ; SSE-LABEL: test_mm_max_sd:
2867 ; SSE-NEXT: maxsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5f,0xc1]
2868 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2870 ; AVX1-LABEL: test_mm_max_sd:
2872 ; AVX1-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5f,0xc1]
2873 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2875 ; AVX512-LABEL: test_mm_max_sd:
2877 ; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5f,0xc1]
2878 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2879 %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
2880 ret <2 x double> %res
2882 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
; _mm_mfence via llvm.x86.sse2.mfence: a bare mfence on every configuration,
; so a single CHECK prefix covers all nine RUN lines. Assertions autogenerated.
2884 define void @test_mm_mfence() nounwind {
2885 ; CHECK-LABEL: test_mm_mfence:
2887 ; CHECK-NEXT: mfence # encoding: [0x0f,0xae,0xf0]
2888 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2889 call void @llvm.x86.sse2.mfence()
2892 declare void @llvm.x86.sse2.mfence() nounwind readnone
; _mm_min_epi16 via llvm.smin.v8i16: lowers to pminsw / vpminsw.
; Assertions autogenerated — regenerate rather than hand-edit.
2894 define <2 x i64> @test_mm_min_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2895 ; SSE-LABEL: test_mm_min_epi16:
2897 ; SSE-NEXT: pminsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xea,0xc1]
2898 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2900 ; AVX1-LABEL: test_mm_min_epi16:
2902 ; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xea,0xc1]
2903 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2905 ; AVX512-LABEL: test_mm_min_epi16:
2907 ; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xc1]
2908 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2909 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2910 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2911 %sel = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %arg0, <8 x i16> %arg1)
2912 %bc = bitcast <8 x i16> %sel to <2 x i64>
2915 declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)
; _mm_min_epu8 via llvm.umin.v16i8: lowers to pminub / vpminub.
; Assertions autogenerated — regenerate rather than hand-edit.
2917 define <2 x i64> @test_mm_min_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2918 ; SSE-LABEL: test_mm_min_epu8:
2920 ; SSE-NEXT: pminub %xmm1, %xmm0 # encoding: [0x66,0x0f,0xda,0xc1]
2921 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2923 ; AVX1-LABEL: test_mm_min_epu8:
2925 ; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xda,0xc1]
2926 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2928 ; AVX512-LABEL: test_mm_min_epu8:
2930 ; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xda,0xc1]
2931 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2932 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2933 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
2934 %sel = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %arg0, <16 x i8> %arg1)
2935 %bc = bitcast <16 x i8> %sel to <2 x i64>
2938 declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>)
; _mm_min_pd via llvm.x86.sse2.min.pd: lowers to minpd / vminpd.
; Assertions autogenerated — regenerate rather than hand-edit.
2940 define <2 x double> @test_mm_min_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
2941 ; SSE-LABEL: test_mm_min_pd:
2943 ; SSE-NEXT: minpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5d,0xc1]
2944 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2946 ; AVX1-LABEL: test_mm_min_pd:
2948 ; AVX1-NEXT: vminpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5d,0xc1]
2949 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2951 ; AVX512-LABEL: test_mm_min_pd:
2953 ; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5d,0xc1]
2954 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2955 %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
2956 ret <2 x double> %res
2958 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
; _mm_min_sd via llvm.x86.sse2.min.sd: lowers to minsd / vminsd.
; Assertions autogenerated — regenerate rather than hand-edit.
2960 define <2 x double> @test_mm_min_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
2961 ; SSE-LABEL: test_mm_min_sd:
2963 ; SSE-NEXT: minsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5d,0xc1]
2964 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2966 ; AVX1-LABEL: test_mm_min_sd:
2968 ; AVX1-NEXT: vminsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5d,0xc1]
2969 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2971 ; AVX512-LABEL: test_mm_min_sd:
2973 ; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5d,0xc1]
2974 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2975 %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
2976 ret <2 x double> %res
2978 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
; _mm_move_epi64: keep low i64, zero the high one; lowered as movq/vmovq
; (xmm0 = xmm0[0],zero). Assertions autogenerated — do not hand-edit.
2980 define <2 x i64> @test_mm_move_epi64(<2 x i64> %a0) nounwind {
2981 ; SSE-LABEL: test_mm_move_epi64:
2983 ; SSE-NEXT: movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0]
2984 ; SSE-NEXT: # xmm0 = xmm0[0],zero
2985 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2987 ; AVX1-LABEL: test_mm_move_epi64:
2989 ; AVX1-NEXT: vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0]
2990 ; AVX1-NEXT: # xmm0 = xmm0[0],zero
2991 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2993 ; AVX512-LABEL: test_mm_move_epi64:
2995 ; AVX512-NEXT: vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0]
2996 ; AVX512-NEXT: # xmm0 = xmm0[0],zero
2997 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2998 %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
; _mm_move_sd: low double from %a1, high double from %a0. SSE uses movsd;
; AVX prefers a vblendps $3 of the same lanes. Assertions autogenerated.
3002 define <2 x double> @test_mm_move_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
3003 ; SSE-LABEL: test_mm_move_sd:
3005 ; SSE-NEXT: movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
3006 ; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1]
3007 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3009 ; AVX-LABEL: test_mm_move_sd:
3011 ; AVX-NEXT: vblendps $3, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
3012 ; AVX-NEXT: # xmm0 = xmm1[0,1],xmm0[2,3]
3013 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3014 %ext0 = extractelement <2 x double> %a1, i32 0
3015 %res0 = insertelement <2 x double> undef, double %ext0, i32 0
3016 %ext1 = extractelement <2 x double> %a0, i32 1
3017 %res1 = insertelement <2 x double> %res0, double %ext1, i32 1
3018 ret <2 x double> %res1
; _mm_movemask_epi8 via llvm.x86.sse2.pmovmskb.128: pmovmskb / vpmovmskb.
; Assertions autogenerated — regenerate rather than hand-edit.
3021 define i32 @test_mm_movemask_epi8(<2 x i64> %a0) nounwind {
3022 ; SSE-LABEL: test_mm_movemask_epi8:
3024 ; SSE-NEXT: pmovmskb %xmm0, %eax # encoding: [0x66,0x0f,0xd7,0xc0]
3025 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3027 ; AVX-LABEL: test_mm_movemask_epi8:
3029 ; AVX-NEXT: vpmovmskb %xmm0, %eax # encoding: [0xc5,0xf9,0xd7,0xc0]
3030 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3031 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3032 %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %arg0)
3035 declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
; _mm_movemask_pd via llvm.x86.sse2.movmsk.pd: movmskpd / vmovmskpd.
; Assertions autogenerated — regenerate rather than hand-edit.
3037 define i32 @test_mm_movemask_pd(<2 x double> %a0) nounwind {
3038 ; SSE-LABEL: test_mm_movemask_pd:
3040 ; SSE-NEXT: movmskpd %xmm0, %eax # encoding: [0x66,0x0f,0x50,0xc0]
3041 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3043 ; AVX-LABEL: test_mm_movemask_pd:
3045 ; AVX-NEXT: vmovmskpd %xmm0, %eax # encoding: [0xc5,0xf9,0x50,0xc0]
3046 ; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3047 %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
3050 declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
; _mm_mul_epu32 expressed as (and 0xffffffff) + nuw mul. SSE/AVX1 recognise
; the pattern as pmuludq; the AVX512DQ configuration instead materialises the
; masked operands with vpblendd against zero and multiplies with vpmullq
; (the one EVEX-only instruction in this chunk). Assertions autogenerated.
3052 define <2 x i64> @test_mm_mul_epu32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3053 ; SSE-LABEL: test_mm_mul_epu32:
3055 ; SSE-NEXT: pmuludq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf4,0xc1]
3056 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3058 ; AVX1-LABEL: test_mm_mul_epu32:
3060 ; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf4,0xc1]
3061 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3063 ; AVX512-LABEL: test_mm_mul_epu32:
3065 ; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
3066 ; AVX512-NEXT: vpblendd $10, %xmm2, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc2,0x0a]
3067 ; AVX512-NEXT: # xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
3068 ; AVX512-NEXT: vpblendd $10, %xmm2, %xmm1, %xmm1 # encoding: [0xc4,0xe3,0x71,0x02,0xca,0x0a]
3069 ; AVX512-NEXT: # xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
3070 ; AVX512-NEXT: vpmullq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0xc1]
3071 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3072 %A = and <2 x i64> %a0, <i64 4294967295, i64 4294967295>
3073 %B = and <2 x i64> %a1, <i64 4294967295, i64 4294967295>
3074 %res = mul nuw <2 x i64> %A, %B
; _mm_mul_pd: plain fmul lowered to mulpd / vmulpd. Assertions autogenerated.
3078 define <2 x double> @test_mm_mul_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
3079 ; SSE-LABEL: test_mm_mul_pd:
3081 ; SSE-NEXT: mulpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x59,0xc1]
3082 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3084 ; AVX1-LABEL: test_mm_mul_pd:
3086 ; AVX1-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x59,0xc1]
3087 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3089 ; AVX512-LABEL: test_mm_mul_pd:
3091 ; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x59,0xc1]
3092 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3093 %res = fmul <2 x double> %a0, %a1
3094 ret <2 x double> %res
; _mm_mul_sd: scalar extract/fmul/insert pattern recognised as mulsd / vmulsd.
; Assertions autogenerated — regenerate rather than hand-edit.
3097 define <2 x double> @test_mm_mul_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
3098 ; SSE-LABEL: test_mm_mul_sd:
3100 ; SSE-NEXT: mulsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x59,0xc1]
3101 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3103 ; AVX1-LABEL: test_mm_mul_sd:
3105 ; AVX1-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x59,0xc1]
3106 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3108 ; AVX512-LABEL: test_mm_mul_sd:
3110 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x59,0xc1]
3111 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3112 %ext0 = extractelement <2 x double> %a0, i32 0
3113 %ext1 = extractelement <2 x double> %a1, i32 0
3114 %fmul = fmul double %ext0, %ext1
3115 %res = insertelement <2 x double> %a0, double %fmul, i32 0
3116 ret <2 x double> %res
; _mm_mulhi_epi16 via llvm.x86.sse2.pmulh.w: pmulhw / vpmulhw.
; Assertions autogenerated — regenerate rather than hand-edit.
3119 define <2 x i64> @test_mm_mulhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
3120 ; SSE-LABEL: test_mm_mulhi_epi16:
3122 ; SSE-NEXT: pmulhw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe5,0xc1]
3123 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3125 ; AVX1-LABEL: test_mm_mulhi_epi16:
3127 ; AVX1-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe5,0xc1]
3128 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3130 ; AVX512-LABEL: test_mm_mulhi_epi16:
3132 ; AVX512-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xc1]
3133 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3134 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3135 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3136 %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %arg0, <8 x i16> %arg1)
3137 %bc = bitcast <8 x i16> %res to <2 x i64>
3140 declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
; _mm_mulhi_epu16 via llvm.x86.sse2.pmulhu.w: pmulhuw / vpmulhuw.
; Assertions autogenerated — regenerate rather than hand-edit.
3142 define <2 x i64> @test_mm_mulhi_epu16(<2 x i64> %a0, <2 x i64> %a1) {
3143 ; SSE-LABEL: test_mm_mulhi_epu16:
3145 ; SSE-NEXT: pmulhuw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe4,0xc1]
3146 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3148 ; AVX1-LABEL: test_mm_mulhi_epu16:
3150 ; AVX1-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe4,0xc1]
3151 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3153 ; AVX512-LABEL: test_mm_mulhi_epu16:
3155 ; AVX512-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xc1]
3156 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3157 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3158 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3159 %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %arg0, <8 x i16> %arg1)
3160 %bc = bitcast <8 x i16> %res to <2 x i64>
3163 declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
3165 define <2 x i64> @test_mm_mullo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
3166 ; SSE-LABEL: test_mm_mullo_epi16:
3168 ; SSE-NEXT: pmullw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd5,0xc1]
3169 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3171 ; AVX1-LABEL: test_mm_mullo_epi16:
3173 ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd5,0xc1]
3174 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3176 ; AVX512-LABEL: test_mm_mullo_epi16:
3178 ; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0xc1]
3179 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3180 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3181 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3182 %res = mul <8 x i16> %arg0, %arg1
3183 %bc = bitcast <8 x i16> %res to <2 x i64>
3187 define <2 x double> @test_mm_or_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
3188 ; SSE-LABEL: test_mm_or_pd:
3190 ; SSE-NEXT: orps %xmm1, %xmm0 # encoding: [0x0f,0x56,0xc1]
3191 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3193 ; AVX1-LABEL: test_mm_or_pd:
3195 ; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1]
3196 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3198 ; AVX512-LABEL: test_mm_or_pd:
3200 ; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1]
3201 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3202 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
3203 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
3204 %res = or <4 x i32> %arg0, %arg1
3205 %bc = bitcast <4 x i32> %res to <2 x double>
3206 ret <2 x double> %bc
3209 define <2 x i64> @test_mm_or_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3210 ; SSE-LABEL: test_mm_or_si128:
3212 ; SSE-NEXT: orps %xmm1, %xmm0 # encoding: [0x0f,0x56,0xc1]
3213 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3215 ; AVX1-LABEL: test_mm_or_si128:
3217 ; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1]
3218 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3220 ; AVX512-LABEL: test_mm_or_si128:
3222 ; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1]
3223 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3224 %res = or <2 x i64> %a0, %a1
3228 define <2 x i64> @test_mm_packs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
3229 ; SSE-LABEL: test_mm_packs_epi16:
3231 ; SSE-NEXT: packsswb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x63,0xc1]
3232 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3234 ; AVX1-LABEL: test_mm_packs_epi16:
3236 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x63,0xc1]
3237 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3239 ; AVX512-LABEL: test_mm_packs_epi16:
3241 ; AVX512-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1]
3242 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3243 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3244 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3245 %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
3246 %bc = bitcast <16 x i8> %res to <2 x i64>
3249 declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
3251 define <2 x i64> @test_mm_packs_epi32(<2 x i64> %a0, <2 x i64> %a1) {
3252 ; SSE-LABEL: test_mm_packs_epi32:
3254 ; SSE-NEXT: packssdw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6b,0xc1]
3255 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3257 ; AVX1-LABEL: test_mm_packs_epi32:
3259 ; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x6b,0xc1]
3260 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3262 ; AVX512-LABEL: test_mm_packs_epi32:
3264 ; AVX512-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1]
3265 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3266 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3267 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
3268 %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %arg0, <4 x i32> %arg1)
3269 %bc = bitcast <8 x i16> %res to <2 x i64>
3272 declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
3274 define <2 x i64> @test_mm_packus_epi16(<2 x i64> %a0, <2 x i64> %a1) {
3275 ; SSE-LABEL: test_mm_packus_epi16:
3277 ; SSE-NEXT: packuswb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x67,0xc1]
3278 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3280 ; AVX1-LABEL: test_mm_packus_epi16:
3282 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x67,0xc1]
3283 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3285 ; AVX512-LABEL: test_mm_packus_epi16:
3287 ; AVX512-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1]
3288 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3289 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3290 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3291 %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
3292 %bc = bitcast <16 x i8> %res to <2 x i64>
3295 declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
3297 define void @test_mm_pause() nounwind {
3298 ; CHECK-LABEL: test_mm_pause:
3300 ; CHECK-NEXT: pause # encoding: [0xf3,0x90]
3301 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3302 call void @llvm.x86.sse2.pause()
3305 declare void @llvm.x86.sse2.pause() nounwind readnone
3307 define <2 x i64> @test_mm_sad_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3308 ; SSE-LABEL: test_mm_sad_epu8:
3310 ; SSE-NEXT: psadbw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf6,0xc1]
3311 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3313 ; AVX1-LABEL: test_mm_sad_epu8:
3315 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf6,0xc1]
3316 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3318 ; AVX512-LABEL: test_mm_sad_epu8:
3320 ; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf6,0xc1]
3321 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3322 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3323 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
3324 %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %arg0, <16 x i8> %arg1)
3327 declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
3329 define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
3330 ; X86-SSE-LABEL: test_mm_set_epi8:
3332 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3333 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3334 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3335 ; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
3336 ; X86-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
3337 ; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
3338 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
3339 ; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
3340 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
3341 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3342 ; X86-SSE-NEXT: punpcklbw %xmm2, %xmm0 # encoding: [0x66,0x0f,0x60,0xc2]
3343 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
3344 ; X86-SSE-NEXT: punpcklwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x61,0xc1]
3345 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3346 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
3347 ; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
3348 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
3349 ; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
3350 ; X86-SSE-NEXT: punpcklbw %xmm1, %xmm2 # encoding: [0x66,0x0f,0x60,0xd1]
3351 ; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
3352 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
3353 ; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
3354 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
3355 ; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
3356 ; X86-SSE-NEXT: punpcklbw %xmm3, %xmm1 # encoding: [0x66,0x0f,0x60,0xcb]
3357 ; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
3358 ; X86-SSE-NEXT: punpcklwd %xmm2, %xmm1 # encoding: [0x66,0x0f,0x61,0xca]
3359 ; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
3360 ; X86-SSE-NEXT: punpckldq %xmm0, %xmm1 # encoding: [0x66,0x0f,0x62,0xc8]
3361 ; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
3362 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
3363 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3364 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
3365 ; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
3366 ; X86-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
3367 ; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
3368 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
3369 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3370 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
3371 ; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
3372 ; X86-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
3373 ; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
3374 ; X86-SSE-NEXT: punpcklwd %xmm3, %xmm2 # encoding: [0x66,0x0f,0x61,0xd3]
3375 ; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
3376 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
3377 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3378 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
3379 ; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
3380 ; X86-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
3381 ; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
3382 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
3383 ; X86-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
3384 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
3385 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3386 ; X86-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4]
3387 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
3388 ; X86-SSE-NEXT: punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3]
3389 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
3390 ; X86-SSE-NEXT: punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2]
3391 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3392 ; X86-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
3393 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
3394 ; X86-SSE-NEXT: retl # encoding: [0xc3]
3396 ; X86-AVX1-LABEL: test_mm_set_epi8:
3397 ; X86-AVX1: # %bb.0:
3398 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
3399 ; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
3400 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
3401 ; X86-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
3402 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
3403 ; X86-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
3404 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
3405 ; X86-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
3406 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
3407 ; X86-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
3408 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
3409 ; X86-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
3410 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
3411 ; X86-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
3412 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
3413 ; X86-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
3414 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
3415 ; X86-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
3416 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
3417 ; X86-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
3418 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
3419 ; X86-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
3420 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
3421 ; X86-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
3422 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
3423 ; X86-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
3424 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
3425 ; X86-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
3426 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3427 ; X86-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
3428 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3429 ; X86-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
3430 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
3432 ; X86-AVX512-LABEL: test_mm_set_epi8:
3433 ; X86-AVX512: # %bb.0:
3434 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
3435 ; X86-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
3436 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
3437 ; X86-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
3438 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
3439 ; X86-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
3440 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
3441 ; X86-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
3442 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
3443 ; X86-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
3444 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
3445 ; X86-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
3446 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
3447 ; X86-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
3448 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
3449 ; X86-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
3450 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
3451 ; X86-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
3452 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
3453 ; X86-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
3454 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
3455 ; X86-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
3456 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
3457 ; X86-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
3458 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
3459 ; X86-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
3460 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
3461 ; X86-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
3462 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3463 ; X86-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
3464 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3465 ; X86-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
3466 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
3468 ; X64-SSE-LABEL: test_mm_set_epi8:
3470 ; X64-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
3471 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3472 ; X64-SSE-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
3473 ; X64-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
3474 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
3475 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
3476 ; X64-SSE-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
3477 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3478 ; X64-SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
3479 ; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
3480 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
3481 ; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
3482 ; X64-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
3483 ; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
3484 ; X64-SSE-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
3485 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3486 ; X64-SSE-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
3487 ; X64-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
3488 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
3489 ; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
3490 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3491 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3492 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
3493 ; X64-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
3494 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
3495 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
3496 ; X64-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb]
3497 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
3498 ; X64-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca]
3499 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3500 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
3501 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3502 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
3503 ; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
3504 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
3505 ; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
3506 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
3507 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3508 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
3509 ; X64-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
3510 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
3511 ; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
3512 ; X64-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda]
3513 ; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
3514 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
3515 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3516 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
3517 ; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
3518 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
3519 ; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
3520 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
3521 ; X64-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
3522 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
3523 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3524 ; X64-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4]
3525 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
3526 ; X64-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2]
3527 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
3528 ; X64-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3]
3529 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
3530 ; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
3531 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
3532 ; X64-SSE-NEXT: retq # encoding: [0xc3]
3534 ; X64-AVX1-LABEL: test_mm_set_epi8:
3535 ; X64-AVX1: # %bb.0:
3536 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
3537 ; X64-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
3538 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
3539 ; X64-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
3540 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
3541 ; X64-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
3542 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
3543 ; X64-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
3544 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
3545 ; X64-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
3546 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
3547 ; X64-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
3548 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
3549 ; X64-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
3550 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
3551 ; X64-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
3552 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
3553 ; X64-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
3554 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3555 ; X64-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
3556 ; X64-AVX1-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x0a]
3557 ; X64-AVX1-NEXT: vpinsrb $11, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x0b]
3558 ; X64-AVX1-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x0c]
3559 ; X64-AVX1-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x0d]
3560 ; X64-AVX1-NEXT: vpinsrb $14, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x0e]
3561 ; X64-AVX1-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x0f]
3562 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
3564 ; X64-AVX512-LABEL: test_mm_set_epi8:
3565 ; X64-AVX512: # %bb.0:
3566 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
3567 ; X64-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
3568 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
3569 ; X64-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
3570 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
3571 ; X64-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
3572 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
3573 ; X64-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
3574 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
3575 ; X64-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
3576 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
3577 ; X64-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
3578 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
3579 ; X64-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
3580 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
3581 ; X64-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
3582 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
3583 ; X64-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
3584 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3585 ; X64-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
3586 ; X64-AVX512-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x0a]
3587 ; X64-AVX512-NEXT: vpinsrb $11, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x0b]
3588 ; X64-AVX512-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x0c]
3589 ; X64-AVX512-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x0d]
3590 ; X64-AVX512-NEXT: vpinsrb $14, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x0e]
3591 ; X64-AVX512-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x0f]
3592 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
3594 ; X32-SSE-LABEL: test_mm_set_epi8:
3596 ; X32-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
3597 ; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3598 ; X32-SSE-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
3599 ; X32-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
3600 ; X32-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
3601 ; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
3602 ; X32-SSE-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
3603 ; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3604 ; X32-SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
3605 ; X32-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
3606 ; X32-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
3607 ; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
3608 ; X32-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
3609 ; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
3610 ; X32-SSE-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
3611 ; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3612 ; X32-SSE-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
3613 ; X32-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
3614 ; X32-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
3615 ; X32-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
3616 ; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08]
3617 ; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3618 ; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10]
3619 ; X32-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
3620 ; X32-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
3621 ; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
3622 ; X32-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb]
3623 ; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
3624 ; X32-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca]
3625 ; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3626 ; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x18]
3627 ; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3628 ; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x20]
3629 ; X32-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
3630 ; X32-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
3631 ; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
3632 ; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x28]
3633 ; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3634 ; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x30]
3635 ; X32-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
3636 ; X32-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
3637 ; X32-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
3638 ; X32-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda]
3639 ; X32-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
3640 ; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x38]
3641 ; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3642 ; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40]
3643 ; X32-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
3644 ; X32-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
3645 ; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
3646 ; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x48]
3647 ; X32-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
3648 ; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50]
3649 ; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3650 ; X32-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4]
3651 ; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
3652 ; X32-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2]
3653 ; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
3654 ; X32-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3]
3655 ; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
3656 ; X32-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
3657 ; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
3658 ; X32-SSE-NEXT: retq # encoding: [0xc3]
3660 ; X32-AVX1-LABEL: test_mm_set_epi8:
3661 ; X32-AVX1: # %bb.0:
3662 ; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50]
3663 ; X32-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
3664 ; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x48]
3665 ; X32-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
3666 ; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40]
3667 ; X32-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
3668 ; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x38]
3669 ; X32-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
3670 ; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x30]
3671 ; X32-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
3672 ; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x28]
3673 ; X32-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
3674 ; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x20]
3675 ; X32-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
3676 ; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x18]
3677 ; X32-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
3678 ; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10]
3679 ; X32-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
3680 ; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08]
3681 ; X32-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
3682 ; X32-AVX1-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x0a]
3683 ; X32-AVX1-NEXT: vpinsrb $11, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x0b]
3684 ; X32-AVX1-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x0c]
3685 ; X32-AVX1-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x0d]
3686 ; X32-AVX1-NEXT: vpinsrb $14, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x0e]
3687 ; X32-AVX1-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x0f]
3688 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
3690 ; X32-AVX512-LABEL: test_mm_set_epi8:
3691 ; X32-AVX512: # %bb.0:
3692 ; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50]
3693 ; X32-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
3694 ; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x48]
3695 ; X32-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
3696 ; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40]
3697 ; X32-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
3698 ; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x38]
3699 ; X32-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
3700 ; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x30]
3701 ; X32-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
3702 ; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x28]
3703 ; X32-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
3704 ; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x20]
3705 ; X32-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
3706 ; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x18]
3707 ; X32-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
3708 ; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10]
3709 ; X32-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
3710 ; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08]
3711 ; X32-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
3712 ; X32-AVX512-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x0a]
3713 ; X32-AVX512-NEXT: vpinsrb $11, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x0b]
3714 ; X32-AVX512-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x0c]
3715 ; X32-AVX512-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x0d]
3716 ; X32-AVX512-NEXT: vpinsrb $14, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x0e]
3717 ; X32-AVX512-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x0f]
3718 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
3719 %res0 = insertelement <16 x i8> undef, i8 %a15, i32 0
3720 %res1 = insertelement <16 x i8> %res0, i8 %a14, i32 1
3721 %res2 = insertelement <16 x i8> %res1, i8 %a13, i32 2
3722 %res3 = insertelement <16 x i8> %res2, i8 %a12, i32 3
3723 %res4 = insertelement <16 x i8> %res3, i8 %a11, i32 4
3724 %res5 = insertelement <16 x i8> %res4, i8 %a10, i32 5
3725 %res6 = insertelement <16 x i8> %res5, i8 %a9 , i32 6
3726 %res7 = insertelement <16 x i8> %res6, i8 %a8 , i32 7
3727 %res8 = insertelement <16 x i8> %res7, i8 %a7 , i32 8
3728 %res9 = insertelement <16 x i8> %res8, i8 %a6 , i32 9
3729 %res10 = insertelement <16 x i8> %res9, i8 %a5 , i32 10
3730 %res11 = insertelement <16 x i8> %res10, i8 %a4 , i32 11
3731 %res12 = insertelement <16 x i8> %res11, i8 %a3 , i32 12
3732 %res13 = insertelement <16 x i8> %res12, i8 %a2 , i32 13
3733 %res14 = insertelement <16 x i8> %res13, i8 %a1 , i32 14
3734 %res15 = insertelement <16 x i8> %res14, i8 %a0 , i32 15
3735 %res = bitcast <16 x i8> %res15 to <2 x i64>
3739 define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
3740 ; X86-SSE-LABEL: test_mm_set_epi16:
3742 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
3743 ; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
3744 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
3745 ; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
3746 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
3747 ; X86-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
3748 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
3749 ; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
3750 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
3751 ; X86-SSE-NEXT: movd %eax, %xmm5 # encoding: [0x66,0x0f,0x6e,0xe8]
3752 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
3753 ; X86-SSE-NEXT: movd %eax, %xmm6 # encoding: [0x66,0x0f,0x6e,0xf0]
3754 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
3755 ; X86-SSE-NEXT: movd %eax, %xmm7 # encoding: [0x66,0x0f,0x6e,0xf8]
3756 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
3757 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3758 ; X86-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
3759 ; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
3760 ; X86-SSE-NEXT: punpcklwd %xmm4, %xmm3 # encoding: [0x66,0x0f,0x61,0xdc]
3761 ; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
3762 ; X86-SSE-NEXT: punpckldq %xmm2, %xmm3 # encoding: [0x66,0x0f,0x62,0xda]
3763 ; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
3764 ; X86-SSE-NEXT: punpcklwd %xmm5, %xmm6 # encoding: [0x66,0x0f,0x61,0xf5]
3765 ; X86-SSE-NEXT: # xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
3766 ; X86-SSE-NEXT: punpcklwd %xmm7, %xmm0 # encoding: [0x66,0x0f,0x61,0xc7]
3767 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
3768 ; X86-SSE-NEXT: punpckldq %xmm6, %xmm0 # encoding: [0x66,0x0f,0x62,0xc6]
3769 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
3770 ; X86-SSE-NEXT: punpcklqdq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc3]
3771 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0]
3772 ; X86-SSE-NEXT: retl # encoding: [0xc3]
3774 ; X86-AVX1-LABEL: test_mm_set_epi16:
3775 ; X86-AVX1: # %bb.0:
3776 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
3777 ; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
3778 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
3779 ; X86-AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
3780 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
3781 ; X86-AVX1-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
3782 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
3783 ; X86-AVX1-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x03]
3784 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
3785 ; X86-AVX1-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
3786 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
3787 ; X86-AVX1-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
3788 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
3789 ; X86-AVX1-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
3790 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
3791 ; X86-AVX1-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
3792 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
3794 ; X86-AVX512-LABEL: test_mm_set_epi16:
3795 ; X86-AVX512: # %bb.0:
3796 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
3797 ; X86-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
3798 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
3799 ; X86-AVX512-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
3800 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
3801 ; X86-AVX512-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
3802 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
3803 ; X86-AVX512-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03]
3804 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
3805 ; X86-AVX512-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
3806 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
3807 ; X86-AVX512-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
3808 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
3809 ; X86-AVX512-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
3810 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
3811 ; X86-AVX512-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
3812 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
3814 ; X64-SSE-LABEL: test_mm_set_epi16:
3816 ; X64-SSE-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
3817 ; X64-SSE-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08]
3818 ; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
3819 ; X64-SSE-NEXT: movd %esi, %xmm1 # encoding: [0x66,0x0f,0x6e,0xce]
3820 ; X64-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
3821 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3822 ; X64-SSE-NEXT: movd %edx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc2]
3823 ; X64-SSE-NEXT: movd %ecx, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd1]
3824 ; X64-SSE-NEXT: punpcklwd %xmm0, %xmm2 # encoding: [0x66,0x0f,0x61,0xd0]
3825 ; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
3826 ; X64-SSE-NEXT: punpckldq %xmm1, %xmm2 # encoding: [0x66,0x0f,0x62,0xd1]
3827 ; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
3828 ; X64-SSE-NEXT: movd %r8d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc0]
3829 ; X64-SSE-NEXT: movd %r9d, %xmm1 # encoding: [0x66,0x41,0x0f,0x6e,0xc9]
3830 ; X64-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
3831 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3832 ; X64-SSE-NEXT: movd %r10d, %xmm3 # encoding: [0x66,0x41,0x0f,0x6e,0xda]
3833 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3834 ; X64-SSE-NEXT: punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3]
3835 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
3836 ; X64-SSE-NEXT: punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1]
3837 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3838 ; X64-SSE-NEXT: punpcklqdq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc2]
3839 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0]
3840 ; X64-SSE-NEXT: retq # encoding: [0xc3]
3842 ; X64-AVX1-LABEL: test_mm_set_epi16:
3843 ; X64-AVX1: # %bb.0:
3844 ; X64-AVX1-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
3845 ; X64-AVX1-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08]
3846 ; X64-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
3847 ; X64-AVX1-NEXT: vpinsrw $1, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x01]
3848 ; X64-AVX1-NEXT: vpinsrw $2, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x02]
3849 ; X64-AVX1-NEXT: vpinsrw $3, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x03]
3850 ; X64-AVX1-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
3851 ; X64-AVX1-NEXT: vpinsrw $5, %edx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc2,0x05]
3852 ; X64-AVX1-NEXT: vpinsrw $6, %esi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc6,0x06]
3853 ; X64-AVX1-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
3854 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
3856 ; X64-AVX512-LABEL: test_mm_set_epi16:
3857 ; X64-AVX512: # %bb.0:
3858 ; X64-AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
3859 ; X64-AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08]
3860 ; X64-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
3861 ; X64-AVX512-NEXT: vpinsrw $1, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x01]
3862 ; X64-AVX512-NEXT: vpinsrw $2, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x02]
3863 ; X64-AVX512-NEXT: vpinsrw $3, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x03]
3864 ; X64-AVX512-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
3865 ; X64-AVX512-NEXT: vpinsrw $5, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x05]
3866 ; X64-AVX512-NEXT: vpinsrw $6, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x06]
3867 ; X64-AVX512-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
3868 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
3870 ; X32-SSE-LABEL: test_mm_set_epi16:
3872 ; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb7,0x44,0x24,0x10]
3873 ; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb7,0x54,0x24,0x08]
3874 ; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
3875 ; X32-SSE-NEXT: movd %esi, %xmm1 # encoding: [0x66,0x0f,0x6e,0xce]
3876 ; X32-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
3877 ; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3878 ; X32-SSE-NEXT: movd %edx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc2]
3879 ; X32-SSE-NEXT: movd %ecx, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd1]
3880 ; X32-SSE-NEXT: punpcklwd %xmm0, %xmm2 # encoding: [0x66,0x0f,0x61,0xd0]
3881 ; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
3882 ; X32-SSE-NEXT: punpckldq %xmm1, %xmm2 # encoding: [0x66,0x0f,0x62,0xd1]
3883 ; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
3884 ; X32-SSE-NEXT: movd %r8d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc0]
3885 ; X32-SSE-NEXT: movd %r9d, %xmm1 # encoding: [0x66,0x41,0x0f,0x6e,0xc9]
3886 ; X32-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
3887 ; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3888 ; X32-SSE-NEXT: movd %r10d, %xmm3 # encoding: [0x66,0x41,0x0f,0x6e,0xda]
3889 ; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
3890 ; X32-SSE-NEXT: punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3]
3891 ; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
3892 ; X32-SSE-NEXT: punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1]
3893 ; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3894 ; X32-SSE-NEXT: punpcklqdq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc2]
3895 ; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0]
3896 ; X32-SSE-NEXT: retq # encoding: [0xc3]
3898 ; X32-AVX1-LABEL: test_mm_set_epi16:
3899 ; X32-AVX1: # %bb.0:
3900 ; X32-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb7,0x44,0x24,0x10]
3901 ; X32-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb7,0x54,0x24,0x08]
3902 ; X32-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
3903 ; X32-AVX1-NEXT: vpinsrw $1, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x01]
3904 ; X32-AVX1-NEXT: vpinsrw $2, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x02]
3905 ; X32-AVX1-NEXT: vpinsrw $3, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x03]
3906 ; X32-AVX1-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
3907 ; X32-AVX1-NEXT: vpinsrw $5, %edx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc2,0x05]
3908 ; X32-AVX1-NEXT: vpinsrw $6, %esi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc6,0x06]
3909 ; X32-AVX1-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
3910 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
3912 ; X32-AVX512-LABEL: test_mm_set_epi16:
3913 ; X32-AVX512: # %bb.0:
3914 ; X32-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb7,0x44,0x24,0x10]
3915 ; X32-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb7,0x54,0x24,0x08]
3916 ; X32-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
3917 ; X32-AVX512-NEXT: vpinsrw $1, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x01]
3918 ; X32-AVX512-NEXT: vpinsrw $2, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x02]
3919 ; X32-AVX512-NEXT: vpinsrw $3, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x03]
3920 ; X32-AVX512-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
3921 ; X32-AVX512-NEXT: vpinsrw $5, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x05]
3922 ; X32-AVX512-NEXT: vpinsrw $6, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x06]
3923 ; X32-AVX512-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
3924 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
3925 %res0 = insertelement <8 x i16> undef, i16 %a7, i32 0
3926 %res1 = insertelement <8 x i16> %res0, i16 %a6, i32 1
3927 %res2 = insertelement <8 x i16> %res1, i16 %a5, i32 2
3928 %res3 = insertelement <8 x i16> %res2, i16 %a4, i32 3
3929 %res4 = insertelement <8 x i16> %res3, i16 %a3, i32 4
3930 %res5 = insertelement <8 x i16> %res4, i16 %a2, i32 5
3931 %res6 = insertelement <8 x i16> %res5, i16 %a1, i32 6
3932 %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7
3933 %res = bitcast <8 x i16> %res7 to <2 x i64>
3937 define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
3938 ; X86-SSE-LABEL: test_mm_set_epi32:
3940 ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
3941 ; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
3942 ; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
3943 ; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x08]
3944 ; X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
3945 ; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
3946 ; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
3947 ; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x0c]
3948 ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
3949 ; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10]
3950 ; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2]
3951 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3952 ; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
3953 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
3954 ; X86-SSE-NEXT: retl # encoding: [0xc3]
3956 ; X86-AVX1-LABEL: test_mm_set_epi32:
3957 ; X86-AVX1: # %bb.0:
3958 ; X86-AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3959 ; X86-AVX1-NEXT: # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x10]
3960 ; X86-AVX1-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x01]
3961 ; X86-AVX1-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x02]
3962 ; X86-AVX1-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x03]
3963 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
3965 ; X86-AVX512-LABEL: test_mm_set_epi32:
3966 ; X86-AVX512: # %bb.0:
3967 ; X86-AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3968 ; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x10]
3969 ; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x01]
3970 ; X86-AVX512-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x02]
3971 ; X86-AVX512-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x03]
3972 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
3974 ; X64-SSE-LABEL: test_mm_set_epi32:
3976 ; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
3977 ; X64-SSE-NEXT: movd %esi, %xmm1 # encoding: [0x66,0x0f,0x6e,0xce]
3978 ; X64-SSE-NEXT: punpckldq %xmm0, %xmm1 # encoding: [0x66,0x0f,0x62,0xc8]
3979 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
3980 ; X64-SSE-NEXT: movd %edx, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd2]
3981 ; X64-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1]
3982 ; X64-SSE-NEXT: punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2]
3983 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3984 ; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
3985 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
3986 ; X64-SSE-NEXT: retq # encoding: [0xc3]
3988 ; X64-AVX1-LABEL: test_mm_set_epi32:
3989 ; X64-AVX1: # %bb.0:
3990 ; X64-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
3991 ; X64-AVX1-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x01]
3992 ; X64-AVX1-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02]
3993 ; X64-AVX1-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
3994 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
3996 ; X64-AVX512-LABEL: test_mm_set_epi32:
3997 ; X64-AVX512: # %bb.0:
3998 ; X64-AVX512-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
3999 ; X64-AVX512-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x01]
4000 ; X64-AVX512-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02]
4001 ; X64-AVX512-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
4002 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
4004 ; X32-SSE-LABEL: test_mm_set_epi32:
4006 ; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
4007 ; X32-SSE-NEXT: movd %esi, %xmm1 # encoding: [0x66,0x0f,0x6e,0xce]
4008 ; X32-SSE-NEXT: punpckldq %xmm0, %xmm1 # encoding: [0x66,0x0f,0x62,0xc8]
4009 ; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
4010 ; X32-SSE-NEXT: movd %edx, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd2]
4011 ; X32-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1]
4012 ; X32-SSE-NEXT: punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2]
4013 ; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
4014 ; X32-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
4015 ; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
4016 ; X32-SSE-NEXT: retq # encoding: [0xc3]
4018 ; X32-AVX1-LABEL: test_mm_set_epi32:
4019 ; X32-AVX1: # %bb.0:
4020 ; X32-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
4021 ; X32-AVX1-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x01]
4022 ; X32-AVX1-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02]
4023 ; X32-AVX1-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
4024 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
4026 ; X32-AVX512-LABEL: test_mm_set_epi32:
4027 ; X32-AVX512: # %bb.0:
4028 ; X32-AVX512-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
4029 ; X32-AVX512-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x01]
4030 ; X32-AVX512-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02]
4031 ; X32-AVX512-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
4032 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
4033 %res0 = insertelement <4 x i32> undef, i32 %a3, i32 0
4034 %res1 = insertelement <4 x i32> %res0, i32 %a2, i32 1
4035 %res2 = insertelement <4 x i32> %res1, i32 %a1, i32 2
4036 %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3
4037 %res = bitcast <4 x i32> %res3 to <2 x i64>
4041 ; TODO test_mm_set_epi64
4043 define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind {
4044 ; X86-SSE-LABEL: test_mm_set_epi64x:
4046 ; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
4047 ; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x04]
4048 ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
4049 ; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x08]
4050 ; X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
4051 ; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
4052 ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
4053 ; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x0c]
4054 ; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
4055 ; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x10]
4056 ; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2]
4057 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
4058 ; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
4059 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
4060 ; X86-SSE-NEXT: retl # encoding: [0xc3]
4062 ; X86-AVX1-LABEL: test_mm_set_epi64x:
4063 ; X86-AVX1: # %bb.0:
4064 ; X86-AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
4065 ; X86-AVX1-NEXT: # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x0c]
4066 ; X86-AVX1-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x01]
4067 ; X86-AVX1-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x02]
4068 ; X86-AVX1-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x03]
4069 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
4071 ; X86-AVX512-LABEL: test_mm_set_epi64x:
4072 ; X86-AVX512: # %bb.0:
4073 ; X86-AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
4074 ; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x0c]
4075 ; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x01]
4076 ; X86-AVX512-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x02]
4077 ; X86-AVX512-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x03]
4078 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
4080 ; X64-SSE-LABEL: test_mm_set_epi64x:
4082 ; X64-SSE-NEXT: movq %rdi, %xmm1 # encoding: [0x66,0x48,0x0f,0x6e,0xcf]
4083 ; X64-SSE-NEXT: movq %rsi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc6]
4084 ; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
4085 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
4086 ; X64-SSE-NEXT: retq # encoding: [0xc3]
4088 ; X64-AVX1-LABEL: test_mm_set_epi64x:
4089 ; X64-AVX1: # %bb.0:
4090 ; X64-AVX1-NEXT: vmovq %rdi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
4091 ; X64-AVX1-NEXT: vmovq %rsi, %xmm1 # encoding: [0xc4,0xe1,0xf9,0x6e,0xce]
4092 ; X64-AVX1-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x6c,0xc0]
4093 ; X64-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0]
4094 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
4096 ; X64-AVX512-LABEL: test_mm_set_epi64x:
4097 ; X64-AVX512: # %bb.0:
4098 ; X64-AVX512-NEXT: vmovq %rdi, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
4099 ; X64-AVX512-NEXT: vmovq %rsi, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xce]
4100 ; X64-AVX512-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0]
4101 ; X64-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0]
4102 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
4104 ; X32-SSE-LABEL: test_mm_set_epi64x:
4106 ; X32-SSE-NEXT: movq %rdi, %xmm1 # encoding: [0x66,0x48,0x0f,0x6e,0xcf]
4107 ; X32-SSE-NEXT: movq %rsi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc6]
4108 ; X32-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
4109 ; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
4110 ; X32-SSE-NEXT: retq # encoding: [0xc3]
4112 ; X32-AVX1-LABEL: test_mm_set_epi64x:
4113 ; X32-AVX1: # %bb.0:
4114 ; X32-AVX1-NEXT: vmovq %rdi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
4115 ; X32-AVX1-NEXT: vmovq %rsi, %xmm1 # encoding: [0xc4,0xe1,0xf9,0x6e,0xce]
4116 ; X32-AVX1-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x6c,0xc0]
4117 ; X32-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0]
4118 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
4120 ; X32-AVX512-LABEL: test_mm_set_epi64x:
4121 ; X32-AVX512: # %bb.0:
4122 ; X32-AVX512-NEXT: vmovq %rdi, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
4123 ; X32-AVX512-NEXT: vmovq %rsi, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xce]
4124 ; X32-AVX512-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0]
4125 ; X32-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0]
4126 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
4127 %res0 = insertelement <2 x i64> undef, i64 %a1, i32 0
4128 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1
4132 define <2 x double> @test_mm_set_pd(double %a0, double %a1) nounwind {
4133 ; X86-SSE-LABEL: test_mm_set_pd:
4135 ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
4136 ; X86-SSE-NEXT: # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x0c]
4137 ; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
4138 ; X86-SSE-NEXT: # encoding: [0xf2,0x0f,0x10,0x4c,0x24,0x04]
4139 ; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
4140 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
4141 ; X86-SSE-NEXT: retl # encoding: [0xc3]
4143 ; X86-AVX1-LABEL: test_mm_set_pd:
4144 ; X86-AVX1: # %bb.0:
4145 ; X86-AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
4146 ; X86-AVX1-NEXT: # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c]
4147 ; X86-AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
4148 ; X86-AVX1-NEXT: # encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04]
4149 ; X86-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
4150 ; X86-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0]
4151 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
4153 ; X86-AVX512-LABEL: test_mm_set_pd:
4154 ; X86-AVX512: # %bb.0:
4155 ; X86-AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
4156 ; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c]
4157 ; X86-AVX512-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
4158 ; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04]
4159 ; X86-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
4160 ; X86-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0]
4161 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
4163 ; X64-SSE-LABEL: test_mm_set_pd:
4165 ; X64-SSE-NEXT: movlhps %xmm0, %xmm1 # encoding: [0x0f,0x16,0xc8]
4166 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0]
4167 ; X64-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
4168 ; X64-SSE-NEXT: retq # encoding: [0xc3]
4170 ; X64-AVX1-LABEL: test_mm_set_pd:
4171 ; X64-AVX1: # %bb.0:
4172 ; X64-AVX1-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0x16,0xc0]
4173 ; X64-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0]
4174 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
4176 ; X64-AVX512-LABEL: test_mm_set_pd:
4177 ; X64-AVX512: # %bb.0:
4178 ; X64-AVX512-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x16,0xc0]
4179 ; X64-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0]
4180 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
4182 ; X32-SSE-LABEL: test_mm_set_pd:
4184 ; X32-SSE-NEXT: movlhps %xmm0, %xmm1 # encoding: [0x0f,0x16,0xc8]
4185 ; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0]
4186 ; X32-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
4187 ; X32-SSE-NEXT: retq # encoding: [0xc3]
4189 ; X32-AVX1-LABEL: test_mm_set_pd:
4190 ; X32-AVX1: # %bb.0:
4191 ; X32-AVX1-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0x16,0xc0]
4192 ; X32-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0]
4193 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
4195 ; X32-AVX512-LABEL: test_mm_set_pd:
4196 ; X32-AVX512: # %bb.0:
4197 ; X32-AVX512-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x16,0xc0]
4198 ; X32-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0]
4199 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
4200 %res0 = insertelement <2 x double> undef, double %a1, i32 0
4201 %res1 = insertelement <2 x double> %res0, double %a0, i32 1
4202 ret <2 x double> %res1
4205 define <2 x double> @test_mm_set_pd1(double %a0) nounwind {
4206 ; X86-SSE-LABEL: test_mm_set_pd1:
4208 ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
4209 ; X86-SSE-NEXT: # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x04]
4210 ; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
4211 ; X86-SSE-NEXT: # xmm0 = xmm0[0,0]
4212 ; X86-SSE-NEXT: retl # encoding: [0xc3]
4214 ; X86-AVX1-LABEL: test_mm_set_pd1:
4215 ; X86-AVX1: # %bb.0:
4216 ; X86-AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
4217 ; X86-AVX1-NEXT: # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04]
4218 ; X86-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
4219 ; X86-AVX1-NEXT: # xmm0 = xmm0[0,0]
4220 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
4222 ; X86-AVX512-LABEL: test_mm_set_pd1:
4223 ; X86-AVX512: # %bb.0:
4224 ; X86-AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
4225 ; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04]
4226 ; X86-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
4227 ; X86-AVX512-NEXT: # xmm0 = xmm0[0,0]
4228 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
4230 ; X64-SSE-LABEL: test_mm_set_pd1:
4232 ; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
4233 ; X64-SSE-NEXT: # xmm0 = xmm0[0,0]
4234 ; X64-SSE-NEXT: retq # encoding: [0xc3]
4236 ; X64-AVX1-LABEL: test_mm_set_pd1:
4237 ; X64-AVX1: # %bb.0:
4238 ; X64-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
4239 ; X64-AVX1-NEXT: # xmm0 = xmm0[0,0]
4240 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
4242 ; X64-AVX512-LABEL: test_mm_set_pd1:
4243 ; X64-AVX512: # %bb.0:
4244 ; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
4245 ; X64-AVX512-NEXT: # xmm0 = xmm0[0,0]
4246 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
4248 ; X32-SSE-LABEL: test_mm_set_pd1:
4250 ; X32-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
4251 ; X32-SSE-NEXT: # xmm0 = xmm0[0,0]
4252 ; X32-SSE-NEXT: retq # encoding: [0xc3]
4254 ; X32-AVX1-LABEL: test_mm_set_pd1:
4255 ; X32-AVX1: # %bb.0:
4256 ; X32-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
4257 ; X32-AVX1-NEXT: # xmm0 = xmm0[0,0]
4258 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
4260 ; X32-AVX512-LABEL: test_mm_set_pd1:
4261 ; X32-AVX512: # %bb.0:
4262 ; X32-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
4263 ; X32-AVX512-NEXT: # xmm0 = xmm0[0,0]
4264 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
4265 %res0 = insertelement <2 x double> undef, double %a0, i32 0
4266 %res1 = insertelement <2 x double> %res0, double %a0, i32 1
4267 ret <2 x double> %res1
4270 define <2 x double> @test_mm_set_sd(double %a0) nounwind {
4271 ; X86-SSE-LABEL: test_mm_set_sd:
4273 ; X86-SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
4274 ; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x7e,0x44,0x24,0x04]
4275 ; X86-SSE-NEXT: movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0]
4276 ; X86-SSE-NEXT: # xmm0 = xmm0[0],zero
4277 ; X86-SSE-NEXT: retl # encoding: [0xc3]
4279 ; X86-AVX1-LABEL: test_mm_set_sd:
4280 ; X86-AVX1: # %bb.0:
4281 ; X86-AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
4282 ; X86-AVX1-NEXT: # encoding: [0xc5,0xfa,0x7e,0x44,0x24,0x04]
4283 ; X86-AVX1-NEXT: vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0]
4284 ; X86-AVX1-NEXT: # xmm0 = xmm0[0],zero
4285 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
4287 ; X86-AVX512-LABEL: test_mm_set_sd:
4288 ; X86-AVX512: # %bb.0:
4289 ; X86-AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
4290 ; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x44,0x24,0x04]
4291 ; X86-AVX512-NEXT: vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0]
4292 ; X86-AVX512-NEXT: # xmm0 = xmm0[0],zero
4293 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
4295 ; X64-SSE-LABEL: test_mm_set_sd:
4297 ; X64-SSE-NEXT: movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0]
4298 ; X64-SSE-NEXT: # xmm0 = xmm0[0],zero
4299 ; X64-SSE-NEXT: retq # encoding: [0xc3]
4301 ; X64-AVX1-LABEL: test_mm_set_sd:
4302 ; X64-AVX1: # %bb.0:
4303 ; X64-AVX1-NEXT: vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0]
4304 ; X64-AVX1-NEXT: # xmm0 = xmm0[0],zero
4305 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
4307 ; X64-AVX512-LABEL: test_mm_set_sd:
4308 ; X64-AVX512: # %bb.0:
4309 ; X64-AVX512-NEXT: vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0]
4310 ; X64-AVX512-NEXT: # xmm0 = xmm0[0],zero
4311 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
4313 ; X32-SSE-LABEL: test_mm_set_sd:
4315 ; X32-SSE-NEXT: movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0]
4316 ; X32-SSE-NEXT: # xmm0 = xmm0[0],zero
4317 ; X32-SSE-NEXT: retq # encoding: [0xc3]
4319 ; X32-AVX1-LABEL: test_mm_set_sd:
4320 ; X32-AVX1: # %bb.0:
4321 ; X32-AVX1-NEXT: vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0]
4322 ; X32-AVX1-NEXT: # xmm0 = xmm0[0],zero
4323 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
4325 ; X32-AVX512-LABEL: test_mm_set_sd:
4326 ; X32-AVX512: # %bb.0:
4327 ; X32-AVX512-NEXT: vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0]
4328 ; X32-AVX512-NEXT: # xmm0 = xmm0[0],zero
4329 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
4330 %res0 = insertelement <2 x double> undef, double %a0, i32 0
4331 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
4332 ret <2 x double> %res1
4335 define <2 x i64> @test_mm_set1_epi8(i8 %a0) nounwind {
4336 ; X86-SSE-LABEL: test_mm_set1_epi8:
4338 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4339 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4340 ; X86-SSE-NEXT: punpcklbw %xmm0, %xmm0 # encoding: [0x66,0x0f,0x60,0xc0]
4341 ; X86-SSE-NEXT: # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
4342 ; X86-SSE-NEXT: pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00]
4343 ; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7]
4344 ; X86-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
4345 ; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
4346 ; X86-SSE-NEXT: retl # encoding: [0xc3]
4348 ; X86-AVX1-LABEL: test_mm_set1_epi8:
4349 ; X86-AVX1: # %bb.0:
4350 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4351 ; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
4352 ; X86-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
4353 ; X86-AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x00,0xc1]
4354 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
4356 ; X86-AVX512-LABEL: test_mm_set1_epi8:
4357 ; X86-AVX512: # %bb.0:
4358 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4359 ; X86-AVX512-NEXT: vpbroadcastb %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc0]
4360 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
4362 ; X64-SSE-LABEL: test_mm_set1_epi8:
4364 ; X64-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
4365 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4366 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm0 # encoding: [0x66,0x0f,0x60,0xc0]
4367 ; X64-SSE-NEXT: # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
4368 ; X64-SSE-NEXT: pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00]
4369 ; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7]
4370 ; X64-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
4371 ; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
4372 ; X64-SSE-NEXT: retq # encoding: [0xc3]
4374 ; X64-AVX1-LABEL: test_mm_set1_epi8:
4375 ; X64-AVX1: # %bb.0:
4376 ; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
4377 ; X64-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
4378 ; X64-AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x00,0xc1]
4379 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
4381 ; X64-AVX512-LABEL: test_mm_set1_epi8:
4382 ; X64-AVX512: # %bb.0:
4383 ; X64-AVX512-NEXT: vpbroadcastb %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc7]
4384 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
4386 ; X32-SSE-LABEL: test_mm_set1_epi8:
4388 ; X32-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
4389 ; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4390 ; X32-SSE-NEXT: punpcklbw %xmm0, %xmm0 # encoding: [0x66,0x0f,0x60,0xc0]
4391 ; X32-SSE-NEXT: # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
4392 ; X32-SSE-NEXT: pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00]
4393 ; X32-SSE-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7]
4394 ; X32-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
4395 ; X32-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
4396 ; X32-SSE-NEXT: retq # encoding: [0xc3]
4398 ; X32-AVX1-LABEL: test_mm_set1_epi8:
4399 ; X32-AVX1: # %bb.0:
4400 ; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
4401 ; X32-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
4402 ; X32-AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x00,0xc1]
4403 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
4405 ; X32-AVX512-LABEL: test_mm_set1_epi8:
4406 ; X32-AVX512: # %bb.0:
4407 ; X32-AVX512-NEXT: vpbroadcastb %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc7]
4408 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
4409 %res0 = insertelement <16 x i8> undef, i8 %a0, i32 0
4410 %res1 = insertelement <16 x i8> %res0, i8 %a0, i32 1
4411 %res2 = insertelement <16 x i8> %res1, i8 %a0, i32 2
4412 %res3 = insertelement <16 x i8> %res2, i8 %a0, i32 3
4413 %res4 = insertelement <16 x i8> %res3, i8 %a0, i32 4
4414 %res5 = insertelement <16 x i8> %res4, i8 %a0, i32 5
4415 %res6 = insertelement <16 x i8> %res5, i8 %a0, i32 6
4416 %res7 = insertelement <16 x i8> %res6, i8 %a0, i32 7
4417 %res8 = insertelement <16 x i8> %res7, i8 %a0, i32 8
4418 %res9 = insertelement <16 x i8> %res8, i8 %a0, i32 9
4419 %res10 = insertelement <16 x i8> %res9, i8 %a0, i32 10
4420 %res11 = insertelement <16 x i8> %res10, i8 %a0, i32 11
4421 %res12 = insertelement <16 x i8> %res11, i8 %a0, i32 12
4422 %res13 = insertelement <16 x i8> %res12, i8 %a0, i32 13
4423 %res14 = insertelement <16 x i8> %res13, i8 %a0, i32 14
4424 %res15 = insertelement <16 x i8> %res14, i8 %a0, i32 15
4425 %res = bitcast <16 x i8> %res15 to <2 x i64>
4429 define <2 x i64> @test_mm_set1_epi16(i16 %a0) nounwind {
4430 ; X86-SSE-LABEL: test_mm_set1_epi16:
4432 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
4433 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4434 ; X86-SSE-NEXT: pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00]
4435 ; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7]
4436 ; X86-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
4437 ; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
4438 ; X86-SSE-NEXT: retl # encoding: [0xc3]
4440 ; X86-AVX1-LABEL: test_mm_set1_epi16:
4441 ; X86-AVX1: # %bb.0:
4442 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
4443 ; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
4444 ; X86-AVX1-NEXT: vpshuflw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0x00]
4445 ; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7]
4446 ; X86-AVX1-NEXT: vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00]
4447 ; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
4448 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
4450 ; X86-AVX512-LABEL: test_mm_set1_epi16:
4451 ; X86-AVX512: # %bb.0:
4452 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
4453 ; X86-AVX512-NEXT: vpbroadcastw %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc0]
4454 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
4456 ; X64-SSE-LABEL: test_mm_set1_epi16:
4458 ; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
4459 ; X64-SSE-NEXT: pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00]
4460 ; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7]
4461 ; X64-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
4462 ; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
4463 ; X64-SSE-NEXT: retq # encoding: [0xc3]
4465 ; X64-AVX1-LABEL: test_mm_set1_epi16:
4466 ; X64-AVX1: # %bb.0:
4467 ; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
4468 ; X64-AVX1-NEXT: vpshuflw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0x00]
4469 ; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7]
4470 ; X64-AVX1-NEXT: vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00]
4471 ; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
4472 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
4474 ; X64-AVX512-LABEL: test_mm_set1_epi16:
4475 ; X64-AVX512: # %bb.0:
4476 ; X64-AVX512-NEXT: vpbroadcastw %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc7]
4477 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
4479 ; X32-SSE-LABEL: test_mm_set1_epi16:
4481 ; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
4482 ; X32-SSE-NEXT: pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00]
4483 ; X32-SSE-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7]
4484 ; X32-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
4485 ; X32-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
4486 ; X32-SSE-NEXT: retq # encoding: [0xc3]
4488 ; X32-AVX1-LABEL: test_mm_set1_epi16:
4489 ; X32-AVX1: # %bb.0:
4490 ; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
4491 ; X32-AVX1-NEXT: vpshuflw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0x00]
4492 ; X32-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7]
4493 ; X32-AVX1-NEXT: vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00]
4494 ; X32-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
4495 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
4497 ; X32-AVX512-LABEL: test_mm_set1_epi16:
4498 ; X32-AVX512: # %bb.0:
4499 ; X32-AVX512-NEXT: vpbroadcastw %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc7]
4500 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
4501 %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0
4502 %res1 = insertelement <8 x i16> %res0, i16 %a0, i32 1
4503 %res2 = insertelement <8 x i16> %res1, i16 %a0, i32 2
4504 %res3 = insertelement <8 x i16> %res2, i16 %a0, i32 3
4505 %res4 = insertelement <8 x i16> %res3, i16 %a0, i32 4
4506 %res5 = insertelement <8 x i16> %res4, i16 %a0, i32 5
4507 %res6 = insertelement <8 x i16> %res5, i16 %a0, i32 6
4508 %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7
4509 %res = bitcast <8 x i16> %res7 to <2 x i64>
4513 define <2 x i64> @test_mm_set1_epi32(i32 %a0) nounwind {
4514 ; X86-SSE-LABEL: test_mm_set1_epi32:
4516 ; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
4517 ; X86-SSE-NEXT: # encoding: [0x66,0x0f,0x6e,0x44,0x24,0x04]
4518 ; X86-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
4519 ; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
4520 ; X86-SSE-NEXT: retl # encoding: [0xc3]
4522 ; X86-AVX1-LABEL: test_mm_set1_epi32:
4523 ; X86-AVX1: # %bb.0:
4524 ; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
4525 ; X86-AVX1-NEXT: # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
4526 ; X86-AVX1-NEXT: vshufps $0, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc0,0x00]
4527 ; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
4528 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
4530 ; X86-AVX512-LABEL: test_mm_set1_epi32:
4531 ; X86-AVX512: # %bb.0:
4532 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4533 ; X86-AVX512-NEXT: vpbroadcastd %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc0]
4534 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
4536 ; X64-SSE-LABEL: test_mm_set1_epi32:
4538 ; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
4539 ; X64-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
4540 ; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
4541 ; X64-SSE-NEXT: retq # encoding: [0xc3]
4543 ; X64-AVX1-LABEL: test_mm_set1_epi32:
4544 ; X64-AVX1: # %bb.0:
4545 ; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
4546 ; X64-AVX1-NEXT: vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00]
4547 ; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
4548 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
4550 ; X64-AVX512-LABEL: test_mm_set1_epi32:
4551 ; X64-AVX512: # %bb.0:
4552 ; X64-AVX512-NEXT: vpbroadcastd %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc7]
4553 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
4555 ; X32-SSE-LABEL: test_mm_set1_epi32:
4557 ; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
4558 ; X32-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
4559 ; X32-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
4560 ; X32-SSE-NEXT: retq # encoding: [0xc3]
4562 ; X32-AVX1-LABEL: test_mm_set1_epi32:
4563 ; X32-AVX1: # %bb.0:
4564 ; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
4565 ; X32-AVX1-NEXT: vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00]
4566 ; X32-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
4567 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
4569 ; X32-AVX512-LABEL: test_mm_set1_epi32:
4570 ; X32-AVX512: # %bb.0:
4571 ; X32-AVX512-NEXT: vpbroadcastd %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc7]
4572 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
4573 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
4574 %res1 = insertelement <4 x i32> %res0, i32 %a0, i32 1
4575 %res2 = insertelement <4 x i32> %res1, i32 %a0, i32 2
4576 %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3
4577 %res = bitcast <4 x i32> %res3 to <2 x i64>
4581 ; TODO test_mm_set1_epi64
4583 define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind {
4584 ; X86-SSE-LABEL: test_mm_set1_epi64x:
4586 ; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
4587 ; X86-SSE-NEXT: # encoding: [0x66,0x0f,0x6e,0x44,0x24,0x04]
4588 ; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
4589 ; X86-SSE-NEXT: # encoding: [0x66,0x0f,0x6e,0x4c,0x24,0x08]
4590 ; X86-SSE-NEXT: punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1]
4591 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
4592 ; X86-SSE-NEXT: pshufd $68, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x44]
4593 ; X86-SSE-NEXT: # xmm0 = xmm0[0,1,0,1]
4594 ; X86-SSE-NEXT: retl # encoding: [0xc3]
4596 ; X86-AVX1-LABEL: test_mm_set1_epi64x:
4597 ; X86-AVX1: # %bb.0:
4598 ; X86-AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
4599 ; X86-AVX1-NEXT: # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
4600 ; X86-AVX1-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
4601 ; X86-AVX1-NEXT: vpshufd $68, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x44]
4602 ; X86-AVX1-NEXT: # xmm0 = xmm0[0,1,0,1]
4603 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
4605 ; X86-AVX512-LABEL: test_mm_set1_epi64x:
4606 ; X86-AVX512: # %bb.0:
4607 ; X86-AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
4608 ; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
4609 ; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
4610 ; X86-AVX512-NEXT: vpbroadcastq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0xc0]
4611 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
4613 ; X64-SSE-LABEL: test_mm_set1_epi64x:
4615 ; X64-SSE-NEXT: movq %rdi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc7]
4616 ; X64-SSE-NEXT: pshufd $68, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x44]
4617 ; X64-SSE-NEXT: # xmm0 = xmm0[0,1,0,1]
4618 ; X64-SSE-NEXT: retq # encoding: [0xc3]
4620 ; X64-AVX1-LABEL: test_mm_set1_epi64x:
4621 ; X64-AVX1: # %bb.0:
4622 ; X64-AVX1-NEXT: vmovq %rdi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
4623 ; X64-AVX1-NEXT: vpshufd $68, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x44]
4624 ; X64-AVX1-NEXT: # xmm0 = xmm0[0,1,0,1]
4625 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
4627 ; X64-AVX512-LABEL: test_mm_set1_epi64x:
4628 ; X64-AVX512: # %bb.0:
4629 ; X64-AVX512-NEXT: vpbroadcastq %rdi, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xc7]
4630 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
4632 ; X32-SSE-LABEL: test_mm_set1_epi64x:
4634 ; X32-SSE-NEXT: movq %rdi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc7]
4635 ; X32-SSE-NEXT: pshufd $68, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x44]
4636 ; X32-SSE-NEXT: # xmm0 = xmm0[0,1,0,1]
4637 ; X32-SSE-NEXT: retq # encoding: [0xc3]
4639 ; X32-AVX1-LABEL: test_mm_set1_epi64x:
4640 ; X32-AVX1: # %bb.0:
4641 ; X32-AVX1-NEXT: vmovq %rdi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
4642 ; X32-AVX1-NEXT: vpshufd $68, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x44]
4643 ; X32-AVX1-NEXT: # xmm0 = xmm0[0,1,0,1]
4644 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
4646 ; X32-AVX512-LABEL: test_mm_set1_epi64x:
4647 ; X32-AVX512: # %bb.0:
4648 ; X32-AVX512-NEXT: vpbroadcastq %rdi, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xc7]
4649 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
4650 %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
4651 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1
4655 define <2 x double> @test_mm_set1_pd(double %a0) nounwind {
4656 ; X86-SSE-LABEL: test_mm_set1_pd:
4658 ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
4659 ; X86-SSE-NEXT: # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x04]
4660 ; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
4661 ; X86-SSE-NEXT: # xmm0 = xmm0[0,0]
4662 ; X86-SSE-NEXT: retl # encoding: [0xc3]
4664 ; X86-AVX1-LABEL: test_mm_set1_pd:
4665 ; X86-AVX1: # %bb.0:
4666 ; X86-AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
4667 ; X86-AVX1-NEXT: # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04]
4668 ; X86-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
4669 ; X86-AVX1-NEXT: # xmm0 = xmm0[0,0]
4670 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
4672 ; X86-AVX512-LABEL: test_mm_set1_pd:
4673 ; X86-AVX512: # %bb.0:
4674 ; X86-AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
4675 ; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04]
4676 ; X86-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
4677 ; X86-AVX512-NEXT: # xmm0 = xmm0[0,0]
4678 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
4680 ; X64-SSE-LABEL: test_mm_set1_pd:
4682 ; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
4683 ; X64-SSE-NEXT: # xmm0 = xmm0[0,0]
4684 ; X64-SSE-NEXT: retq # encoding: [0xc3]
4686 ; X64-AVX1-LABEL: test_mm_set1_pd:
4687 ; X64-AVX1: # %bb.0:
4688 ; X64-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
4689 ; X64-AVX1-NEXT: # xmm0 = xmm0[0,0]
4690 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
4692 ; X64-AVX512-LABEL: test_mm_set1_pd:
4693 ; X64-AVX512: # %bb.0:
4694 ; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
4695 ; X64-AVX512-NEXT: # xmm0 = xmm0[0,0]
4696 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
4698 ; X32-SSE-LABEL: test_mm_set1_pd:
4700 ; X32-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
4701 ; X32-SSE-NEXT: # xmm0 = xmm0[0,0]
4702 ; X32-SSE-NEXT: retq # encoding: [0xc3]
4704 ; X32-AVX1-LABEL: test_mm_set1_pd:
4705 ; X32-AVX1: # %bb.0:
4706 ; X32-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
4707 ; X32-AVX1-NEXT: # xmm0 = xmm0[0,0]
4708 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
4710 ; X32-AVX512-LABEL: test_mm_set1_pd:
4711 ; X32-AVX512: # %bb.0:
4712 ; X32-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
4713 ; X32-AVX512-NEXT: # xmm0 = xmm0[0,0]
4714 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
4715 %res0 = insertelement <2 x double> undef, double %a0, i32 0
4716 %res1 = insertelement <2 x double> %res0, double %a0, i32 1
4717 ret <2 x double> %res1
4720 define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
4721 ; X86-SSE-LABEL: test_mm_setr_epi8:
4723 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
4724 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4725 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
4726 ; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
4727 ; X86-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
4728 ; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
4729 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
4730 ; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
4731 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
4732 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4733 ; X86-SSE-NEXT: punpcklbw %xmm2, %xmm0 # encoding: [0x66,0x0f,0x60,0xc2]
4734 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
4735 ; X86-SSE-NEXT: punpcklwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x61,0xc1]
4736 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
4737 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
4738 ; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
4739 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
4740 ; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
4741 ; X86-SSE-NEXT: punpcklbw %xmm1, %xmm2 # encoding: [0x66,0x0f,0x60,0xd1]
4742 ; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
4743 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
4744 ; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
4745 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
4746 ; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
4747 ; X86-SSE-NEXT: punpcklbw %xmm3, %xmm1 # encoding: [0x66,0x0f,0x60,0xcb]
4748 ; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
4749 ; X86-SSE-NEXT: punpcklwd %xmm2, %xmm1 # encoding: [0x66,0x0f,0x61,0xca]
4750 ; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
4751 ; X86-SSE-NEXT: punpckldq %xmm0, %xmm1 # encoding: [0x66,0x0f,0x62,0xc8]
4752 ; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
4753 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
4754 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4755 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
4756 ; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
4757 ; X86-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
4758 ; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
4759 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
4760 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4761 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
4762 ; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
4763 ; X86-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
4764 ; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
4765 ; X86-SSE-NEXT: punpcklwd %xmm3, %xmm2 # encoding: [0x66,0x0f,0x61,0xd3]
4766 ; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
4767 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
4768 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4769 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
4770 ; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
4771 ; X86-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
4772 ; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
4773 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
4774 ; X86-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
4775 ; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4776 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4777 ; X86-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4]
4778 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
4779 ; X86-SSE-NEXT: punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3]
4780 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
4781 ; X86-SSE-NEXT: punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2]
4782 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
4783 ; X86-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
4784 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
4785 ; X86-SSE-NEXT: retl # encoding: [0xc3]
4787 ; X86-AVX1-LABEL: test_mm_setr_epi8:
4788 ; X86-AVX1: # %bb.0:
4789 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4790 ; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
4791 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
4792 ; X86-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
4793 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
4794 ; X86-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
4795 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
4796 ; X86-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
4797 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
4798 ; X86-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
4799 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
4800 ; X86-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
4801 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
4802 ; X86-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
4803 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
4804 ; X86-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
4805 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
4806 ; X86-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
4807 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
4808 ; X86-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
4809 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
4810 ; X86-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
4811 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
4812 ; X86-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
4813 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
4814 ; X86-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
4815 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
4816 ; X86-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
4817 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
4818 ; X86-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
4819 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
4820 ; X86-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
4821 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
4823 ; X86-AVX512-LABEL: test_mm_setr_epi8:
4824 ; X86-AVX512: # %bb.0:
4825 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4826 ; X86-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
4827 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
4828 ; X86-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
4829 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
4830 ; X86-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
4831 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
4832 ; X86-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
4833 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
4834 ; X86-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
4835 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
4836 ; X86-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
4837 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
4838 ; X86-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
4839 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
4840 ; X86-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
4841 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
4842 ; X86-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
4843 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
4844 ; X86-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
4845 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
4846 ; X86-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
4847 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
4848 ; X86-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
4849 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
4850 ; X86-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
4851 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
4852 ; X86-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
4853 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
4854 ; X86-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
4855 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
4856 ; X86-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
4857 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
4859 ; X64-SSE-LABEL: test_mm_setr_epi8:
4861 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
4862 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4863 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
4864 ; X64-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
4865 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
4866 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
4867 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
4868 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4869 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
4870 ; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
4871 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
4872 ; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
4873 ; X64-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
4874 ; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
4875 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
4876 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4877 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
4878 ; X64-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
4879 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
4880 ; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
4881 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
4882 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4883 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
4884 ; X64-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
4885 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
4886 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
4887 ; X64-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb]
4888 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
4889 ; X64-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca]
4890 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
4891 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
4892 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4893 ; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
4894 ; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
4895 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
4896 ; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
4897 ; X64-SSE-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
4898 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4899 ; X64-SSE-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
4900 ; X64-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
4901 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
4902 ; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
4903 ; X64-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda]
4904 ; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
4905 ; X64-SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
4906 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4907 ; X64-SSE-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
4908 ; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
4909 ; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
4910 ; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
4911 ; X64-SSE-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
4912 ; X64-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
4913 ; X64-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
4914 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4915 ; X64-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4]
4916 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
4917 ; X64-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2]
4918 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
4919 ; X64-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3]
4920 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
4921 ; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
4922 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
4923 ; X64-SSE-NEXT: retq # encoding: [0xc3]
4925 ; X64-AVX1-LABEL: test_mm_setr_epi8:
4926 ; X64-AVX1: # %bb.0:
4927 ; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
4928 ; X64-AVX1-NEXT: vpinsrb $1, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x01]
4929 ; X64-AVX1-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x02]
4930 ; X64-AVX1-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x03]
4931 ; X64-AVX1-NEXT: vpinsrb $4, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x04]
4932 ; X64-AVX1-NEXT: vpinsrb $5, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x05]
4933 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
4934 ; X64-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
4935 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
4936 ; X64-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
4937 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
4938 ; X64-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
4939 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
4940 ; X64-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
4941 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
4942 ; X64-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
4943 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
4944 ; X64-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
4945 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
4946 ; X64-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
4947 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
4948 ; X64-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
4949 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
4950 ; X64-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
4951 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
4952 ; X64-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
4953 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
4955 ; X64-AVX512-LABEL: test_mm_setr_epi8:
4956 ; X64-AVX512: # %bb.0:
4957 ; X64-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
4958 ; X64-AVX512-NEXT: vpinsrb $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x01]
4959 ; X64-AVX512-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x02]
4960 ; X64-AVX512-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x03]
4961 ; X64-AVX512-NEXT: vpinsrb $4, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x04]
4962 ; X64-AVX512-NEXT: vpinsrb $5, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x05]
4963 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
4964 ; X64-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
4965 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
4966 ; X64-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
4967 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
4968 ; X64-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
4969 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
4970 ; X64-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
4971 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
4972 ; X64-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
4973 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
4974 ; X64-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
4975 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
4976 ; X64-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
4977 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
4978 ; X64-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
4979 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
4980 ; X64-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
4981 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
4982 ; X64-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
4983 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
4985 ; X32-SSE-LABEL: test_mm_setr_epi8:
4987 ; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50]
4988 ; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4989 ; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x48]
4990 ; X32-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
4991 ; X32-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
4992 ; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
4993 ; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40]
4994 ; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
4995 ; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x38]
4996 ; X32-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
4997 ; X32-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
4998 ; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
4999 ; X32-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
5000 ; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
5001 ; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x30]
5002 ; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
5003 ; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x28]
5004 ; X32-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
5005 ; X32-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
5006 ; X32-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
5007 ; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x20]
5008 ; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
5009 ; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x18]
5010 ; X32-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
5011 ; X32-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
5012 ; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
5013 ; X32-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb]
5014 ; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
5015 ; X32-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca]
5016 ; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
5017 ; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10]
5018 ; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
5019 ; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08]
5020 ; X32-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
5021 ; X32-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
5022 ; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
5023 ; X32-SSE-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
5024 ; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
5025 ; X32-SSE-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
5026 ; X32-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
5027 ; X32-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
5028 ; X32-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
5029 ; X32-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda]
5030 ; X32-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
5031 ; X32-SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
5032 ; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
5033 ; X32-SSE-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
5034 ; X32-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
5035 ; X32-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
5036 ; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
5037 ; X32-SSE-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
5038 ; X32-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
5039 ; X32-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
5040 ; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
5041 ; X32-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4]
5042 ; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
5043 ; X32-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2]
5044 ; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
5045 ; X32-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3]
5046 ; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
5047 ; X32-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
5048 ; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
5049 ; X32-SSE-NEXT: retq # encoding: [0xc3]
5051 ; X32-AVX1-LABEL: test_mm_setr_epi8:
5052 ; X32-AVX1: # %bb.0:
5053 ; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
5054 ; X32-AVX1-NEXT: vpinsrb $1, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x01]
5055 ; X32-AVX1-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x02]
5056 ; X32-AVX1-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x03]
5057 ; X32-AVX1-NEXT: vpinsrb $4, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x04]
5058 ; X32-AVX1-NEXT: vpinsrb $5, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x05]
5059 ; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08]
5060 ; X32-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
5061 ; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10]
5062 ; X32-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
5063 ; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x18]
5064 ; X32-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
5065 ; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x20]
5066 ; X32-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
5067 ; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x28]
5068 ; X32-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
5069 ; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x30]
5070 ; X32-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
5071 ; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x38]
5072 ; X32-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
5073 ; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40]
5074 ; X32-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
5075 ; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x48]
5076 ; X32-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
5077 ; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50]
5078 ; X32-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
5079 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
5081 ; X32-AVX512-LABEL: test_mm_setr_epi8:
5082 ; X32-AVX512: # %bb.0:
5083 ; X32-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
5084 ; X32-AVX512-NEXT: vpinsrb $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x01]
5085 ; X32-AVX512-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x02]
5086 ; X32-AVX512-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x03]
5087 ; X32-AVX512-NEXT: vpinsrb $4, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x04]
5088 ; X32-AVX512-NEXT: vpinsrb $5, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x05]
5089 ; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08]
5090 ; X32-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
5091 ; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10]
5092 ; X32-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
5093 ; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x18]
5094 ; X32-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
5095 ; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x20]
5096 ; X32-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
5097 ; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x28]
5098 ; X32-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
5099 ; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x30]
5100 ; X32-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
5101 ; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x38]
5102 ; X32-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
5103 ; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40]
5104 ; X32-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
5105 ; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x48]
5106 ; X32-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
5107 ; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50]
5108 ; X32-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
5109 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
5110 %res0 = insertelement <16 x i8> undef, i8 %a0 , i32 0
5111 %res1 = insertelement <16 x i8> %res0, i8 %a1 , i32 1
5112 %res2 = insertelement <16 x i8> %res1, i8 %a2 , i32 2
5113 %res3 = insertelement <16 x i8> %res2, i8 %a3 , i32 3
5114 %res4 = insertelement <16 x i8> %res3, i8 %a4 , i32 4
5115 %res5 = insertelement <16 x i8> %res4, i8 %a5 , i32 5
5116 %res6 = insertelement <16 x i8> %res5, i8 %a6 , i32 6
5117 %res7 = insertelement <16 x i8> %res6, i8 %a7 , i32 7
5118 %res8 = insertelement <16 x i8> %res7, i8 %a8 , i32 8
5119 %res9 = insertelement <16 x i8> %res8, i8 %a9 , i32 9
5120 %res10 = insertelement <16 x i8> %res9, i8 %a10, i32 10
5121 %res11 = insertelement <16 x i8> %res10, i8 %a11, i32 11
5122 %res12 = insertelement <16 x i8> %res11, i8 %a12, i32 12
5123 %res13 = insertelement <16 x i8> %res12, i8 %a13, i32 13
5124 %res14 = insertelement <16 x i8> %res13, i8 %a14, i32 14
5125 %res15 = insertelement <16 x i8> %res14, i8 %a15, i32 15
5126 %res = bitcast <16 x i8> %res15 to <2 x i64>
5130 define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
5131 ; X86-SSE-LABEL: test_mm_setr_epi16:
5133 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
5134 ; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
5135 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
5136 ; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
5137 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
5138 ; X86-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
5139 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
5140 ; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
5141 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
5142 ; X86-SSE-NEXT: movd %eax, %xmm5 # encoding: [0x66,0x0f,0x6e,0xe8]
5143 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
5144 ; X86-SSE-NEXT: movd %eax, %xmm6 # encoding: [0x66,0x0f,0x6e,0xf0]
5145 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
5146 ; X86-SSE-NEXT: movd %eax, %xmm7 # encoding: [0x66,0x0f,0x6e,0xf8]
5147 ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
5148 ; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
5149 ; X86-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
5150 ; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
5151 ; X86-SSE-NEXT: punpcklwd %xmm4, %xmm3 # encoding: [0x66,0x0f,0x61,0xdc]
5152 ; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
5153 ; X86-SSE-NEXT: punpckldq %xmm2, %xmm3 # encoding: [0x66,0x0f,0x62,0xda]
5154 ; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
5155 ; X86-SSE-NEXT: punpcklwd %xmm5, %xmm6 # encoding: [0x66,0x0f,0x61,0xf5]
5156 ; X86-SSE-NEXT: # xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
5157 ; X86-SSE-NEXT: punpcklwd %xmm7, %xmm0 # encoding: [0x66,0x0f,0x61,0xc7]
5158 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
5159 ; X86-SSE-NEXT: punpckldq %xmm6, %xmm0 # encoding: [0x66,0x0f,0x62,0xc6]
5160 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
5161 ; X86-SSE-NEXT: punpcklqdq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc3]
5162 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0]
5163 ; X86-SSE-NEXT: retl # encoding: [0xc3]
5165 ; X86-AVX1-LABEL: test_mm_setr_epi16:
5166 ; X86-AVX1: # %bb.0:
5167 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
5168 ; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
5169 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
5170 ; X86-AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5171 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
5172 ; X86-AVX1-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5173 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
5174 ; X86-AVX1-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x03]
5175 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
5176 ; X86-AVX1-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5177 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
5178 ; X86-AVX1-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5179 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
5180 ; X86-AVX1-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5181 ; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
5182 ; X86-AVX1-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
5183 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
5185 ; X86-AVX512-LABEL: test_mm_setr_epi16:
5186 ; X86-AVX512: # %bb.0:
5187 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
5188 ; X86-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
5189 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
5190 ; X86-AVX512-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5191 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
5192 ; X86-AVX512-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5193 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
5194 ; X86-AVX512-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03]
5195 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
5196 ; X86-AVX512-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5197 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
5198 ; X86-AVX512-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5199 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
5200 ; X86-AVX512-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5201 ; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
5202 ; X86-AVX512-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
5203 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
5205 ; X64-SSE-LABEL: test_mm_setr_epi16:
5207 ; X64-SSE-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
5208 ; X64-SSE-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08]
5209 ; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
5210 ; X64-SSE-NEXT: movd %r10d, %xmm1 # encoding: [0x66,0x41,0x0f,0x6e,0xca]
5211 ; X64-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
5212 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
5213 ; X64-SSE-NEXT: movd %r9d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc1]
5214 ; X64-SSE-NEXT: movd %r8d, %xmm2 # encoding: [0x66,0x41,0x0f,0x6e,0xd0]
5215 ; X64-SSE-NEXT: punpcklwd %xmm0, %xmm2 # encoding: [0x66,0x0f,0x61,0xd0]
5216 ; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
5217 ; X64-SSE-NEXT: punpckldq %xmm1, %xmm2 # encoding: [0x66,0x0f,0x62,0xd1]
5218 ; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
5219 ; X64-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1]
5220 ; X64-SSE-NEXT: movd %edx, %xmm1 # encoding: [0x66,0x0f,0x6e,0xca]
5221 ; X64-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
5222 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
5223 ; X64-SSE-NEXT: movd %esi, %xmm3 # encoding: [0x66,0x0f,0x6e,0xde]
5224 ; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
5225 ; X64-SSE-NEXT: punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3]
5226 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
5227 ; X64-SSE-NEXT: punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1]
5228 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
5229 ; X64-SSE-NEXT: punpcklqdq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc2]
5230 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0]
5231 ; X64-SSE-NEXT: retq # encoding: [0xc3]
5233 ; X64-AVX1-LABEL: test_mm_setr_epi16:
5234 ; X64-AVX1: # %bb.0:
5235 ; X64-AVX1-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
5236 ; X64-AVX1-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08]
5237 ; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
5238 ; X64-AVX1-NEXT: vpinsrw $1, %esi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc6,0x01]
5239 ; X64-AVX1-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc2,0x02]
5240 ; X64-AVX1-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x03]
5241 ; X64-AVX1-NEXT: vpinsrw $4, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x04]
5242 ; X64-AVX1-NEXT: vpinsrw $5, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x05]
5243 ; X64-AVX1-NEXT: vpinsrw $6, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x06]
5244 ; X64-AVX1-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
5245 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
5247 ; X64-AVX512-LABEL: test_mm_setr_epi16:
5248 ; X64-AVX512: # %bb.0:
5249 ; X64-AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
5250 ; X64-AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08]
5251 ; X64-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
5252 ; X64-AVX512-NEXT: vpinsrw $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x01]
5253 ; X64-AVX512-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x02]
5254 ; X64-AVX512-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x03]
5255 ; X64-AVX512-NEXT: vpinsrw $4, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x04]
5256 ; X64-AVX512-NEXT: vpinsrw $5, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x05]
5257 ; X64-AVX512-NEXT: vpinsrw $6, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x06]
5258 ; X64-AVX512-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
5259 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
5261 ; X32-SSE-LABEL: test_mm_setr_epi16:
5263 ; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb7,0x44,0x24,0x10]
5264 ; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb7,0x54,0x24,0x08]
5265 ; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
5266 ; X32-SSE-NEXT: movd %r10d, %xmm1 # encoding: [0x66,0x41,0x0f,0x6e,0xca]
5267 ; X32-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
5268 ; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
5269 ; X32-SSE-NEXT: movd %r9d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc1]
5270 ; X32-SSE-NEXT: movd %r8d, %xmm2 # encoding: [0x66,0x41,0x0f,0x6e,0xd0]
5271 ; X32-SSE-NEXT: punpcklwd %xmm0, %xmm2 # encoding: [0x66,0x0f,0x61,0xd0]
5272 ; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
5273 ; X32-SSE-NEXT: punpckldq %xmm1, %xmm2 # encoding: [0x66,0x0f,0x62,0xd1]
5274 ; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
5275 ; X32-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1]
5276 ; X32-SSE-NEXT: movd %edx, %xmm1 # encoding: [0x66,0x0f,0x6e,0xca]
5277 ; X32-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
5278 ; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
5279 ; X32-SSE-NEXT: movd %esi, %xmm3 # encoding: [0x66,0x0f,0x6e,0xde]
5280 ; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
5281 ; X32-SSE-NEXT: punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3]
5282 ; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
5283 ; X32-SSE-NEXT: punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1]
5284 ; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
5285 ; X32-SSE-NEXT: punpcklqdq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc2]
5286 ; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0]
5287 ; X32-SSE-NEXT: retq # encoding: [0xc3]
5289 ; X32-AVX1-LABEL: test_mm_setr_epi16:
5290 ; X32-AVX1: # %bb.0:
5291 ; X32-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb7,0x44,0x24,0x10]
5292 ; X32-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb7,0x54,0x24,0x08]
5293 ; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
5294 ; X32-AVX1-NEXT: vpinsrw $1, %esi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc6,0x01]
5295 ; X32-AVX1-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc2,0x02]
5296 ; X32-AVX1-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x03]
5297 ; X32-AVX1-NEXT: vpinsrw $4, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x04]
5298 ; X32-AVX1-NEXT: vpinsrw $5, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x05]
5299 ; X32-AVX1-NEXT: vpinsrw $6, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x06]
5300 ; X32-AVX1-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
5301 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
5303 ; X32-AVX512-LABEL: test_mm_setr_epi16:
5304 ; X32-AVX512: # %bb.0:
5305 ; X32-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb7,0x44,0x24,0x10]
5306 ; X32-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb7,0x54,0x24,0x08]
5307 ; X32-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
5308 ; X32-AVX512-NEXT: vpinsrw $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x01]
5309 ; X32-AVX512-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x02]
5310 ; X32-AVX512-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x03]
5311 ; X32-AVX512-NEXT: vpinsrw $4, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x04]
5312 ; X32-AVX512-NEXT: vpinsrw $5, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x05]
5313 ; X32-AVX512-NEXT: vpinsrw $6, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x06]
5314 ; X32-AVX512-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
5315 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
5316 %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0
5317 %res1 = insertelement <8 x i16> %res0, i16 %a1, i32 1
5318 %res2 = insertelement <8 x i16> %res1, i16 %a2, i32 2
5319 %res3 = insertelement <8 x i16> %res2, i16 %a3, i32 3
5320 %res4 = insertelement <8 x i16> %res3, i16 %a4, i32 4
5321 %res5 = insertelement <8 x i16> %res4, i16 %a5, i32 5
5322 %res6 = insertelement <8 x i16> %res5, i16 %a6, i32 6
5323 %res7 = insertelement <8 x i16> %res6, i16 %a7, i32 7
5324 %res = bitcast <8 x i16> %res7 to <2 x i64>
5328 define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
5329 ; X86-SSE-LABEL: test_mm_setr_epi32:
5331 ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
5332 ; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10]
5333 ; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
5334 ; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c]
5335 ; X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
5336 ; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
5337 ; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
5338 ; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x08]
5339 ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
5340 ; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
5341 ; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2]
5342 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
5343 ; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
5344 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
5345 ; X86-SSE-NEXT: retl # encoding: [0xc3]
5347 ; X86-AVX1-LABEL: test_mm_setr_epi32:
5348 ; X86-AVX1: # %bb.0:
5349 ; X86-AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
5350 ; X86-AVX1-NEXT: # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
5351 ; X86-AVX1-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
5352 ; X86-AVX1-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02]
5353 ; X86-AVX1-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03]
5354 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
5356 ; X86-AVX512-LABEL: test_mm_setr_epi32:
5357 ; X86-AVX512: # %bb.0:
5358 ; X86-AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
5359 ; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
5360 ; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
5361 ; X86-AVX512-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02]
5362 ; X86-AVX512-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03]
5363 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
5365 ; X64-SSE-LABEL: test_mm_setr_epi32:
5367 ; X64-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1]
5368 ; X64-SSE-NEXT: movd %edx, %xmm1 # encoding: [0x66,0x0f,0x6e,0xca]
5369 ; X64-SSE-NEXT: punpckldq %xmm0, %xmm1 # encoding: [0x66,0x0f,0x62,0xc8]
5370 ; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
5371 ; X64-SSE-NEXT: movd %esi, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd6]
5372 ; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
5373 ; X64-SSE-NEXT: punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2]
5374 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
5375 ; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
5376 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
5377 ; X64-SSE-NEXT: retq # encoding: [0xc3]
5379 ; X64-AVX1-LABEL: test_mm_setr_epi32:
5380 ; X64-AVX1: # %bb.0:
5381 ; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
5382 ; X64-AVX1-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x01]
5383 ; X64-AVX1-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
5384 ; X64-AVX1-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03]
5385 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
5387 ; X64-AVX512-LABEL: test_mm_setr_epi32:
5388 ; X64-AVX512: # %bb.0:
5389 ; X64-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
5390 ; X64-AVX512-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x01]
5391 ; X64-AVX512-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
5392 ; X64-AVX512-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03]
5393 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
5395 ; X32-SSE-LABEL: test_mm_setr_epi32:
5397 ; X32-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1]
5398 ; X32-SSE-NEXT: movd %edx, %xmm1 # encoding: [0x66,0x0f,0x6e,0xca]
5399 ; X32-SSE-NEXT: punpckldq %xmm0, %xmm1 # encoding: [0x66,0x0f,0x62,0xc8]
5400 ; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
5401 ; X32-SSE-NEXT: movd %esi, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd6]
5402 ; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
5403 ; X32-SSE-NEXT: punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2]
5404 ; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
5405 ; X32-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
5406 ; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
5407 ; X32-SSE-NEXT: retq # encoding: [0xc3]
5409 ; X32-AVX1-LABEL: test_mm_setr_epi32:
5410 ; X32-AVX1: # %bb.0:
5411 ; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
5412 ; X32-AVX1-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x01]
5413 ; X32-AVX1-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
5414 ; X32-AVX1-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03]
5415 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
5417 ; X32-AVX512-LABEL: test_mm_setr_epi32:
5418 ; X32-AVX512: # %bb.0:
5419 ; X32-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
5420 ; X32-AVX512-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x01]
5421 ; X32-AVX512-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
5422 ; X32-AVX512-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03]
5423 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
5424 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
5425 %res1 = insertelement <4 x i32> %res0, i32 %a1, i32 1
5426 %res2 = insertelement <4 x i32> %res1, i32 %a2, i32 2
5427 %res3 = insertelement <4 x i32> %res2, i32 %a3, i32 3
5428 %res = bitcast <4 x i32> %res3 to <2 x i64>
5432 ; TODO test_mm_setr_epi64
5434 define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind {
5435 ; X86-SSE-LABEL: test_mm_setr_epi64x:
5437 ; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
5438 ; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c]
5439 ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
5440 ; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10]
5441 ; X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
5442 ; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
5443 ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
5444 ; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
5445 ; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
5446 ; X86-SSE-NEXT: # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x08]
5447 ; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2]
5448 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
5449 ; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
5450 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
5451 ; X86-SSE-NEXT: retl # encoding: [0xc3]
5453 ; X86-AVX1-LABEL: test_mm_setr_epi64x:
5454 ; X86-AVX1: # %bb.0:
5455 ; X86-AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
5456 ; X86-AVX1-NEXT: # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
5457 ; X86-AVX1-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
5458 ; X86-AVX1-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02]
5459 ; X86-AVX1-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03]
5460 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
5462 ; X86-AVX512-LABEL: test_mm_setr_epi64x:
5463 ; X86-AVX512: # %bb.0:
5464 ; X86-AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
5465 ; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
5466 ; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
5467 ; X86-AVX512-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02]
5468 ; X86-AVX512-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03]
5469 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
5471 ; X64-SSE-LABEL: test_mm_setr_epi64x:
5473 ; X64-SSE-NEXT: movq %rsi, %xmm1 # encoding: [0x66,0x48,0x0f,0x6e,0xce]
5474 ; X64-SSE-NEXT: movq %rdi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc7]
5475 ; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
5476 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
5477 ; X64-SSE-NEXT: retq # encoding: [0xc3]
5479 ; X64-AVX1-LABEL: test_mm_setr_epi64x:
5480 ; X64-AVX1: # %bb.0:
5481 ; X64-AVX1-NEXT: vmovq %rsi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc6]
5482 ; X64-AVX1-NEXT: vmovq %rdi, %xmm1 # encoding: [0xc4,0xe1,0xf9,0x6e,0xcf]
5483 ; X64-AVX1-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x6c,0xc0]
5484 ; X64-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0]
5485 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
5487 ; X64-AVX512-LABEL: test_mm_setr_epi64x:
5488 ; X64-AVX512: # %bb.0:
5489 ; X64-AVX512-NEXT: vmovq %rsi, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc6]
5490 ; X64-AVX512-NEXT: vmovq %rdi, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xcf]
5491 ; X64-AVX512-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0]
5492 ; X64-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0]
5493 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
5495 ; X32-SSE-LABEL: test_mm_setr_epi64x:
5497 ; X32-SSE-NEXT: movq %rsi, %xmm1 # encoding: [0x66,0x48,0x0f,0x6e,0xce]
5498 ; X32-SSE-NEXT: movq %rdi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc7]
5499 ; X32-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
5500 ; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
5501 ; X32-SSE-NEXT: retq # encoding: [0xc3]
5503 ; X32-AVX1-LABEL: test_mm_setr_epi64x:
5504 ; X32-AVX1: # %bb.0:
5505 ; X32-AVX1-NEXT: vmovq %rsi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc6]
5506 ; X32-AVX1-NEXT: vmovq %rdi, %xmm1 # encoding: [0xc4,0xe1,0xf9,0x6e,0xcf]
5507 ; X32-AVX1-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x6c,0xc0]
5508 ; X32-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0]
5509 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
5511 ; X32-AVX512-LABEL: test_mm_setr_epi64x:
5512 ; X32-AVX512: # %bb.0:
5513 ; X32-AVX512-NEXT: vmovq %rsi, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc6]
5514 ; X32-AVX512-NEXT: vmovq %rdi, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xcf]
5515 ; X32-AVX512-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0]
5516 ; X32-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0]
5517 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
5518 %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
5519 %res1 = insertelement <2 x i64> %res0, i64 %a1, i32 1
5523 define <2 x double> @test_mm_setr_pd(double %a0, double %a1) nounwind {
5524 ; X86-SSE-LABEL: test_mm_setr_pd:
5526 ; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
5527 ; X86-SSE-NEXT: # encoding: [0xf2,0x0f,0x10,0x4c,0x24,0x0c]
5528 ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
5529 ; X86-SSE-NEXT: # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x04]
5530 ; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
5531 ; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
5532 ; X86-SSE-NEXT: retl # encoding: [0xc3]
5534 ; X86-AVX1-LABEL: test_mm_setr_pd:
5535 ; X86-AVX1: # %bb.0:
5536 ; X86-AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
5537 ; X86-AVX1-NEXT: # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c]
5538 ; X86-AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
5539 ; X86-AVX1-NEXT: # encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04]
5540 ; X86-AVX1-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0x16,0xc0]
5541 ; X86-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0]
5542 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
5544 ; X86-AVX512-LABEL: test_mm_setr_pd:
5545 ; X86-AVX512: # %bb.0:
5546 ; X86-AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
5547 ; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c]
5548 ; X86-AVX512-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
5549 ; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04]
5550 ; X86-AVX512-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x16,0xc0]
5551 ; X86-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0]
5552 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
5554 ; X64-SSE-LABEL: test_mm_setr_pd:
5556 ; X64-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
5557 ; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
5558 ; X64-SSE-NEXT: retq # encoding: [0xc3]
5560 ; X64-AVX1-LABEL: test_mm_setr_pd:
5561 ; X64-AVX1: # %bb.0:
5562 ; X64-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
5563 ; X64-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0]
5564 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
5566 ; X64-AVX512-LABEL: test_mm_setr_pd:
5567 ; X64-AVX512: # %bb.0:
5568 ; X64-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
5569 ; X64-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0]
5570 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
5572 ; X32-SSE-LABEL: test_mm_setr_pd:
5574 ; X32-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
5575 ; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
5576 ; X32-SSE-NEXT: retq # encoding: [0xc3]
5578 ; X32-AVX1-LABEL: test_mm_setr_pd:
5579 ; X32-AVX1: # %bb.0:
5580 ; X32-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
5581 ; X32-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0]
5582 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
5584 ; X32-AVX512-LABEL: test_mm_setr_pd:
5585 ; X32-AVX512: # %bb.0:
5586 ; X32-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
5587 ; X32-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0]
5588 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
5589 %res0 = insertelement <2 x double> undef, double %a0, i32 0
5590 %res1 = insertelement <2 x double> %res0, double %a1, i32 1
5591 ret <2 x double> %res1
5594 define <2 x double> @test_mm_setzero_pd() {
5595 ; SSE-LABEL: test_mm_setzero_pd:
5597 ; SSE-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
5598 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5600 ; AVX1-LABEL: test_mm_setzero_pd:
5602 ; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
5603 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5605 ; AVX512-LABEL: test_mm_setzero_pd:
5607 ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0]
5608 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5609 ret <2 x double> zeroinitializer
5612 define <2 x i64> @test_mm_setzero_si128() {
5613 ; SSE-LABEL: test_mm_setzero_si128:
5615 ; SSE-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
5616 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5618 ; AVX1-LABEL: test_mm_setzero_si128:
5620 ; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
5621 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5623 ; AVX512-LABEL: test_mm_setzero_si128:
5625 ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0]
5626 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5627 ret <2 x i64> zeroinitializer
5630 define <2 x i64> @test_mm_shuffle_epi32(<2 x i64> %a0) {
5631 ; SSE-LABEL: test_mm_shuffle_epi32:
5633 ; SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
5634 ; SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
5635 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5637 ; AVX1-LABEL: test_mm_shuffle_epi32:
5639 ; AVX1-NEXT: vshufps $0, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc0,0x00]
5640 ; AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
5641 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5643 ; AVX512-LABEL: test_mm_shuffle_epi32:
5645 ; AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
5646 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5647 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
5648 %res = shufflevector <4 x i32> %arg0, <4 x i32> undef, <4 x i32> zeroinitializer
5649 %bc = bitcast <4 x i32> %res to <2 x i64>
5653 define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) {
5654 ; SSE-LABEL: test_mm_shuffle_pd:
5656 ; SSE-NEXT: shufps $78, %xmm1, %xmm0 # encoding: [0x0f,0xc6,0xc1,0x4e]
5657 ; SSE-NEXT: # xmm0 = xmm0[2,3],xmm1[0,1]
5658 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5660 ; AVX1-LABEL: test_mm_shuffle_pd:
5662 ; AVX1-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc6,0xc1,0x01]
5663 ; AVX1-NEXT: # xmm0 = xmm0[1],xmm1[0]
5664 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5666 ; AVX512-LABEL: test_mm_shuffle_pd:
5668 ; AVX512-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc6,0xc1,0x01]
5669 ; AVX512-NEXT: # xmm0 = xmm0[1],xmm1[0]
5670 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5671 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
5672 ret <2 x double> %res
5675 define <2 x i64> @test_mm_shufflehi_epi16(<2 x i64> %a0) {
5676 ; SSE-LABEL: test_mm_shufflehi_epi16:
5678 ; SSE-NEXT: pshufhw $0, %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x70,0xc0,0x00]
5679 ; SSE-NEXT: # xmm0 = xmm0[0,1,2,3,4,4,4,4]
5680 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5682 ; AVX1-LABEL: test_mm_shufflehi_epi16:
5684 ; AVX1-NEXT: vpshufhw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x70,0xc0,0x00]
5685 ; AVX1-NEXT: # xmm0 = xmm0[0,1,2,3,4,4,4,4]
5686 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5688 ; AVX512-LABEL: test_mm_shufflehi_epi16:
5690 ; AVX512-NEXT: vpshufhw $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x70,0xc0,0x00]
5691 ; AVX512-NEXT: # xmm0 = xmm0[0,1,2,3,4,4,4,4]
5692 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5693 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
5694 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
5695 %bc = bitcast <8 x i16> %res to <2 x i64>
5699 define <2 x i64> @test_mm_shufflelo_epi16(<2 x i64> %a0) {
5700 ; SSE-LABEL: test_mm_shufflelo_epi16:
5702 ; SSE-NEXT: pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00]
5703 ; SSE-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7]
5704 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5706 ; AVX1-LABEL: test_mm_shufflelo_epi16:
5708 ; AVX1-NEXT: vpshuflw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0x00]
5709 ; AVX1-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7]
5710 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5712 ; AVX512-LABEL: test_mm_shufflelo_epi16:
5714 ; AVX512-NEXT: vpshuflw $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xc0,0x00]
5715 ; AVX512-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7]
5716 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5717 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
5718 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
5719 %bc = bitcast <8 x i16> %res to <2 x i64>
5723 define <2 x i64> @test_mm_sll_epi16(<2 x i64> %a0, <2 x i64> %a1) {
5724 ; SSE-LABEL: test_mm_sll_epi16:
5726 ; SSE-NEXT: psllw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf1,0xc1]
5727 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5729 ; AVX1-LABEL: test_mm_sll_epi16:
5731 ; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf1,0xc1]
5732 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5734 ; AVX512-LABEL: test_mm_sll_epi16:
5736 ; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf1,0xc1]
5737 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5738 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
5739 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
5740 %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %arg0, <8 x i16> %arg1)
5741 %bc = bitcast <8 x i16> %res to <2 x i64>
5744 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
5746 define <2 x i64> @test_mm_sll_epi32(<2 x i64> %a0, <2 x i64> %a1) {
5747 ; SSE-LABEL: test_mm_sll_epi32:
5749 ; SSE-NEXT: pslld %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf2,0xc1]
5750 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5752 ; AVX1-LABEL: test_mm_sll_epi32:
5754 ; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf2,0xc1]
5755 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5757 ; AVX512-LABEL: test_mm_sll_epi32:
5759 ; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf2,0xc1]
5760 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5761 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
5762 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
5763 %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %arg0, <4 x i32> %arg1)
5764 %bc = bitcast <4 x i32> %res to <2 x i64>
5767 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
5769 define <2 x i64> @test_mm_sll_epi64(<2 x i64> %a0, <2 x i64> %a1) {
5770 ; SSE-LABEL: test_mm_sll_epi64:
5772 ; SSE-NEXT: psllq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf3,0xc1]
5773 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5775 ; AVX1-LABEL: test_mm_sll_epi64:
5777 ; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf3,0xc1]
5778 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5780 ; AVX512-LABEL: test_mm_sll_epi64:
5782 ; AVX512-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf3,0xc1]
5783 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5784 %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
5787 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
5789 define <2 x i64> @test_mm_slli_epi16(<2 x i64> %a0) {
5790 ; SSE-LABEL: test_mm_slli_epi16:
5792 ; SSE-NEXT: psllw $1, %xmm0 # encoding: [0x66,0x0f,0x71,0xf0,0x01]
5793 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5795 ; AVX1-LABEL: test_mm_slli_epi16:
5797 ; AVX1-NEXT: vpsllw $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xf0,0x01]
5798 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5800 ; AVX512-LABEL: test_mm_slli_epi16:
5802 ; AVX512-NEXT: vpsllw $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x01]
5803 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5804 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
5805 %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %arg0, i32 1)
5806 %bc = bitcast <8 x i16> %res to <2 x i64>
5809 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
5811 define <2 x i64> @test_mm_slli_epi32(<2 x i64> %a0) {
5812 ; SSE-LABEL: test_mm_slli_epi32:
5814 ; SSE-NEXT: pslld $1, %xmm0 # encoding: [0x66,0x0f,0x72,0xf0,0x01]
5815 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5817 ; AVX1-LABEL: test_mm_slli_epi32:
5819 ; AVX1-NEXT: vpslld $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x72,0xf0,0x01]
5820 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5822 ; AVX512-LABEL: test_mm_slli_epi32:
5824 ; AVX512-NEXT: vpslld $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xf0,0x01]
5825 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5826 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
5827 %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %arg0, i32 1)
5828 %bc = bitcast <4 x i32> %res to <2 x i64>
5831 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
5833 define <2 x i64> @test_mm_slli_epi64(<2 x i64> %a0) {
5834 ; SSE-LABEL: test_mm_slli_epi64:
5836 ; SSE-NEXT: psllq $1, %xmm0 # encoding: [0x66,0x0f,0x73,0xf0,0x01]
5837 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5839 ; AVX1-LABEL: test_mm_slli_epi64:
5841 ; AVX1-NEXT: vpsllq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf0,0x01]
5842 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5844 ; AVX512-LABEL: test_mm_slli_epi64:
5846 ; AVX512-NEXT: vpsllq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf0,0x01]
5847 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5848 %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 1)
5851 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
5853 define <2 x i64> @test_mm_slli_si128(<2 x i64> %a0) nounwind {
5854 ; SSE-LABEL: test_mm_slli_si128:
5856 ; SSE-NEXT: pslldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xf8,0x05]
5857 ; SSE-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
5858 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5860 ; AVX1-LABEL: test_mm_slli_si128:
5862 ; AVX1-NEXT: vpslldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf8,0x05]
5863 ; AVX1-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
5864 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5866 ; AVX512-LABEL: test_mm_slli_si128:
5868 ; AVX512-NEXT: vpslldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x05]
5869 ; AVX512-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
5870 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5871 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
5872 %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
5873 %bc = bitcast <16 x i8> %res to <2 x i64>
5877 define <2 x double> @test_mm_sqrt_pd(<2 x double> %a0) nounwind {
5878 ; SSE-LABEL: test_mm_sqrt_pd:
5880 ; SSE-NEXT: sqrtpd %xmm0, %xmm0 # encoding: [0x66,0x0f,0x51,0xc0]
5881 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5883 ; AVX1-LABEL: test_mm_sqrt_pd:
5885 ; AVX1-NEXT: vsqrtpd %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x51,0xc0]
5886 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5888 ; AVX512-LABEL: test_mm_sqrt_pd:
5890 ; AVX512-NEXT: vsqrtpd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x51,0xc0]
5891 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5892 %res = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a0)
5893 ret <2 x double> %res
5895 declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) nounwind readnone
5897 define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
5898 ; SSE-LABEL: test_mm_sqrt_sd:
5900 ; SSE-NEXT: sqrtsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0x51,0xc8]
5901 ; SSE-NEXT: movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
5902 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5904 ; AVX1-LABEL: test_mm_sqrt_sd:
5906 ; AVX1-NEXT: vsqrtsd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf3,0x51,0xc0]
5907 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5909 ; AVX512-LABEL: test_mm_sqrt_sd:
5911 ; AVX512-NEXT: vsqrtsd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf3,0x51,0xc0]
5912 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5913 %ext = extractelement <2 x double> %a0, i32 0
5914 %sqrt = call double @llvm.sqrt.f64(double %ext)
5915 %ins = insertelement <2 x double> %a1, double %sqrt, i32 0
5916 ret <2 x double> %ins
5918 declare double @llvm.sqrt.f64(double) nounwind readnone
5920 ; This doesn't match a clang test, but helps with fast-isel coverage.
5921 define double @test_mm_sqrt_sd_scalar(double %a0) nounwind {
5922 ; X86-SSE-LABEL: test_mm_sqrt_sd_scalar:
5924 ; X86-SSE-NEXT: pushl %ebp # encoding: [0x55]
5925 ; X86-SSE-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5]
5926 ; X86-SSE-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
5927 ; X86-SSE-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08]
5928 ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
5929 ; X86-SSE-NEXT: # encoding: [0xf2,0x0f,0x10,0x45,0x08]
5930 ; X86-SSE-NEXT: sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0]
5931 ; X86-SSE-NEXT: movsd %xmm0, (%esp) # encoding: [0xf2,0x0f,0x11,0x04,0x24]
5932 ; X86-SSE-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24]
5933 ; X86-SSE-NEXT: movl %ebp, %esp # encoding: [0x89,0xec]
5934 ; X86-SSE-NEXT: popl %ebp # encoding: [0x5d]
5935 ; X86-SSE-NEXT: retl # encoding: [0xc3]
5937 ; X86-AVX1-LABEL: test_mm_sqrt_sd_scalar:
5938 ; X86-AVX1: # %bb.0:
5939 ; X86-AVX1-NEXT: pushl %ebp # encoding: [0x55]
5940 ; X86-AVX1-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5]
5941 ; X86-AVX1-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
5942 ; X86-AVX1-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08]
5943 ; X86-AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
5944 ; X86-AVX1-NEXT: # encoding: [0xc5,0xfb,0x10,0x45,0x08]
5945 ; X86-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0]
5946 ; X86-AVX1-NEXT: vmovsd %xmm0, (%esp) # encoding: [0xc5,0xfb,0x11,0x04,0x24]
5947 ; X86-AVX1-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24]
5948 ; X86-AVX1-NEXT: movl %ebp, %esp # encoding: [0x89,0xec]
5949 ; X86-AVX1-NEXT: popl %ebp # encoding: [0x5d]
5950 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
5952 ; X86-AVX512-LABEL: test_mm_sqrt_sd_scalar:
5953 ; X86-AVX512: # %bb.0:
5954 ; X86-AVX512-NEXT: pushl %ebp # encoding: [0x55]
5955 ; X86-AVX512-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5]
5956 ; X86-AVX512-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
5957 ; X86-AVX512-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08]
5958 ; X86-AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
5959 ; X86-AVX512-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x45,0x08]
5960 ; X86-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
5961 ; X86-AVX512-NEXT: vmovsd %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x04,0x24]
5962 ; X86-AVX512-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24]
5963 ; X86-AVX512-NEXT: movl %ebp, %esp # encoding: [0x89,0xec]
5964 ; X86-AVX512-NEXT: popl %ebp # encoding: [0x5d]
5965 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
5967 ; X64-SSE-LABEL: test_mm_sqrt_sd_scalar:
5969 ; X64-SSE-NEXT: sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0]
5970 ; X64-SSE-NEXT: retq # encoding: [0xc3]
5972 ; X64-AVX1-LABEL: test_mm_sqrt_sd_scalar:
5973 ; X64-AVX1: # %bb.0:
5974 ; X64-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0]
5975 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
5977 ; X64-AVX512-LABEL: test_mm_sqrt_sd_scalar:
5978 ; X64-AVX512: # %bb.0:
5979 ; X64-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
5980 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
5982 ; X32-SSE-LABEL: test_mm_sqrt_sd_scalar:
5984 ; X32-SSE-NEXT: sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0]
5985 ; X32-SSE-NEXT: retq # encoding: [0xc3]
5987 ; X32-AVX1-LABEL: test_mm_sqrt_sd_scalar:
5988 ; X32-AVX1: # %bb.0:
5989 ; X32-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0]
5990 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
5992 ; X32-AVX512-LABEL: test_mm_sqrt_sd_scalar:
5993 ; X32-AVX512: # %bb.0:
5994 ; X32-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
5995 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
5996 %sqrt = call double @llvm.sqrt.f64(double %a0)
6000 define <2 x i64> @test_mm_sra_epi16(<2 x i64> %a0, <2 x i64> %a1) {
6001 ; SSE-LABEL: test_mm_sra_epi16:
6003 ; SSE-NEXT: psraw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe1,0xc1]
6004 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6006 ; AVX1-LABEL: test_mm_sra_epi16:
6008 ; AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe1,0xc1]
6009 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6011 ; AVX512-LABEL: test_mm_sra_epi16:
6013 ; AVX512-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe1,0xc1]
6014 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6015 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
6016 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
6017 %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %arg0, <8 x i16> %arg1)
6018 %bc = bitcast <8 x i16> %res to <2 x i64>
6021 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
6023 define <2 x i64> @test_mm_sra_epi32(<2 x i64> %a0, <2 x i64> %a1) {
6024 ; SSE-LABEL: test_mm_sra_epi32:
6026 ; SSE-NEXT: psrad %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe2,0xc1]
6027 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6029 ; AVX1-LABEL: test_mm_sra_epi32:
6031 ; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe2,0xc1]
6032 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6034 ; AVX512-LABEL: test_mm_sra_epi32:
6036 ; AVX512-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe2,0xc1]
6037 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6038 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
6039 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
6040 %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %arg0, <4 x i32> %arg1)
6041 %bc = bitcast <4 x i32> %res to <2 x i64>
6044 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
6046 define <2 x i64> @test_mm_srai_epi16(<2 x i64> %a0) {
6047 ; SSE-LABEL: test_mm_srai_epi16:
6049 ; SSE-NEXT: psraw $1, %xmm0 # encoding: [0x66,0x0f,0x71,0xe0,0x01]
6050 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6052 ; AVX1-LABEL: test_mm_srai_epi16:
6054 ; AVX1-NEXT: vpsraw $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xe0,0x01]
6055 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6057 ; AVX512-LABEL: test_mm_srai_epi16:
6059 ; AVX512-NEXT: vpsraw $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xe0,0x01]
6060 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6061 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
6062 %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %arg0, i32 1)
6063 %bc = bitcast <8 x i16> %res to <2 x i64>
6066 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
6068 define <2 x i64> @test_mm_srai_epi32(<2 x i64> %a0) {
6069 ; SSE-LABEL: test_mm_srai_epi32:
6071 ; SSE-NEXT: psrad $1, %xmm0 # encoding: [0x66,0x0f,0x72,0xe0,0x01]
6072 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6074 ; AVX1-LABEL: test_mm_srai_epi32:
6076 ; AVX1-NEXT: vpsrad $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x72,0xe0,0x01]
6077 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6079 ; AVX512-LABEL: test_mm_srai_epi32:
6081 ; AVX512-NEXT: vpsrad $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xe0,0x01]
6082 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6083 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
6084 %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %arg0, i32 1)
6085 %bc = bitcast <4 x i32> %res to <2 x i64>
6088 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
6090 define <2 x i64> @test_mm_srl_epi16(<2 x i64> %a0, <2 x i64> %a1) {
6091 ; SSE-LABEL: test_mm_srl_epi16:
6093 ; SSE-NEXT: psrlw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd1,0xc1]
6094 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6096 ; AVX1-LABEL: test_mm_srl_epi16:
6098 ; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd1,0xc1]
6099 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6101 ; AVX512-LABEL: test_mm_srl_epi16:
6103 ; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xc1]
6104 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6105 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
6106 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
6107 %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %arg0, <8 x i16> %arg1)
6108 %bc = bitcast <8 x i16> %res to <2 x i64>
6111 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
6113 define <2 x i64> @test_mm_srl_epi32(<2 x i64> %a0, <2 x i64> %a1) {
6114 ; SSE-LABEL: test_mm_srl_epi32:
6116 ; SSE-NEXT: psrld %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd2,0xc1]
6117 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6119 ; AVX1-LABEL: test_mm_srl_epi32:
6121 ; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd2,0xc1]
6122 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6124 ; AVX512-LABEL: test_mm_srl_epi32:
6126 ; AVX512-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd2,0xc1]
6127 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6128 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
6129 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
6130 %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %arg0, <4 x i32> %arg1)
6131 %bc = bitcast <4 x i32> %res to <2 x i64>
6134 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
6136 define <2 x i64> @test_mm_srl_epi64(<2 x i64> %a0, <2 x i64> %a1) {
6137 ; SSE-LABEL: test_mm_srl_epi64:
6139 ; SSE-NEXT: psrlq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd3,0xc1]
6140 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6142 ; AVX1-LABEL: test_mm_srl_epi64:
6144 ; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd3,0xc1]
6145 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6147 ; AVX512-LABEL: test_mm_srl_epi64:
6149 ; AVX512-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd3,0xc1]
6150 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6151 %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
6154 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
6156 define <2 x i64> @test_mm_srli_epi16(<2 x i64> %a0) {
6157 ; SSE-LABEL: test_mm_srli_epi16:
6159 ; SSE-NEXT: psrlw $1, %xmm0 # encoding: [0x66,0x0f,0x71,0xd0,0x01]
6160 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6162 ; AVX1-LABEL: test_mm_srli_epi16:
6164 ; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xd0,0x01]
6165 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6167 ; AVX512-LABEL: test_mm_srli_epi16:
6169 ; AVX512-NEXT: vpsrlw $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xd0,0x01]
6170 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6171 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
6172 %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %arg0, i32 1)
6173 %bc = bitcast <8 x i16> %res to <2 x i64>
6176 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
6178 define <2 x i64> @test_mm_srli_epi32(<2 x i64> %a0) {
6179 ; SSE-LABEL: test_mm_srli_epi32:
6181 ; SSE-NEXT: psrld $1, %xmm0 # encoding: [0x66,0x0f,0x72,0xd0,0x01]
6182 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6184 ; AVX1-LABEL: test_mm_srli_epi32:
6186 ; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x72,0xd0,0x01]
6187 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6189 ; AVX512-LABEL: test_mm_srli_epi32:
6191 ; AVX512-NEXT: vpsrld $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xd0,0x01]
6192 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6193 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
6194 %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %arg0, i32 1)
6195 %bc = bitcast <4 x i32> %res to <2 x i64>
6198 declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
6200 define <2 x i64> @test_mm_srli_epi64(<2 x i64> %a0) {
6201 ; SSE-LABEL: test_mm_srli_epi64:
6203 ; SSE-NEXT: psrlq $1, %xmm0 # encoding: [0x66,0x0f,0x73,0xd0,0x01]
6204 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6206 ; AVX1-LABEL: test_mm_srli_epi64:
6208 ; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd0,0x01]
6209 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6211 ; AVX512-LABEL: test_mm_srli_epi64:
6213 ; AVX512-NEXT: vpsrlq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x01]
6214 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6215 %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 1)
6218 declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
6220 define <2 x i64> @test_mm_srli_si128(<2 x i64> %a0) nounwind {
6221 ; SSE-LABEL: test_mm_srli_si128:
6223 ; SSE-NEXT: psrldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xd8,0x05]
6224 ; SSE-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
6225 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6227 ; AVX1-LABEL: test_mm_srli_si128:
6229 ; AVX1-NEXT: vpsrldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd8,0x05]
6230 ; AVX1-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
6231 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6233 ; AVX512-LABEL: test_mm_srli_si128:
6235 ; AVX512-NEXT: vpsrldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x05]
6236 ; AVX512-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
6237 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6238 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
6239 %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
6240 %bc = bitcast <16 x i8> %res to <2 x i64>
6244 define void @test_mm_store_pd(ptr%a0, <2 x double> %a1) {
6245 ; X86-SSE-LABEL: test_mm_store_pd:
6247 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6248 ; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
6249 ; X86-SSE-NEXT: retl # encoding: [0xc3]
6251 ; X86-AVX1-LABEL: test_mm_store_pd:
6252 ; X86-AVX1: # %bb.0:
6253 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6254 ; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
6255 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
6257 ; X86-AVX512-LABEL: test_mm_store_pd:
6258 ; X86-AVX512: # %bb.0:
6259 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6260 ; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
6261 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
6263 ; X64-SSE-LABEL: test_mm_store_pd:
6265 ; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
6266 ; X64-SSE-NEXT: retq # encoding: [0xc3]
6268 ; X64-AVX1-LABEL: test_mm_store_pd:
6269 ; X64-AVX1: # %bb.0:
6270 ; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
6271 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
6273 ; X64-AVX512-LABEL: test_mm_store_pd:
6274 ; X64-AVX512: # %bb.0:
6275 ; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
6276 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
6278 ; X32-SSE-LABEL: test_mm_store_pd:
6280 ; X32-SSE-NEXT: movaps %xmm0, (%edi) # encoding: [0x67,0x0f,0x29,0x07]
6281 ; X32-SSE-NEXT: retq # encoding: [0xc3]
6283 ; X32-AVX1-LABEL: test_mm_store_pd:
6284 ; X32-AVX1: # %bb.0:
6285 ; X32-AVX1-NEXT: vmovaps %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x29,0x07]
6286 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
6288 ; X32-AVX512-LABEL: test_mm_store_pd:
6289 ; X32-AVX512: # %bb.0:
6290 ; X32-AVX512-NEXT: vmovaps %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x29,0x07]
6291 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
6292 store <2 x double> %a1, ptr %a0, align 16
6296 define void @test_mm_store_pd1(ptr%a0, <2 x double> %a1) {
6297 ; X86-SSE-LABEL: test_mm_store_pd1:
6299 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6300 ; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
6301 ; X86-SSE-NEXT: # xmm0 = xmm0[0,0]
6302 ; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
6303 ; X86-SSE-NEXT: retl # encoding: [0xc3]
6305 ; X86-AVX1-LABEL: test_mm_store_pd1:
6306 ; X86-AVX1: # %bb.0:
6307 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6308 ; X86-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
6309 ; X86-AVX1-NEXT: # xmm0 = xmm0[0,0]
6310 ; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
6311 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
6313 ; X86-AVX512-LABEL: test_mm_store_pd1:
6314 ; X86-AVX512: # %bb.0:
6315 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6316 ; X86-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
6317 ; X86-AVX512-NEXT: # xmm0 = xmm0[0,0]
6318 ; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
6319 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
6321 ; X64-SSE-LABEL: test_mm_store_pd1:
6323 ; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
6324 ; X64-SSE-NEXT: # xmm0 = xmm0[0,0]
6325 ; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
6326 ; X64-SSE-NEXT: retq # encoding: [0xc3]
6328 ; X64-AVX1-LABEL: test_mm_store_pd1:
6329 ; X64-AVX1: # %bb.0:
6330 ; X64-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
6331 ; X64-AVX1-NEXT: # xmm0 = xmm0[0,0]
6332 ; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
6333 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
6335 ; X64-AVX512-LABEL: test_mm_store_pd1:
6336 ; X64-AVX512: # %bb.0:
6337 ; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
6338 ; X64-AVX512-NEXT: # xmm0 = xmm0[0,0]
6339 ; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
6340 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
6342 ; X32-SSE-LABEL: test_mm_store_pd1:
6344 ; X32-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
6345 ; X32-SSE-NEXT: # xmm0 = xmm0[0,0]
6346 ; X32-SSE-NEXT: movaps %xmm0, (%edi) # encoding: [0x67,0x0f,0x29,0x07]
6347 ; X32-SSE-NEXT: retq # encoding: [0xc3]
6349 ; X32-AVX1-LABEL: test_mm_store_pd1:
6350 ; X32-AVX1: # %bb.0:
6351 ; X32-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
6352 ; X32-AVX1-NEXT: # xmm0 = xmm0[0,0]
6353 ; X32-AVX1-NEXT: vmovaps %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x29,0x07]
6354 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
6356 ; X32-AVX512-LABEL: test_mm_store_pd1:
6357 ; X32-AVX512: # %bb.0:
6358 ; X32-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
6359 ; X32-AVX512-NEXT: # xmm0 = xmm0[0,0]
6360 ; X32-AVX512-NEXT: vmovaps %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x29,0x07]
6361 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
6362 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
6363 store <2 x double> %shuf, ptr %a0, align 16
6367 define void @test_mm_store_sd(ptr%a0, <2 x double> %a1) {
6368 ; X86-SSE-LABEL: test_mm_store_sd:
6370 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6371 ; X86-SSE-NEXT: movsd %xmm0, (%eax) # encoding: [0xf2,0x0f,0x11,0x00]
6372 ; X86-SSE-NEXT: retl # encoding: [0xc3]
6374 ; X86-AVX1-LABEL: test_mm_store_sd:
6375 ; X86-AVX1: # %bb.0:
6376 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6377 ; X86-AVX1-NEXT: vmovsd %xmm0, (%eax) # encoding: [0xc5,0xfb,0x11,0x00]
6378 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
6380 ; X86-AVX512-LABEL: test_mm_store_sd:
6381 ; X86-AVX512: # %bb.0:
6382 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6383 ; X86-AVX512-NEXT: vmovsd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x00]
6384 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
6386 ; X64-SSE-LABEL: test_mm_store_sd:
6388 ; X64-SSE-NEXT: movsd %xmm0, (%rdi) # encoding: [0xf2,0x0f,0x11,0x07]
6389 ; X64-SSE-NEXT: retq # encoding: [0xc3]
6391 ; X64-AVX1-LABEL: test_mm_store_sd:
6392 ; X64-AVX1: # %bb.0:
6393 ; X64-AVX1-NEXT: vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
6394 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
6396 ; X64-AVX512-LABEL: test_mm_store_sd:
6397 ; X64-AVX512: # %bb.0:
6398 ; X64-AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
6399 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
6401 ; X32-SSE-LABEL: test_mm_store_sd:
6403 ; X32-SSE-NEXT: movsd %xmm0, (%edi) # encoding: [0x67,0xf2,0x0f,0x11,0x07]
6404 ; X32-SSE-NEXT: retq # encoding: [0xc3]
6406 ; X32-AVX1-LABEL: test_mm_store_sd:
6407 ; X32-AVX1: # %bb.0:
6408 ; X32-AVX1-NEXT: vmovsd %xmm0, (%edi) # encoding: [0x67,0xc5,0xfb,0x11,0x07]
6409 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
6411 ; X32-AVX512-LABEL: test_mm_store_sd:
6412 ; X32-AVX512: # %bb.0:
6413 ; X32-AVX512-NEXT: vmovsd %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x11,0x07]
6414 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
6415 %ext = extractelement <2 x double> %a1, i32 0
6416 store double %ext, ptr %a0, align 1
6420 define void @test_mm_store_si128(ptr%a0, <2 x i64> %a1) {
6421 ; X86-SSE-LABEL: test_mm_store_si128:
6423 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6424 ; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
6425 ; X86-SSE-NEXT: retl # encoding: [0xc3]
6427 ; X86-AVX1-LABEL: test_mm_store_si128:
6428 ; X86-AVX1: # %bb.0:
6429 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6430 ; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
6431 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
6433 ; X86-AVX512-LABEL: test_mm_store_si128:
6434 ; X86-AVX512: # %bb.0:
6435 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6436 ; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
6437 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
6439 ; X64-SSE-LABEL: test_mm_store_si128:
6441 ; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
6442 ; X64-SSE-NEXT: retq # encoding: [0xc3]
6444 ; X64-AVX1-LABEL: test_mm_store_si128:
6445 ; X64-AVX1: # %bb.0:
6446 ; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
6447 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
6449 ; X64-AVX512-LABEL: test_mm_store_si128:
6450 ; X64-AVX512: # %bb.0:
6451 ; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
6452 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
6454 ; X32-SSE-LABEL: test_mm_store_si128:
6456 ; X32-SSE-NEXT: movaps %xmm0, (%edi) # encoding: [0x67,0x0f,0x29,0x07]
6457 ; X32-SSE-NEXT: retq # encoding: [0xc3]
6459 ; X32-AVX1-LABEL: test_mm_store_si128:
6460 ; X32-AVX1: # %bb.0:
6461 ; X32-AVX1-NEXT: vmovaps %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x29,0x07]
6462 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
6464 ; X32-AVX512-LABEL: test_mm_store_si128:
6465 ; X32-AVX512: # %bb.0:
6466 ; X32-AVX512-NEXT: vmovaps %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x29,0x07]
6467 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
6468 store <2 x i64> %a1, ptr %a0, align 16
6472 define void @test_mm_store1_pd(ptr%a0, <2 x double> %a1) {
6473 ; X86-SSE-LABEL: test_mm_store1_pd:
6475 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6476 ; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
6477 ; X86-SSE-NEXT: # xmm0 = xmm0[0,0]
6478 ; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
6479 ; X86-SSE-NEXT: retl # encoding: [0xc3]
6481 ; X86-AVX1-LABEL: test_mm_store1_pd:
6482 ; X86-AVX1: # %bb.0:
6483 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6484 ; X86-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
6485 ; X86-AVX1-NEXT: # xmm0 = xmm0[0,0]
6486 ; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
6487 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
6489 ; X86-AVX512-LABEL: test_mm_store1_pd:
6490 ; X86-AVX512: # %bb.0:
6491 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6492 ; X86-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
6493 ; X86-AVX512-NEXT: # xmm0 = xmm0[0,0]
6494 ; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
6495 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
6497 ; X64-SSE-LABEL: test_mm_store1_pd:
6499 ; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
6500 ; X64-SSE-NEXT: # xmm0 = xmm0[0,0]
6501 ; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
6502 ; X64-SSE-NEXT: retq # encoding: [0xc3]
6504 ; X64-AVX1-LABEL: test_mm_store1_pd:
6505 ; X64-AVX1: # %bb.0:
6506 ; X64-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
6507 ; X64-AVX1-NEXT: # xmm0 = xmm0[0,0]
6508 ; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
6509 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
6511 ; X64-AVX512-LABEL: test_mm_store1_pd:
6512 ; X64-AVX512: # %bb.0:
6513 ; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
6514 ; X64-AVX512-NEXT: # xmm0 = xmm0[0,0]
6515 ; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
6516 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
6518 ; X32-SSE-LABEL: test_mm_store1_pd:
6520 ; X32-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
6521 ; X32-SSE-NEXT: # xmm0 = xmm0[0,0]
6522 ; X32-SSE-NEXT: movaps %xmm0, (%edi) # encoding: [0x67,0x0f,0x29,0x07]
6523 ; X32-SSE-NEXT: retq # encoding: [0xc3]
6525 ; X32-AVX1-LABEL: test_mm_store1_pd:
6526 ; X32-AVX1: # %bb.0:
6527 ; X32-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
6528 ; X32-AVX1-NEXT: # xmm0 = xmm0[0,0]
6529 ; X32-AVX1-NEXT: vmovaps %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x29,0x07]
6530 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
6532 ; X32-AVX512-LABEL: test_mm_store1_pd:
6533 ; X32-AVX512: # %bb.0:
6534 ; X32-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
6535 ; X32-AVX512-NEXT: # xmm0 = xmm0[0,0]
6536 ; X32-AVX512-NEXT: vmovaps %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x29,0x07]
6537 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
6538 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
6539 store <2 x double> %shuf, ptr %a0, align 16
6543 define void @test_mm_storeh_sd(ptr%a0, <2 x double> %a1) {
6544 ; X86-SSE-LABEL: test_mm_storeh_sd:
6546 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6547 ; X86-SSE-NEXT: movhlps %xmm0, %xmm0 # encoding: [0x0f,0x12,0xc0]
6548 ; X86-SSE-NEXT: # xmm0 = xmm0[1,1]
6549 ; X86-SSE-NEXT: movsd %xmm0, (%eax) # encoding: [0xf2,0x0f,0x11,0x00]
6550 ; X86-SSE-NEXT: retl # encoding: [0xc3]
6552 ; X86-AVX1-LABEL: test_mm_storeh_sd:
6553 ; X86-AVX1: # %bb.0:
6554 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6555 ; X86-AVX1-NEXT: vshufpd $1, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc6,0xc0,0x01]
6556 ; X86-AVX1-NEXT: # xmm0 = xmm0[1,0]
6557 ; X86-AVX1-NEXT: vmovsd %xmm0, (%eax) # encoding: [0xc5,0xfb,0x11,0x00]
6558 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
6560 ; X86-AVX512-LABEL: test_mm_storeh_sd:
6561 ; X86-AVX512: # %bb.0:
6562 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6563 ; X86-AVX512-NEXT: vshufpd $1, %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc6,0xc0,0x01]
6564 ; X86-AVX512-NEXT: # xmm0 = xmm0[1,0]
6565 ; X86-AVX512-NEXT: vmovsd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x00]
6566 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
6568 ; X64-SSE-LABEL: test_mm_storeh_sd:
6570 ; X64-SSE-NEXT: movhlps %xmm0, %xmm0 # encoding: [0x0f,0x12,0xc0]
6571 ; X64-SSE-NEXT: # xmm0 = xmm0[1,1]
6572 ; X64-SSE-NEXT: movsd %xmm0, (%rdi) # encoding: [0xf2,0x0f,0x11,0x07]
6573 ; X64-SSE-NEXT: retq # encoding: [0xc3]
6575 ; X64-AVX1-LABEL: test_mm_storeh_sd:
6576 ; X64-AVX1: # %bb.0:
6577 ; X64-AVX1-NEXT: vshufpd $1, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc6,0xc0,0x01]
6578 ; X64-AVX1-NEXT: # xmm0 = xmm0[1,0]
6579 ; X64-AVX1-NEXT: vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
6580 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
6582 ; X64-AVX512-LABEL: test_mm_storeh_sd:
6583 ; X64-AVX512: # %bb.0:
6584 ; X64-AVX512-NEXT: vshufpd $1, %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc6,0xc0,0x01]
6585 ; X64-AVX512-NEXT: # xmm0 = xmm0[1,0]
6586 ; X64-AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
6587 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
6589 ; X32-SSE-LABEL: test_mm_storeh_sd:
6591 ; X32-SSE-NEXT: movhlps %xmm0, %xmm0 # encoding: [0x0f,0x12,0xc0]
6592 ; X32-SSE-NEXT: # xmm0 = xmm0[1,1]
6593 ; X32-SSE-NEXT: movsd %xmm0, (%edi) # encoding: [0x67,0xf2,0x0f,0x11,0x07]
6594 ; X32-SSE-NEXT: retq # encoding: [0xc3]
6596 ; X32-AVX1-LABEL: test_mm_storeh_sd:
6597 ; X32-AVX1: # %bb.0:
6598 ; X32-AVX1-NEXT: vshufpd $1, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc6,0xc0,0x01]
6599 ; X32-AVX1-NEXT: # xmm0 = xmm0[1,0]
6600 ; X32-AVX1-NEXT: vmovsd %xmm0, (%edi) # encoding: [0x67,0xc5,0xfb,0x11,0x07]
6601 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
6603 ; X32-AVX512-LABEL: test_mm_storeh_sd:
6604 ; X32-AVX512: # %bb.0:
6605 ; X32-AVX512-NEXT: vshufpd $1, %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc6,0xc0,0x01]
6606 ; X32-AVX512-NEXT: # xmm0 = xmm0[1,0]
6607 ; X32-AVX512-NEXT: vmovsd %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x11,0x07]
6608 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
6609 %ext = extractelement <2 x double> %a1, i32 1
6610 store double %ext, ptr %a0, align 8
6614 define void @test_mm_storel_epi64(ptr%a0, <2 x i64> %a1) {
6615 ; X86-SSE-LABEL: test_mm_storel_epi64:
6617 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6618 ; X86-SSE-NEXT: movlps %xmm0, (%eax) # encoding: [0x0f,0x13,0x00]
6619 ; X86-SSE-NEXT: retl # encoding: [0xc3]
6621 ; X86-AVX1-LABEL: test_mm_storel_epi64:
6622 ; X86-AVX1: # %bb.0:
6623 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6624 ; X86-AVX1-NEXT: vmovlps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x13,0x00]
6625 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
6627 ; X86-AVX512-LABEL: test_mm_storel_epi64:
6628 ; X86-AVX512: # %bb.0:
6629 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6630 ; X86-AVX512-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
6631 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
6633 ; X64-SSE-LABEL: test_mm_storel_epi64:
6635 ; X64-SSE-NEXT: movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0]
6636 ; X64-SSE-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
6637 ; X64-SSE-NEXT: retq # encoding: [0xc3]
6639 ; X64-AVX1-LABEL: test_mm_storel_epi64:
6640 ; X64-AVX1: # %bb.0:
6641 ; X64-AVX1-NEXT: vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
6642 ; X64-AVX1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
6643 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
6645 ; X64-AVX512-LABEL: test_mm_storel_epi64:
6646 ; X64-AVX512: # %bb.0:
6647 ; X64-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
6648 ; X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
6649 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
6651 ; X32-SSE-LABEL: test_mm_storel_epi64:
6653 ; X32-SSE-NEXT: movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0]
6654 ; X32-SSE-NEXT: movq %rax, (%edi) # encoding: [0x67,0x48,0x89,0x07]
6655 ; X32-SSE-NEXT: retq # encoding: [0xc3]
6657 ; X32-AVX1-LABEL: test_mm_storel_epi64:
6658 ; X32-AVX1: # %bb.0:
6659 ; X32-AVX1-NEXT: vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
6660 ; X32-AVX1-NEXT: movq %rax, (%edi) # encoding: [0x67,0x48,0x89,0x07]
6661 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
6663 ; X32-AVX512-LABEL: test_mm_storel_epi64:
6664 ; X32-AVX512: # %bb.0:
6665 ; X32-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
6666 ; X32-AVX512-NEXT: movq %rax, (%edi) # encoding: [0x67,0x48,0x89,0x07]
6667 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
6668 %ext = extractelement <2 x i64> %a1, i32 0
6669 %bc = bitcast ptr%a0 to ptr
6670 store i64 %ext, ptr %bc, align 8
6674 define void @test_mm_storel_sd(ptr%a0, <2 x double> %a1) {
6675 ; X86-SSE-LABEL: test_mm_storel_sd:
6677 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6678 ; X86-SSE-NEXT: movsd %xmm0, (%eax) # encoding: [0xf2,0x0f,0x11,0x00]
6679 ; X86-SSE-NEXT: retl # encoding: [0xc3]
6681 ; X86-AVX1-LABEL: test_mm_storel_sd:
6682 ; X86-AVX1: # %bb.0:
6683 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6684 ; X86-AVX1-NEXT: vmovsd %xmm0, (%eax) # encoding: [0xc5,0xfb,0x11,0x00]
6685 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
6687 ; X86-AVX512-LABEL: test_mm_storel_sd:
6688 ; X86-AVX512: # %bb.0:
6689 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6690 ; X86-AVX512-NEXT: vmovsd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x00]
6691 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
6693 ; X64-SSE-LABEL: test_mm_storel_sd:
6695 ; X64-SSE-NEXT: movsd %xmm0, (%rdi) # encoding: [0xf2,0x0f,0x11,0x07]
6696 ; X64-SSE-NEXT: retq # encoding: [0xc3]
6698 ; X64-AVX1-LABEL: test_mm_storel_sd:
6699 ; X64-AVX1: # %bb.0:
6700 ; X64-AVX1-NEXT: vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
6701 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
6703 ; X64-AVX512-LABEL: test_mm_storel_sd:
6704 ; X64-AVX512: # %bb.0:
6705 ; X64-AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
6706 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
6708 ; X32-SSE-LABEL: test_mm_storel_sd:
6710 ; X32-SSE-NEXT: movsd %xmm0, (%edi) # encoding: [0x67,0xf2,0x0f,0x11,0x07]
6711 ; X32-SSE-NEXT: retq # encoding: [0xc3]
6713 ; X32-AVX1-LABEL: test_mm_storel_sd:
6714 ; X32-AVX1: # %bb.0:
6715 ; X32-AVX1-NEXT: vmovsd %xmm0, (%edi) # encoding: [0x67,0xc5,0xfb,0x11,0x07]
6716 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
6718 ; X32-AVX512-LABEL: test_mm_storel_sd:
6719 ; X32-AVX512: # %bb.0:
6720 ; X32-AVX512-NEXT: vmovsd %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x11,0x07]
6721 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
6722 %ext = extractelement <2 x double> %a1, i32 0
6723 store double %ext, ptr %a0, align 8
6727 define void @test_mm_storer_pd(ptr%a0, <2 x double> %a1) {
6728 ; X86-SSE-LABEL: test_mm_storer_pd:
6730 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6731 ; X86-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e]
6732 ; X86-SSE-NEXT: # xmm0 = xmm0[2,3,0,1]
6733 ; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
6734 ; X86-SSE-NEXT: retl # encoding: [0xc3]
6736 ; X86-AVX1-LABEL: test_mm_storer_pd:
6737 ; X86-AVX1: # %bb.0:
6738 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6739 ; X86-AVX1-NEXT: vshufpd $1, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc6,0xc0,0x01]
6740 ; X86-AVX1-NEXT: # xmm0 = xmm0[1,0]
6741 ; X86-AVX1-NEXT: vmovapd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x29,0x00]
6742 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
6744 ; X86-AVX512-LABEL: test_mm_storer_pd:
6745 ; X86-AVX512: # %bb.0:
6746 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6747 ; X86-AVX512-NEXT: vshufpd $1, %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc6,0xc0,0x01]
6748 ; X86-AVX512-NEXT: # xmm0 = xmm0[1,0]
6749 ; X86-AVX512-NEXT: vmovapd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00]
6750 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
6752 ; X64-SSE-LABEL: test_mm_storer_pd:
6754 ; X64-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e]
6755 ; X64-SSE-NEXT: # xmm0 = xmm0[2,3,0,1]
6756 ; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
6757 ; X64-SSE-NEXT: retq # encoding: [0xc3]
6759 ; X64-AVX1-LABEL: test_mm_storer_pd:
6760 ; X64-AVX1: # %bb.0:
6761 ; X64-AVX1-NEXT: vshufpd $1, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc6,0xc0,0x01]
6762 ; X64-AVX1-NEXT: # xmm0 = xmm0[1,0]
6763 ; X64-AVX1-NEXT: vmovapd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x29,0x07]
6764 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
6766 ; X64-AVX512-LABEL: test_mm_storer_pd:
6767 ; X64-AVX512: # %bb.0:
6768 ; X64-AVX512-NEXT: vshufpd $1, %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc6,0xc0,0x01]
6769 ; X64-AVX512-NEXT: # xmm0 = xmm0[1,0]
6770 ; X64-AVX512-NEXT: vmovapd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x07]
6771 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
6773 ; X32-SSE-LABEL: test_mm_storer_pd:
6775 ; X32-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e]
6776 ; X32-SSE-NEXT: # xmm0 = xmm0[2,3,0,1]
6777 ; X32-SSE-NEXT: movaps %xmm0, (%edi) # encoding: [0x67,0x0f,0x29,0x07]
6778 ; X32-SSE-NEXT: retq # encoding: [0xc3]
6780 ; X32-AVX1-LABEL: test_mm_storer_pd:
6781 ; X32-AVX1: # %bb.0:
6782 ; X32-AVX1-NEXT: vshufpd $1, %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc6,0xc0,0x01]
6783 ; X32-AVX1-NEXT: # xmm0 = xmm0[1,0]
6784 ; X32-AVX1-NEXT: vmovapd %xmm0, (%edi) # encoding: [0x67,0xc5,0xf9,0x29,0x07]
6785 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
6787 ; X32-AVX512-LABEL: test_mm_storer_pd:
6788 ; X32-AVX512: # %bb.0:
6789 ; X32-AVX512-NEXT: vshufpd $1, %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc6,0xc0,0x01]
6790 ; X32-AVX512-NEXT: # xmm0 = xmm0[1,0]
6791 ; X32-AVX512-NEXT: vmovapd %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf9,0x29,0x07]
6792 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
6793 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> <i32 1, i32 0>
6794 store <2 x double> %shuf, ptr %a0, align 16
6798 define void @test_mm_storeu_pd(ptr%a0, <2 x double> %a1) {
6799 ; X86-SSE-LABEL: test_mm_storeu_pd:
6801 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6802 ; X86-SSE-NEXT: movups %xmm0, (%eax) # encoding: [0x0f,0x11,0x00]
6803 ; X86-SSE-NEXT: retl # encoding: [0xc3]
6805 ; X86-AVX1-LABEL: test_mm_storeu_pd:
6806 ; X86-AVX1: # %bb.0:
6807 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6808 ; X86-AVX1-NEXT: vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00]
6809 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
6811 ; X86-AVX512-LABEL: test_mm_storeu_pd:
6812 ; X86-AVX512: # %bb.0:
6813 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6814 ; X86-AVX512-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
6815 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
6817 ; X64-SSE-LABEL: test_mm_storeu_pd:
6819 ; X64-SSE-NEXT: movups %xmm0, (%rdi) # encoding: [0x0f,0x11,0x07]
6820 ; X64-SSE-NEXT: retq # encoding: [0xc3]
6822 ; X64-AVX1-LABEL: test_mm_storeu_pd:
6823 ; X64-AVX1: # %bb.0:
6824 ; X64-AVX1-NEXT: vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07]
6825 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
6827 ; X64-AVX512-LABEL: test_mm_storeu_pd:
6828 ; X64-AVX512: # %bb.0:
6829 ; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
6830 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
6832 ; X32-SSE-LABEL: test_mm_storeu_pd:
6834 ; X32-SSE-NEXT: movups %xmm0, (%edi) # encoding: [0x67,0x0f,0x11,0x07]
6835 ; X32-SSE-NEXT: retq # encoding: [0xc3]
6837 ; X32-AVX1-LABEL: test_mm_storeu_pd:
6838 ; X32-AVX1: # %bb.0:
6839 ; X32-AVX1-NEXT: vmovups %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x11,0x07]
6840 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
6842 ; X32-AVX512-LABEL: test_mm_storeu_pd:
6843 ; X32-AVX512: # %bb.0:
6844 ; X32-AVX512-NEXT: vmovups %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x11,0x07]
6845 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
6846 store <2 x double> %a1, ptr %a0, align 1
6850 define void @test_mm_storeu_si128(ptr%a0, <2 x i64> %a1) {
6851 ; X86-SSE-LABEL: test_mm_storeu_si128:
6853 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6854 ; X86-SSE-NEXT: movups %xmm0, (%eax) # encoding: [0x0f,0x11,0x00]
6855 ; X86-SSE-NEXT: retl # encoding: [0xc3]
6857 ; X86-AVX1-LABEL: test_mm_storeu_si128:
6858 ; X86-AVX1: # %bb.0:
6859 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6860 ; X86-AVX1-NEXT: vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00]
6861 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
6863 ; X86-AVX512-LABEL: test_mm_storeu_si128:
6864 ; X86-AVX512: # %bb.0:
6865 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6866 ; X86-AVX512-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
6867 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
6869 ; X64-SSE-LABEL: test_mm_storeu_si128:
6871 ; X64-SSE-NEXT: movups %xmm0, (%rdi) # encoding: [0x0f,0x11,0x07]
6872 ; X64-SSE-NEXT: retq # encoding: [0xc3]
6874 ; X64-AVX1-LABEL: test_mm_storeu_si128:
6875 ; X64-AVX1: # %bb.0:
6876 ; X64-AVX1-NEXT: vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07]
6877 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
6879 ; X64-AVX512-LABEL: test_mm_storeu_si128:
6880 ; X64-AVX512: # %bb.0:
6881 ; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
6882 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
6884 ; X32-SSE-LABEL: test_mm_storeu_si128:
6886 ; X32-SSE-NEXT: movups %xmm0, (%edi) # encoding: [0x67,0x0f,0x11,0x07]
6887 ; X32-SSE-NEXT: retq # encoding: [0xc3]
6889 ; X32-AVX1-LABEL: test_mm_storeu_si128:
6890 ; X32-AVX1: # %bb.0:
6891 ; X32-AVX1-NEXT: vmovups %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x11,0x07]
6892 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
6894 ; X32-AVX512-LABEL: test_mm_storeu_si128:
6895 ; X32-AVX512: # %bb.0:
6896 ; X32-AVX512-NEXT: vmovups %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x11,0x07]
6897 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
6898 store <2 x i64> %a1, ptr %a0, align 1
6902 define void @test_mm_storeu_si64(ptr nocapture %A, <2 x i64> %B) {
6903 ; X86-SSE-LABEL: test_mm_storeu_si64:
6904 ; X86-SSE: # %bb.0: # %entry
6905 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6906 ; X86-SSE-NEXT: movlps %xmm0, (%eax) # encoding: [0x0f,0x13,0x00]
6907 ; X86-SSE-NEXT: retl # encoding: [0xc3]
6909 ; X86-AVX1-LABEL: test_mm_storeu_si64:
6910 ; X86-AVX1: # %bb.0: # %entry
6911 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6912 ; X86-AVX1-NEXT: vmovlps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x13,0x00]
6913 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
6915 ; X86-AVX512-LABEL: test_mm_storeu_si64:
6916 ; X86-AVX512: # %bb.0: # %entry
6917 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6918 ; X86-AVX512-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
6919 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
6921 ; X64-SSE-LABEL: test_mm_storeu_si64:
6922 ; X64-SSE: # %bb.0: # %entry
6923 ; X64-SSE-NEXT: movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0]
6924 ; X64-SSE-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
6925 ; X64-SSE-NEXT: retq # encoding: [0xc3]
6927 ; X64-AVX1-LABEL: test_mm_storeu_si64:
6928 ; X64-AVX1: # %bb.0: # %entry
6929 ; X64-AVX1-NEXT: vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
6930 ; X64-AVX1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
6931 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
6933 ; X64-AVX512-LABEL: test_mm_storeu_si64:
6934 ; X64-AVX512: # %bb.0: # %entry
6935 ; X64-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
6936 ; X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
6937 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
6939 ; X32-SSE-LABEL: test_mm_storeu_si64:
6940 ; X32-SSE: # %bb.0: # %entry
6941 ; X32-SSE-NEXT: movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0]
6942 ; X32-SSE-NEXT: movq %rax, (%edi) # encoding: [0x67,0x48,0x89,0x07]
6943 ; X32-SSE-NEXT: retq # encoding: [0xc3]
6945 ; X32-AVX1-LABEL: test_mm_storeu_si64:
6946 ; X32-AVX1: # %bb.0: # %entry
6947 ; X32-AVX1-NEXT: vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
6948 ; X32-AVX1-NEXT: movq %rax, (%edi) # encoding: [0x67,0x48,0x89,0x07]
6949 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
6951 ; X32-AVX512-LABEL: test_mm_storeu_si64:
6952 ; X32-AVX512: # %bb.0: # %entry
6953 ; X32-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
6954 ; X32-AVX512-NEXT: movq %rax, (%edi) # encoding: [0x67,0x48,0x89,0x07]
6955 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
6957 %vecext.i = extractelement <2 x i64> %B, i32 0
6958 store i64 %vecext.i, ptr %A, align 1
6962 define void @test_mm_storeu_si32(ptr nocapture %A, <2 x i64> %B) {
6963 ; X86-SSE-LABEL: test_mm_storeu_si32:
6964 ; X86-SSE: # %bb.0: # %entry
6965 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6966 ; X86-SSE-NEXT: movd %xmm0, %ecx # encoding: [0x66,0x0f,0x7e,0xc1]
6967 ; X86-SSE-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08]
6968 ; X86-SSE-NEXT: retl # encoding: [0xc3]
6970 ; X86-AVX1-LABEL: test_mm_storeu_si32:
6971 ; X86-AVX1: # %bb.0: # %entry
6972 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6973 ; X86-AVX1-NEXT: vmovd %xmm0, %ecx # encoding: [0xc5,0xf9,0x7e,0xc1]
6974 ; X86-AVX1-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08]
6975 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
6977 ; X86-AVX512-LABEL: test_mm_storeu_si32:
6978 ; X86-AVX512: # %bb.0: # %entry
6979 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6980 ; X86-AVX512-NEXT: vmovd %xmm0, %ecx # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc1]
6981 ; X86-AVX512-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08]
6982 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
6984 ; X64-SSE-LABEL: test_mm_storeu_si32:
6985 ; X64-SSE: # %bb.0: # %entry
6986 ; X64-SSE-NEXT: movd %xmm0, %eax # encoding: [0x66,0x0f,0x7e,0xc0]
6987 ; X64-SSE-NEXT: movl %eax, (%rdi) # encoding: [0x89,0x07]
6988 ; X64-SSE-NEXT: retq # encoding: [0xc3]
6990 ; X64-AVX1-LABEL: test_mm_storeu_si32:
6991 ; X64-AVX1: # %bb.0: # %entry
6992 ; X64-AVX1-NEXT: vmovd %xmm0, %eax # encoding: [0xc5,0xf9,0x7e,0xc0]
6993 ; X64-AVX1-NEXT: movl %eax, (%rdi) # encoding: [0x89,0x07]
6994 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
6996 ; X64-AVX512-LABEL: test_mm_storeu_si32:
6997 ; X64-AVX512: # %bb.0: # %entry
6998 ; X64-AVX512-NEXT: vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0]
6999 ; X64-AVX512-NEXT: movl %eax, (%rdi) # encoding: [0x89,0x07]
7000 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
7002 ; X32-SSE-LABEL: test_mm_storeu_si32:
7003 ; X32-SSE: # %bb.0: # %entry
7004 ; X32-SSE-NEXT: movd %xmm0, %eax # encoding: [0x66,0x0f,0x7e,0xc0]
7005 ; X32-SSE-NEXT: movl %eax, (%edi) # encoding: [0x67,0x89,0x07]
7006 ; X32-SSE-NEXT: retq # encoding: [0xc3]
7008 ; X32-AVX1-LABEL: test_mm_storeu_si32:
7009 ; X32-AVX1: # %bb.0: # %entry
7010 ; X32-AVX1-NEXT: vmovd %xmm0, %eax # encoding: [0xc5,0xf9,0x7e,0xc0]
7011 ; X32-AVX1-NEXT: movl %eax, (%edi) # encoding: [0x67,0x89,0x07]
7012 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
7014 ; X32-AVX512-LABEL: test_mm_storeu_si32:
7015 ; X32-AVX512: # %bb.0: # %entry
7016 ; X32-AVX512-NEXT: vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0]
7017 ; X32-AVX512-NEXT: movl %eax, (%edi) # encoding: [0x67,0x89,0x07]
7018 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
7020 %0 = bitcast <2 x i64> %B to <4 x i32>
7021 %vecext.i = extractelement <4 x i32> %0, i32 0
7022 store i32 %vecext.i, ptr %A, align 1
7026 define void @test_mm_storeu_si16(ptr nocapture %A, <2 x i64> %B) {
7027 ; X86-SSE-LABEL: test_mm_storeu_si16:
7028 ; X86-SSE: # %bb.0: # %entry
7029 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7030 ; X86-SSE-NEXT: movd %xmm0, %ecx # encoding: [0x66,0x0f,0x7e,0xc1]
7031 ; X86-SSE-NEXT: movw %cx, (%eax) # encoding: [0x66,0x89,0x08]
7032 ; X86-SSE-NEXT: retl # encoding: [0xc3]
7034 ; X86-AVX1-LABEL: test_mm_storeu_si16:
7035 ; X86-AVX1: # %bb.0: # %entry
7036 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7037 ; X86-AVX1-NEXT: vmovd %xmm0, %ecx # encoding: [0xc5,0xf9,0x7e,0xc1]
7038 ; X86-AVX1-NEXT: movw %cx, (%eax) # encoding: [0x66,0x89,0x08]
7039 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
7041 ; X86-AVX512-LABEL: test_mm_storeu_si16:
7042 ; X86-AVX512: # %bb.0: # %entry
7043 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7044 ; X86-AVX512-NEXT: vmovd %xmm0, %ecx # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc1]
7045 ; X86-AVX512-NEXT: movw %cx, (%eax) # encoding: [0x66,0x89,0x08]
7046 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
7048 ; X64-SSE-LABEL: test_mm_storeu_si16:
7049 ; X64-SSE: # %bb.0: # %entry
7050 ; X64-SSE-NEXT: movd %xmm0, %eax # encoding: [0x66,0x0f,0x7e,0xc0]
7051 ; X64-SSE-NEXT: movw %ax, (%rdi) # encoding: [0x66,0x89,0x07]
7052 ; X64-SSE-NEXT: retq # encoding: [0xc3]
7054 ; X64-AVX1-LABEL: test_mm_storeu_si16:
7055 ; X64-AVX1: # %bb.0: # %entry
7056 ; X64-AVX1-NEXT: vmovd %xmm0, %eax # encoding: [0xc5,0xf9,0x7e,0xc0]
7057 ; X64-AVX1-NEXT: movw %ax, (%rdi) # encoding: [0x66,0x89,0x07]
7058 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
7060 ; X64-AVX512-LABEL: test_mm_storeu_si16:
7061 ; X64-AVX512: # %bb.0: # %entry
7062 ; X64-AVX512-NEXT: vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0]
7063 ; X64-AVX512-NEXT: movw %ax, (%rdi) # encoding: [0x66,0x89,0x07]
7064 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
7066 ; X32-SSE-LABEL: test_mm_storeu_si16:
7067 ; X32-SSE: # %bb.0: # %entry
7068 ; X32-SSE-NEXT: movd %xmm0, %eax # encoding: [0x66,0x0f,0x7e,0xc0]
7069 ; X32-SSE-NEXT: movw %ax, (%edi) # encoding: [0x67,0x66,0x89,0x07]
7070 ; X32-SSE-NEXT: retq # encoding: [0xc3]
7072 ; X32-AVX1-LABEL: test_mm_storeu_si16:
7073 ; X32-AVX1: # %bb.0: # %entry
7074 ; X32-AVX1-NEXT: vmovd %xmm0, %eax # encoding: [0xc5,0xf9,0x7e,0xc0]
7075 ; X32-AVX1-NEXT: movw %ax, (%edi) # encoding: [0x67,0x66,0x89,0x07]
7076 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
7078 ; X32-AVX512-LABEL: test_mm_storeu_si16:
7079 ; X32-AVX512: # %bb.0: # %entry
7080 ; X32-AVX512-NEXT: vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0]
7081 ; X32-AVX512-NEXT: movw %ax, (%edi) # encoding: [0x67,0x66,0x89,0x07]
7082 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
7084 %0 = bitcast <2 x i64> %B to <8 x i16>
7085 %vecext.i = extractelement <8 x i16> %0, i32 0
7086 store i16 %vecext.i, ptr %A, align 1
7090 define void @test_mm_stream_pd(ptr%a0, <2 x double> %a1) {
7091 ; X86-SSE-LABEL: test_mm_stream_pd:
7093 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7094 ; X86-SSE-NEXT: movntps %xmm0, (%eax) # encoding: [0x0f,0x2b,0x00]
7095 ; X86-SSE-NEXT: retl # encoding: [0xc3]
7097 ; X86-AVX1-LABEL: test_mm_stream_pd:
7098 ; X86-AVX1: # %bb.0:
7099 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7100 ; X86-AVX1-NEXT: vmovntps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x2b,0x00]
7101 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
7103 ; X86-AVX512-LABEL: test_mm_stream_pd:
7104 ; X86-AVX512: # %bb.0:
7105 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7106 ; X86-AVX512-NEXT: vmovntps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x00]
7107 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
7109 ; X64-SSE-LABEL: test_mm_stream_pd:
7111 ; X64-SSE-NEXT: movntps %xmm0, (%rdi) # encoding: [0x0f,0x2b,0x07]
7112 ; X64-SSE-NEXT: retq # encoding: [0xc3]
7114 ; X64-AVX1-LABEL: test_mm_stream_pd:
7115 ; X64-AVX1: # %bb.0:
7116 ; X64-AVX1-NEXT: vmovntps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x2b,0x07]
7117 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
7119 ; X64-AVX512-LABEL: test_mm_stream_pd:
7120 ; X64-AVX512: # %bb.0:
7121 ; X64-AVX512-NEXT: vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07]
7122 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
7124 ; X32-SSE-LABEL: test_mm_stream_pd:
7126 ; X32-SSE-NEXT: movntps %xmm0, (%edi) # encoding: [0x67,0x0f,0x2b,0x07]
7127 ; X32-SSE-NEXT: retq # encoding: [0xc3]
7129 ; X32-AVX1-LABEL: test_mm_stream_pd:
7130 ; X32-AVX1: # %bb.0:
7131 ; X32-AVX1-NEXT: vmovntps %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x2b,0x07]
7132 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
7134 ; X32-AVX512-LABEL: test_mm_stream_pd:
7135 ; X32-AVX512: # %bb.0:
7136 ; X32-AVX512-NEXT: vmovntps %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x2b,0x07]
7137 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
7138 store <2 x double> %a1, ptr %a0, align 16, !nontemporal !0
7142 define void @test_mm_stream_si32(ptr%a0, i32 %a1) {
7143 ; X86-LABEL: test_mm_stream_si32:
7145 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
7146 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
7147 ; X86-NEXT: movntil %eax, (%ecx) # encoding: [0x0f,0xc3,0x01]
7148 ; X86-NEXT: retl # encoding: [0xc3]
7150 ; X64-LABEL: test_mm_stream_si32:
7152 ; X64-NEXT: movntil %esi, (%rdi) # encoding: [0x0f,0xc3,0x37]
7153 ; X64-NEXT: retq # encoding: [0xc3]
7155 ; X32-LABEL: test_mm_stream_si32:
7157 ; X32-NEXT: movntil %esi, (%edi) # encoding: [0x67,0x0f,0xc3,0x37]
7158 ; X32-NEXT: retq # encoding: [0xc3]
7159 store i32 %a1, ptr %a0, align 1, !nontemporal !0
7163 define void @test_mm_stream_si128(ptr%a0, <2 x i64> %a1) {
7164 ; X86-SSE-LABEL: test_mm_stream_si128:
7166 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7167 ; X86-SSE-NEXT: movntps %xmm0, (%eax) # encoding: [0x0f,0x2b,0x00]
7168 ; X86-SSE-NEXT: retl # encoding: [0xc3]
7170 ; X86-AVX1-LABEL: test_mm_stream_si128:
7171 ; X86-AVX1: # %bb.0:
7172 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7173 ; X86-AVX1-NEXT: vmovntps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x2b,0x00]
7174 ; X86-AVX1-NEXT: retl # encoding: [0xc3]
7176 ; X86-AVX512-LABEL: test_mm_stream_si128:
7177 ; X86-AVX512: # %bb.0:
7178 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7179 ; X86-AVX512-NEXT: vmovntps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x00]
7180 ; X86-AVX512-NEXT: retl # encoding: [0xc3]
7182 ; X64-SSE-LABEL: test_mm_stream_si128:
7184 ; X64-SSE-NEXT: movntps %xmm0, (%rdi) # encoding: [0x0f,0x2b,0x07]
7185 ; X64-SSE-NEXT: retq # encoding: [0xc3]
7187 ; X64-AVX1-LABEL: test_mm_stream_si128:
7188 ; X64-AVX1: # %bb.0:
7189 ; X64-AVX1-NEXT: vmovntps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x2b,0x07]
7190 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
7192 ; X64-AVX512-LABEL: test_mm_stream_si128:
7193 ; X64-AVX512: # %bb.0:
7194 ; X64-AVX512-NEXT: vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07]
7195 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
7197 ; X32-SSE-LABEL: test_mm_stream_si128:
7199 ; X32-SSE-NEXT: movntps %xmm0, (%edi) # encoding: [0x67,0x0f,0x2b,0x07]
7200 ; X32-SSE-NEXT: retq # encoding: [0xc3]
7202 ; X32-AVX1-LABEL: test_mm_stream_si128:
7203 ; X32-AVX1: # %bb.0:
7204 ; X32-AVX1-NEXT: vmovntps %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x2b,0x07]
7205 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
7207 ; X32-AVX512-LABEL: test_mm_stream_si128:
7208 ; X32-AVX512: # %bb.0:
7209 ; X32-AVX512-NEXT: vmovntps %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x2b,0x07]
7210 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
7211 store <2 x i64> %a1, ptr %a0, align 16, !nontemporal !0
7215 define <2 x i64> @test_mm_sub_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
7216 ; SSE-LABEL: test_mm_sub_epi8:
7218 ; SSE-NEXT: psubb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf8,0xc1]
7219 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7221 ; AVX1-LABEL: test_mm_sub_epi8:
7223 ; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
7224 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7226 ; AVX512-LABEL: test_mm_sub_epi8:
7228 ; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
7229 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7230 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
7231 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
7232 %res = sub <16 x i8> %arg0, %arg1
7233 %bc = bitcast <16 x i8> %res to <2 x i64>
7237 define <2 x i64> @test_mm_sub_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
7238 ; SSE-LABEL: test_mm_sub_epi16:
7240 ; SSE-NEXT: psubw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf9,0xc1]
7241 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7243 ; AVX1-LABEL: test_mm_sub_epi16:
7245 ; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf9,0xc1]
7246 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7248 ; AVX512-LABEL: test_mm_sub_epi16:
7250 ; AVX512-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0xc1]
7251 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7252 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
7253 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
7254 %res = sub <8 x i16> %arg0, %arg1
7255 %bc = bitcast <8 x i16> %res to <2 x i64>
7259 define <2 x i64> @test_mm_sub_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
7260 ; SSE-LABEL: test_mm_sub_epi32:
7262 ; SSE-NEXT: psubd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfa,0xc1]
7263 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7265 ; AVX1-LABEL: test_mm_sub_epi32:
7267 ; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfa,0xc1]
7268 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7270 ; AVX512-LABEL: test_mm_sub_epi32:
7272 ; AVX512-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0xc1]
7273 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7274 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
7275 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
7276 %res = sub <4 x i32> %arg0, %arg1
7277 %bc = bitcast <4 x i32> %res to <2 x i64>
7281 define <2 x i64> @test_mm_sub_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
7282 ; SSE-LABEL: test_mm_sub_epi64:
7284 ; SSE-NEXT: psubq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfb,0xc1]
7285 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7287 ; AVX1-LABEL: test_mm_sub_epi64:
7289 ; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]
7290 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7292 ; AVX512-LABEL: test_mm_sub_epi64:
7294 ; AVX512-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
7295 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7296 %res = sub <2 x i64> %a0, %a1
7300 define <2 x double> @test_mm_sub_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
7301 ; SSE-LABEL: test_mm_sub_pd:
7303 ; SSE-NEXT: subpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5c,0xc1]
7304 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7306 ; AVX1-LABEL: test_mm_sub_pd:
7308 ; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5c,0xc1]
7309 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7311 ; AVX512-LABEL: test_mm_sub_pd:
7313 ; AVX512-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5c,0xc1]
7314 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7315 %res = fsub <2 x double> %a0, %a1
7316 ret <2 x double> %res
7319 define <2 x double> @test_mm_sub_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
7320 ; SSE-LABEL: test_mm_sub_sd:
7322 ; SSE-NEXT: subsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5c,0xc1]
7323 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7325 ; AVX1-LABEL: test_mm_sub_sd:
7327 ; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5c,0xc1]
7328 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7330 ; AVX512-LABEL: test_mm_sub_sd:
7332 ; AVX512-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5c,0xc1]
7333 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7334 %ext0 = extractelement <2 x double> %a0, i32 0
7335 %ext1 = extractelement <2 x double> %a1, i32 0
7336 %fsub = fsub double %ext0, %ext1
7337 %res = insertelement <2 x double> %a0, double %fsub, i32 0
7338 ret <2 x double> %res
7341 define <2 x i64> @test_mm_subs_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
7342 ; SSE-LABEL: test_mm_subs_epi8:
7344 ; SSE-NEXT: psubsb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe8,0xc1]
7345 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7347 ; AVX1-LABEL: test_mm_subs_epi8:
7349 ; AVX1-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe8,0xc1]
7350 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7352 ; AVX512-LABEL: test_mm_subs_epi8:
7354 ; AVX512-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1]
7355 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7356 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
7357 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
7358 %res = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %arg0, <16 x i8> %arg1)
7359 %bc = bitcast <16 x i8> %res to <2 x i64>
7362 declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
7364 define <2 x i64> @test_mm_subs_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
7365 ; SSE-LABEL: test_mm_subs_epi16:
7367 ; SSE-NEXT: psubsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe9,0xc1]
7368 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7370 ; AVX1-LABEL: test_mm_subs_epi16:
7372 ; AVX1-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe9,0xc1]
7373 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7375 ; AVX512-LABEL: test_mm_subs_epi16:
7377 ; AVX512-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1]
7378 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7379 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
7380 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
7381 %res = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %arg0, <8 x i16> %arg1)
7382 %bc = bitcast <8 x i16> %res to <2 x i64>
7385 declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
7387 define <2 x i64> @test_mm_subs_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
7388 ; SSE-LABEL: test_mm_subs_epu8:
7390 ; SSE-NEXT: psubusb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd8,0xc1]
7391 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7393 ; AVX1-LABEL: test_mm_subs_epu8:
7395 ; AVX1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd8,0xc1]
7396 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7398 ; AVX512-LABEL: test_mm_subs_epu8:
7400 ; AVX512-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1]
7401 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7402 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
7403 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
7404 %res = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %arg0, <16 x i8> %arg1)
7405 %bc = bitcast <16 x i8> %res to <2 x i64>
7408 declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)
7410 define <2 x i64> @test_mm_subs_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
7411 ; SSE-LABEL: test_mm_subs_epu16:
7413 ; SSE-NEXT: psubusw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd9,0xc1]
7414 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7416 ; AVX1-LABEL: test_mm_subs_epu16:
7418 ; AVX1-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd9,0xc1]
7419 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7421 ; AVX512-LABEL: test_mm_subs_epu16:
7423 ; AVX512-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1]
7424 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7425 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
7426 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
7427 %res = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %arg0, <8 x i16> %arg1)
7428 %bc = bitcast <8 x i16> %res to <2 x i64>
7431 declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)
7433 define i32 @test_mm_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
7434 ; SSE-LABEL: test_mm_ucomieq_sd:
7436 ; SSE-NEXT: ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
7437 ; SSE-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
7438 ; SSE-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
7439 ; SSE-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
7440 ; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
7441 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7443 ; AVX1-LABEL: test_mm_ucomieq_sd:
7445 ; AVX1-NEXT: vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
7446 ; AVX1-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
7447 ; AVX1-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
7448 ; AVX1-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
7449 ; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
7450 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7452 ; AVX512-LABEL: test_mm_ucomieq_sd:
7454 ; AVX512-NEXT: vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
7455 ; AVX512-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
7456 ; AVX512-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
7457 ; AVX512-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
7458 ; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
7459 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7460 %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
7463 declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
7465 define i32 @test_mm_ucomige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
7466 ; SSE-LABEL: test_mm_ucomige_sd:
7468 ; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
7469 ; SSE-NEXT: ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
7470 ; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
7471 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7473 ; AVX1-LABEL: test_mm_ucomige_sd:
7475 ; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
7476 ; AVX1-NEXT: vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
7477 ; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
7478 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7480 ; AVX512-LABEL: test_mm_ucomige_sd:
7482 ; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
7483 ; AVX512-NEXT: vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
7484 ; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
7485 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7486 %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
7489 declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
7491 define i32 @test_mm_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
7492 ; SSE-LABEL: test_mm_ucomigt_sd:
7494 ; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
7495 ; SSE-NEXT: ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
7496 ; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
7497 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7499 ; AVX1-LABEL: test_mm_ucomigt_sd:
7501 ; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
7502 ; AVX1-NEXT: vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
7503 ; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
7504 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7506 ; AVX512-LABEL: test_mm_ucomigt_sd:
7508 ; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
7509 ; AVX512-NEXT: vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
7510 ; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
7511 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7512 %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
7515 declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
7517 define i32 @test_mm_ucomile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
7518 ; SSE-LABEL: test_mm_ucomile_sd:
7520 ; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
7521 ; SSE-NEXT: ucomisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2e,0xc8]
7522 ; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
7523 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7525 ; AVX1-LABEL: test_mm_ucomile_sd:
7527 ; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
7528 ; AVX1-NEXT: vucomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2e,0xc8]
7529 ; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
7530 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7532 ; AVX512-LABEL: test_mm_ucomile_sd:
7534 ; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
7535 ; AVX512-NEXT: vucomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8]
7536 ; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
7537 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7538 %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
7541 declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
7543 define i32 @test_mm_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
7544 ; SSE-LABEL: test_mm_ucomilt_sd:
7546 ; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
7547 ; SSE-NEXT: ucomisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2e,0xc8]
7548 ; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
7549 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7551 ; AVX1-LABEL: test_mm_ucomilt_sd:
7553 ; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
7554 ; AVX1-NEXT: vucomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2e,0xc8]
7555 ; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
7556 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7558 ; AVX512-LABEL: test_mm_ucomilt_sd:
7560 ; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
7561 ; AVX512-NEXT: vucomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8]
7562 ; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
7563 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7564 %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
7567 declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
7569 define i32 @test_mm_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
7570 ; SSE-LABEL: test_mm_ucomineq_sd:
7572 ; SSE-NEXT: ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
7573 ; SSE-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
7574 ; SSE-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
7575 ; SSE-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
7576 ; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
7577 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7579 ; AVX1-LABEL: test_mm_ucomineq_sd:
7581 ; AVX1-NEXT: vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
7582 ; AVX1-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
7583 ; AVX1-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
7584 ; AVX1-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
7585 ; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
7586 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7588 ; AVX512-LABEL: test_mm_ucomineq_sd:
7590 ; AVX512-NEXT: vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
7591 ; AVX512-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
7592 ; AVX512-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
7593 ; AVX512-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
7594 ; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
7595 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7596 %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
7599 declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
7601 define <2 x double> @test_mm_undefined_pd() {
7602 ; CHECK-LABEL: test_mm_undefined_pd:
7604 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7605 ret <2 x double> undef
7608 define <2 x i64> @test_mm_undefined_si128() {
7609 ; CHECK-LABEL: test_mm_undefined_si128:
7611 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7615 define <2 x i64> @test_mm_unpackhi_epi8(<2 x i64> %a0, <2 x i64> %a1) {
7616 ; SSE-LABEL: test_mm_unpackhi_epi8:
7618 ; SSE-NEXT: punpckhbw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x68,0xc1]
7619 ; SSE-NEXT: # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
7620 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7622 ; AVX1-LABEL: test_mm_unpackhi_epi8:
7624 ; AVX1-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x68,0xc1]
7625 ; AVX1-NEXT: # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
7626 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7628 ; AVX512-LABEL: test_mm_unpackhi_epi8:
7630 ; AVX512-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x68,0xc1]
7631 ; AVX512-NEXT: # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
7632 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7633 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
7634 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
7635 %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
7636 %bc = bitcast <16 x i8> %res to <2 x i64>
7640 define <2 x i64> @test_mm_unpackhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
7641 ; SSE-LABEL: test_mm_unpackhi_epi16:
7643 ; SSE-NEXT: punpckhwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x69,0xc1]
7644 ; SSE-NEXT: # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
7645 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7647 ; AVX1-LABEL: test_mm_unpackhi_epi16:
7649 ; AVX1-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x69,0xc1]
7650 ; AVX1-NEXT: # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
7651 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7653 ; AVX512-LABEL: test_mm_unpackhi_epi16:
7655 ; AVX512-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x69,0xc1]
7656 ; AVX512-NEXT: # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
7657 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7658 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
7659 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
7660 %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
7661 %bc = bitcast <8 x i16> %res to <2 x i64>
7665 define <2 x i64> @test_mm_unpackhi_epi32(<2 x i64> %a0, <2 x i64> %a1) {
7666 ; SSE-LABEL: test_mm_unpackhi_epi32:
7668 ; SSE-NEXT: unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1]
7669 ; SSE-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7670 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7672 ; AVX1-LABEL: test_mm_unpackhi_epi32:
7674 ; AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1]
7675 ; AVX1-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7676 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7678 ; AVX512-LABEL: test_mm_unpackhi_epi32:
7680 ; AVX512-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1]
7681 ; AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7682 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7683 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
7684 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
7685 %res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
7686 %bc = bitcast <4 x i32> %res to <2 x i64>
7690 define <2 x i64> @test_mm_unpackhi_epi64(<2 x i64> %a0, <2 x i64> %a1) {
7691 ; SSE-LABEL: test_mm_unpackhi_epi64:
7693 ; SSE-NEXT: unpckhpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x15,0xc1]
7694 ; SSE-NEXT: # xmm0 = xmm0[1],xmm1[1]
7695 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7697 ; AVX1-LABEL: test_mm_unpackhi_epi64:
7699 ; AVX1-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1]
7700 ; AVX1-NEXT: # xmm0 = xmm0[1],xmm1[1]
7701 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7703 ; AVX512-LABEL: test_mm_unpackhi_epi64:
7705 ; AVX512-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1]
7706 ; AVX512-NEXT: # xmm0 = xmm0[1],xmm1[1]
7707 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7708 %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
7712 define <2 x double> @test_mm_unpackhi_pd(<2 x double> %a0, <2 x double> %a1) {
7713 ; SSE-LABEL: test_mm_unpackhi_pd:
7715 ; SSE-NEXT: unpckhpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x15,0xc1]
7716 ; SSE-NEXT: # xmm0 = xmm0[1],xmm1[1]
7717 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7719 ; AVX1-LABEL: test_mm_unpackhi_pd:
7721 ; AVX1-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1]
7722 ; AVX1-NEXT: # xmm0 = xmm0[1],xmm1[1]
7723 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7725 ; AVX512-LABEL: test_mm_unpackhi_pd:
7727 ; AVX512-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1]
7728 ; AVX512-NEXT: # xmm0 = xmm0[1],xmm1[1]
7729 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7730 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
7731 ret <2 x double> %res
7734 define <2 x i64> @test_mm_unpacklo_epi8(<2 x i64> %a0, <2 x i64> %a1) {
7735 ; SSE-LABEL: test_mm_unpacklo_epi8:
7737 ; SSE-NEXT: punpcklbw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x60,0xc1]
7738 ; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
7739 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7741 ; AVX1-LABEL: test_mm_unpacklo_epi8:
7743 ; AVX1-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x60,0xc1]
7744 ; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
7745 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7747 ; AVX512-LABEL: test_mm_unpacklo_epi8:
7749 ; AVX512-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x60,0xc1]
7750 ; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
7751 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7752 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
7753 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
7754 %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
7755 %bc = bitcast <16 x i8> %res to <2 x i64>
7759 define <2 x i64> @test_mm_unpacklo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
7760 ; SSE-LABEL: test_mm_unpacklo_epi16:
7762 ; SSE-NEXT: punpcklwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x61,0xc1]
7763 ; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7764 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7766 ; AVX1-LABEL: test_mm_unpacklo_epi16:
7768 ; AVX1-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x61,0xc1]
7769 ; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7770 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7772 ; AVX512-LABEL: test_mm_unpacklo_epi16:
7774 ; AVX512-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x61,0xc1]
7775 ; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7776 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7777 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
7778 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
7779 %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
7780 %bc = bitcast <8 x i16> %res to <2 x i64>
7784 define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
7785 ; SSE-LABEL: test_mm_unpacklo_epi32:
7787 ; SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1]
7788 ; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
7789 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7791 ; AVX1-LABEL: test_mm_unpacklo_epi32:
7793 ; AVX1-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x14,0xc1]
7794 ; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
7795 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7797 ; AVX512-LABEL: test_mm_unpacklo_epi32:
7799 ; AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xc1]
7800 ; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
7801 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7802 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
7803 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
7804 %res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
7805 %bc = bitcast <4 x i32> %res to <2 x i64>
7809 define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) {
7810 ; SSE-LABEL: test_mm_unpacklo_epi64:
7812 ; SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
7813 ; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
7814 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7816 ; AVX1-LABEL: test_mm_unpacklo_epi64:
7818 ; AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
7819 ; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0]
7820 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7822 ; AVX512-LABEL: test_mm_unpacklo_epi64:
7824 ; AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
7825 ; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0]
7826 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7827 %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
7831 define <2 x double> @test_mm_unpacklo_pd(<2 x double> %a0, <2 x double> %a1) {
7832 ; SSE-LABEL: test_mm_unpacklo_pd:
7834 ; SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
7835 ; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
7836 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7838 ; AVX1-LABEL: test_mm_unpacklo_pd:
7840 ; AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
7841 ; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0]
7842 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7844 ; AVX512-LABEL: test_mm_unpacklo_pd:
7846 ; AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
7847 ; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0]
7848 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7849 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
7850 ret <2 x double> %res
7853 define <2 x double> @test_mm_xor_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
7854 ; SSE-LABEL: test_mm_xor_pd:
7856 ; SSE-NEXT: xorps %xmm1, %xmm0 # encoding: [0x0f,0x57,0xc1]
7857 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7859 ; AVX1-LABEL: test_mm_xor_pd:
7861 ; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1]
7862 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7864 ; AVX512-LABEL: test_mm_xor_pd:
7866 ; AVX512-NEXT: vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1]
7867 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7868 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
7869 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
7870 %res = xor <4 x i32> %arg0, %arg1
7871 %bc = bitcast <4 x i32> %res to <2 x double>
7872 ret <2 x double> %bc
7875 define <2 x i64> @test_mm_xor_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
7876 ; SSE-LABEL: test_mm_xor_si128:
7878 ; SSE-NEXT: xorps %xmm1, %xmm0 # encoding: [0x0f,0x57,0xc1]
7879 ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7881 ; AVX1-LABEL: test_mm_xor_si128:
7883 ; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1]
7884 ; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7886 ; AVX512-LABEL: test_mm_xor_si128:
7888 ; AVX512-NEXT: vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1]
7889 ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7890 %res = xor <2 x i64> %a0, %a1