; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vlbw-builtins.c

; ANDs the two 128-bit inputs, tests every byte for "!= 0" and returns the
; 16 result bits as an i16; the checks expect a single vptestmb.
define zeroext i16 @test_mm_test_epi8_mask(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_test_epi8_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestmb %xmm0, %xmm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    movzwl %ax, %eax
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and.i.i = and <2 x i64> %__B, %__A
  %0 = bitcast <2 x i64> %and.i.i to <16 x i8>
  %1 = icmp ne <16 x i8> %0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to i16
  ret i16 %2
}
; Masked byte test on 128-bit inputs: the per-byte "!= 0" result is ANDed with
; the i16 mask %__U; the checks expect vptestmb under a {%k1} write-mask.
define zeroext i16 @test_mm_mask_test_epi8_mask(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_test_epi8_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vptestmb %xmm0, %xmm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_test_epi8_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestmb %xmm0, %xmm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
entry:
  %and.i.i = and <2 x i64> %__B, %__A
  %0 = bitcast <2 x i64> %and.i.i to <16 x i8>
  %1 = icmp ne <16 x i8> %0, zeroinitializer
  %2 = bitcast i16 %__U to <16 x i1>
  %3 = and <16 x i1> %1, %2
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
; 256-bit byte test: per-byte "!= 0" of %__A & %__B, returned as an i32 mask;
; the checks expect vptestmb on ymm registers followed by vzeroupper.
define i32 @test_mm256_test_epi8_mask(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_test_epi8_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestmb %ymm0, %ymm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and.i.i = and <4 x i64> %__B, %__A
  %0 = bitcast <4 x i64> %and.i.i to <32 x i8>
  %1 = icmp ne <32 x i8> %0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to i32
  ret i32 %2
}
; Masked 256-bit byte test: the 32-lane "!= 0" result is ANDed with the i32
; mask %__U; the checks expect vptestmb on ymm registers under {%k1}.
define i32 @test_mm256_mask_test_epi8_mask(i32 %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_test_epi8_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vptestmb %ymm0, %ymm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_test_epi8_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestmb %ymm0, %ymm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %and.i.i = and <4 x i64> %__B, %__A
  %0 = bitcast <4 x i64> %and.i.i to <32 x i8>
  %1 = icmp ne <32 x i8> %0, zeroinitializer
  %2 = bitcast i32 %__U to <32 x i1>
  %3 = and <32 x i1> %1, %2
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
; 128-bit word test: per-i16 "!= 0" of %__A & %__B over 8 lanes, returned as
; an i8 mask; the checks expect vptestmw.
define zeroext i8 @test_mm_test_epi16_mask(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_test_epi16_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestmw %xmm0, %xmm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and.i.i = and <2 x i64> %__B, %__A
  %0 = bitcast <2 x i64> %and.i.i to <8 x i16>
  %1 = icmp ne <8 x i16> %0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to i8
  ret i8 %2
}
; Masked 128-bit word test: the 8-lane "!= 0" result is ANDed with the i8 mask
; %__U; the checks expect vptestmw under a {%k1} write-mask.
define zeroext i8 @test_mm_mask_test_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_test_epi16_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vptestmw %xmm0, %xmm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_test_epi16_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestmw %xmm0, %xmm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
entry:
  %and.i.i = and <2 x i64> %__B, %__A
  %0 = bitcast <2 x i64> %and.i.i to <8 x i16>
  %1 = icmp ne <8 x i16> %0, zeroinitializer
  %2 = bitcast i8 %__U to <8 x i1>
  %3 = and <8 x i1> %1, %2
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}
; 256-bit word test: per-i16 "!= 0" of %__A & %__B over 16 lanes, returned as
; an i16 mask; the checks expect vptestmw on ymm registers.
define zeroext i16 @test_mm256_test_epi16_mask(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_test_epi16_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestmw %ymm0, %ymm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    movzwl %ax, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and.i.i = and <4 x i64> %__B, %__A
  %0 = bitcast <4 x i64> %and.i.i to <16 x i16>
  %1 = icmp ne <16 x i16> %0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to i16
  ret i16 %2
}
; Masked 256-bit word test: the 16-lane "!= 0" result is ANDed with the i16
; mask %__U; the checks expect vptestmw on ymm registers under {%k1}.
define zeroext i16 @test_mm256_mask_test_epi16_mask(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_test_epi16_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vptestmw %ymm0, %ymm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_test_epi16_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestmw %ymm0, %ymm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %and.i.i = and <4 x i64> %__B, %__A
  %0 = bitcast <4 x i64> %and.i.i to <16 x i16>
  %1 = icmp ne <16 x i16> %0, zeroinitializer
  %2 = bitcast i16 %__U to <16 x i1>
  %3 = and <16 x i1> %1, %2
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
; Negated 128-bit byte test: per-byte "== 0" of %__A & %__B, returned as an
; i16 mask; the checks expect vptestnmb.
define zeroext i16 @test_mm_testn_epi8_mask(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_testn_epi8_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestnmb %xmm0, %xmm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    movzwl %ax, %eax
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and.i.i = and <2 x i64> %__B, %__A
  %0 = bitcast <2 x i64> %and.i.i to <16 x i8>
  %1 = icmp eq <16 x i8> %0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to i16
  ret i16 %2
}
; Masked negated 128-bit byte test: the per-byte "== 0" result is ANDed with
; the i16 mask %__U; the checks expect vptestnmb under a {%k1} write-mask.
define zeroext i16 @test_mm_mask_testn_epi8_mask(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_testn_epi8_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vptestnmb %xmm0, %xmm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_testn_epi8_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestnmb %xmm0, %xmm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
entry:
  %and.i.i = and <2 x i64> %__B, %__A
  %0 = bitcast <2 x i64> %and.i.i to <16 x i8>
  %1 = icmp eq <16 x i8> %0, zeroinitializer
  %2 = bitcast i16 %__U to <16 x i1>
  %3 = and <16 x i1> %1, %2
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
; Negated 256-bit byte test: per-byte "== 0" of %__A & %__B, returned as an
; i32 mask; the checks expect vptestnmb on ymm registers.
define i32 @test_mm256_testn_epi8_mask(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_testn_epi8_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestnmb %ymm0, %ymm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and.i.i = and <4 x i64> %__B, %__A
  %0 = bitcast <4 x i64> %and.i.i to <32 x i8>
  %1 = icmp eq <32 x i8> %0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to i32
  ret i32 %2
}
; Masked negated 256-bit byte test: the 32-lane "== 0" result is ANDed with
; the i32 mask %__U; the checks expect vptestnmb on ymm registers under {%k1}.
define i32 @test_mm256_mask_testn_epi8_mask(i32 %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_testn_epi8_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vptestnmb %ymm0, %ymm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_testn_epi8_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestnmb %ymm0, %ymm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %and.i.i = and <4 x i64> %__B, %__A
  %0 = bitcast <4 x i64> %and.i.i to <32 x i8>
  %1 = icmp eq <32 x i8> %0, zeroinitializer
  %2 = bitcast i32 %__U to <32 x i1>
  %3 = and <32 x i1> %1, %2
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
; Negated 128-bit word test: per-i16 "== 0" of %__A & %__B over 8 lanes,
; returned as an i8 mask; the checks expect vptestnmw.
define zeroext i8 @test_mm_testn_epi16_mask(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_testn_epi16_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestnmw %xmm0, %xmm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and.i.i = and <2 x i64> %__B, %__A
  %0 = bitcast <2 x i64> %and.i.i to <8 x i16>
  %1 = icmp eq <8 x i16> %0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to i8
  ret i8 %2
}
; Masked negated 128-bit word test: the 8-lane "== 0" result is ANDed with the
; i8 mask %__U; the checks expect vptestnmw under a {%k1} write-mask.
define zeroext i8 @test_mm_mask_testn_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_testn_epi16_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vptestnmw %xmm0, %xmm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_testn_epi16_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestnmw %xmm0, %xmm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
entry:
  %and.i.i = and <2 x i64> %__B, %__A
  %0 = bitcast <2 x i64> %and.i.i to <8 x i16>
  %1 = icmp eq <8 x i16> %0, zeroinitializer
  %2 = bitcast i8 %__U to <8 x i1>
  %3 = and <8 x i1> %1, %2
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}
; Negated 256-bit word test: per-i16 "== 0" of %__A & %__B over 16 lanes,
; returned as an i16 mask; the checks expect vptestnmw on ymm registers.
define zeroext i16 @test_mm256_testn_epi16_mask(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_testn_epi16_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestnmw %ymm0, %ymm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    movzwl %ax, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and.i.i = and <4 x i64> %__B, %__A
  %0 = bitcast <4 x i64> %and.i.i to <16 x i16>
  %1 = icmp eq <16 x i16> %0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to i16
  ret i16 %2
}
; Masked negated 256-bit word test: the 16-lane "== 0" result is ANDed with
; the i16 mask %__U; the checks expect vptestnmw on ymm registers under {%k1}.
define zeroext i16 @test_mm256_mask_testn_epi16_mask(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_testn_epi16_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vptestnmw %ymm0, %ymm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_testn_epi16_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestnmw %ymm0, %ymm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %and.i.i = and <4 x i64> %__B, %__A
  %0 = bitcast <4 x i64> %and.i.i to <16 x i16>
  %1 = icmp eq <16 x i16> %0, zeroinitializer
  %2 = bitcast i16 %__U to <16 x i1>
  %3 = and <16 x i1> %1, %2
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
; Merge-masked splat: %__A is broadcast to 16 bytes and selected per lane by
; %__M, with %__O supplying unselected lanes; the checks expect a masked
; vpbroadcastb from a general-purpose register.
; NOTE(review): attribute group #0 is referenced here but its definition is not
; visible in this part of the file - confirm it exists or drop the reference.
define <2 x i64> @test_mm_mask_set1_epi8(<2 x i64> %__O, i16 zeroext %__M, i8 signext %__A) local_unnamed_addr #0 {
; X86-LABEL: test_mm_mask_set1_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastb %eax, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_set1_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastb %esi, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <16 x i8> undef, i8 %__A, i32 0
  %vecinit15.i.i = shufflevector <16 x i8> %vecinit.i.i, <16 x i8> undef, <16 x i32> zeroinitializer
  %0 = bitcast <2 x i64> %__O to <16 x i8>
  %1 = bitcast i16 %__M to <16 x i1>
  %2 = select <16 x i1> %1, <16 x i8> %vecinit15.i.i, <16 x i8> %0
  %3 = bitcast <16 x i8> %2 to <2 x i64>
  ret <2 x i64> %3
}
; Zero-masked splat: %__A is broadcast to 16 bytes and lanes not selected by
; %__M become zero; the checks expect vpbroadcastb with {%k1} {z}.
define <2 x i64> @test_mm_maskz_set1_epi8(i16 zeroext %__M, i8 signext %__A)  {
; X86-LABEL: test_mm_maskz_set1_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastb %eax, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_set1_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastb %esi, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <16 x i8> undef, i8 %__A, i32 0
  %vecinit15.i.i = shufflevector <16 x i8> %vecinit.i.i, <16 x i8> undef, <16 x i32> zeroinitializer
  %0 = bitcast i16 %__M to <16 x i1>
  %1 = select <16 x i1> %0, <16 x i8> %vecinit15.i.i, <16 x i8> zeroinitializer
  %2 = bitcast <16 x i8> %1 to <2 x i64>
  ret <2 x i64> %2
}
; 256-bit merge-masked splat: %__A is broadcast to 32 bytes and selected per
; lane by the i32 mask %__M, with %__O supplying unselected lanes.
define <4 x i64> @test_mm256_mask_set1_epi8(<4 x i64> %__O, i32 %__M, i8 signext %__A){
; X86-LABEL: test_mm256_mask_set1_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastb %eax, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_set1_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastb %esi, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <32 x i8> undef, i8 %__A, i32 0
  %vecinit31.i.i = shufflevector <32 x i8> %vecinit.i.i, <32 x i8> undef, <32 x i32> zeroinitializer
  %0 = bitcast <4 x i64> %__O to <32 x i8>
  %1 = bitcast i32 %__M to <32 x i1>
  %2 = select <32 x i1> %1, <32 x i8> %vecinit31.i.i, <32 x i8> %0
  %3 = bitcast <32 x i8> %2 to <4 x i64>
  ret <4 x i64> %3
}
; 256-bit zero-masked splat: %__A is broadcast to 32 bytes and lanes not
; selected by %__M become zero; the checks expect {%k1} {z} on vpbroadcastb.
define <4 x i64> @test_mm256_maskz_set1_epi8(i32 %__M, i8 signext %__A)  {
; X86-LABEL: test_mm256_maskz_set1_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastb %eax, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_set1_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastb %esi, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <32 x i8> undef, i8 %__A, i32 0
  %vecinit31.i.i = shufflevector <32 x i8> %vecinit.i.i, <32 x i8> undef, <32 x i32> zeroinitializer
  %0 = bitcast i32 %__M to <32 x i1>
  %1 = select <32 x i1> %0, <32 x i8> %vecinit31.i.i, <32 x i8> zeroinitializer
  %2 = bitcast <32 x i8> %1 to <4 x i64>
  ret <4 x i64> %2
}
; 256-bit merge-masked word splat: %__A is broadcast to 16 x i16 and selected
; per lane by %__M, with %__O supplying unselected lanes (masked vpbroadcastw).
define <4 x i64> @test_mm256_mask_set1_epi16(<4 x i64> %__O, i16 zeroext %__M, i16 signext %__A)  {
; X86-LABEL: test_mm256_mask_set1_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastw %eax, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_set1_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %esi, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <16 x i16> undef, i16 %__A, i32 0
  %vecinit15.i.i = shufflevector <16 x i16> %vecinit.i.i, <16 x i16> undef, <16 x i32> zeroinitializer
  %0 = bitcast <4 x i64> %__O to <16 x i16>
  %1 = bitcast i16 %__M to <16 x i1>
  %2 = select <16 x i1> %1, <16 x i16> %vecinit15.i.i, <16 x i16> %0
  %3 = bitcast <16 x i16> %2 to <4 x i64>
  ret <4 x i64> %3
}
; 256-bit zero-masked word splat: %__A is broadcast to 16 x i16 and lanes not
; selected by %__M become zero; the checks expect {%k1} {z} on vpbroadcastw.
define <4 x i64> @test_mm256_maskz_set1_epi16(i16 zeroext %__M, i16 signext %__A) {
; X86-LABEL: test_mm256_maskz_set1_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastw %eax, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_set1_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %esi, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <16 x i16> undef, i16 %__A, i32 0
  %vecinit15.i.i = shufflevector <16 x i16> %vecinit.i.i, <16 x i16> undef, <16 x i32> zeroinitializer
  %0 = bitcast i16 %__M to <16 x i1>
  %1 = select <16 x i1> %0, <16 x i16> %vecinit15.i.i, <16 x i16> zeroinitializer
  %2 = bitcast <16 x i16> %1 to <4 x i64>
  ret <4 x i64> %2
}
; 128-bit merge-masked word splat: %__A is broadcast to 8 x i16 and selected
; per lane by the i8 mask %__M, with %__O supplying unselected lanes.
define <2 x i64> @test_mm_mask_set1_epi16(<2 x i64> %__O, i8 zeroext %__M, i16 signext %__A) {
; X86-LABEL: test_mm_mask_set1_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovd %ecx, %k1
; X86-NEXT:    vpbroadcastw %eax, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_set1_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %esi, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <8 x i16> undef, i16 %__A, i32 0
  %vecinit7.i.i = shufflevector <8 x i16> %vecinit.i.i, <8 x i16> undef, <8 x i32> zeroinitializer
  %0 = bitcast <2 x i64> %__O to <8 x i16>
  %1 = bitcast i8 %__M to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i16> %vecinit7.i.i, <8 x i16> %0
  %3 = bitcast <8 x i16> %2 to <2 x i64>
  ret <2 x i64> %3
}
; 128-bit zero-masked word splat: %__A is broadcast to 8 x i16 and lanes not
; selected by %__M become zero; the checks expect {%k1} {z} on vpbroadcastw.
define <2 x i64> @test_mm_maskz_set1_epi16(i8 zeroext %__M, i16 signext %__A) {
; X86-LABEL: test_mm_maskz_set1_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovd %ecx, %k1
; X86-NEXT:    vpbroadcastw %eax, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_set1_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %esi, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <8 x i16> undef, i16 %__A, i32 0
  %vecinit7.i.i = shufflevector <8 x i16> %vecinit.i.i, <8 x i16> undef, <8 x i32> zeroinitializer
  %0 = bitcast i8 %__M to <8 x i1>
  %1 = select <8 x i1> %0, <8 x i16> %vecinit7.i.i, <8 x i16> zeroinitializer
  %2 = bitcast <8 x i16> %1 to <2 x i64>
  ret <2 x i64> %2
}
; Splat of byte 0 of %a0 across all 16 bytes via a zero-index shufflevector;
; the checks expect a register-to-register vpbroadcastb.
define <2 x i64> @test_mm_broadcastb_epi8(<2 x i64> %a0) {
; CHECK-LABEL: test_mm_broadcastb_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <16 x i32> zeroinitializer
  %res1 = bitcast <16 x i8> %res0 to <2 x i64>
  ret <2 x i64> %res1
}
; Merge-masked byte broadcast: byte 0 of %a2 is splat to 16 lanes and selected
; per lane by %a1, with %a0 supplying unselected lanes.
define <2 x i64> @test_mm_mask_broadcastb_epi8(<2 x i64> %a0, i16 %a1, <2 x i64> %a2) {
; X86-LABEL: test_mm_mask_broadcastb_epi8:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastb %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_broadcastb_epi8:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastb %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast i16 %a1 to <16 x i1>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg2, <16 x i8> undef, <16 x i32> zeroinitializer
  %res1 = select <16 x i1> %arg1, <16 x i8> %res0, <16 x i8> %arg0
  %res2 = bitcast <16 x i8> %res1 to <2 x i64>
  ret <2 x i64> %res2
}
; Zero-masked byte broadcast: byte 0 of %a1 is splat to 16 lanes and lanes not
; selected by %a0 become zero ({%k1} {z} on vpbroadcastb).
define <2 x i64> @test_mm_maskz_broadcastb_epi8(i16 %a0, <2 x i64> %a1) {
; X86-LABEL: test_mm_maskz_broadcastb_epi8:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastb %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_broadcastb_epi8:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastb %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
  %arg0 = bitcast i16 %a0 to <16 x i1>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg1, <16 x i8> undef, <16 x i32> zeroinitializer
  %res1 = select <16 x i1> %arg0, <16 x i8> %res0, <16 x i8> zeroinitializer
  %res2 = bitcast <16 x i8> %res1 to <2 x i64>
  ret <2 x i64> %res2
}
; Splat of byte 0 of the 128-bit %a0 across 32 bytes (widening zero-index
; shufflevector); the checks expect vpbroadcastb from xmm to ymm.
define <4 x i64> @test_mm256_broadcastb_epi8(<2 x i64> %a0) {
; CHECK-LABEL: test_mm256_broadcastb_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <32 x i32> zeroinitializer
  %res1 = bitcast <32 x i8> %res0 to <4 x i64>
  ret <4 x i64> %res1
}
; 256-bit merge-masked byte broadcast: byte 0 of %a2 is splat to 32 lanes and
; selected per lane by the i32 mask %a1, with %a0 supplying unselected lanes.
define <4 x i64> @test_mm256_mask_broadcastb_epi8(<4 x i64> %a0, i32 %a1, <2 x i64> %a2) {
; X86-LABEL: test_mm256_mask_broadcastb_epi8:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastb %xmm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_broadcastb_epi8:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastb %xmm1, %ymm0 {%k1}
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast i32 %a1 to <32 x i1>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg2, <16 x i8> undef, <32 x i32> zeroinitializer
  %res1 = select <32 x i1> %arg1, <32 x i8> %res0, <32 x i8> %arg0
  %res2 = bitcast <32 x i8> %res1 to <4 x i64>
  ret <4 x i64> %res2
}
; 256-bit zero-masked byte broadcast: byte 0 of %a1 is splat to 32 lanes and
; lanes not selected by %a0 become zero ({%k1} {z} on vpbroadcastb).
define <4 x i64> @test_mm256_maskz_broadcastb_epi8(i32 %a0, <2 x i64> %a1) {
; X86-LABEL: test_mm256_maskz_broadcastb_epi8:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastb %xmm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_broadcastb_epi8:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastb %xmm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
  %arg0 = bitcast i32 %a0 to <32 x i1>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg1, <16 x i8> undef, <32 x i32> zeroinitializer
  %res1 = select <32 x i1> %arg0, <32 x i8> %res0, <32 x i8> zeroinitializer
  %res2 = bitcast <32 x i8> %res1 to <4 x i64>
  ret <4 x i64> %res2
}
; Splat of word 0 of %a0 across all 8 x i16 lanes via a zero-index
; shufflevector; the checks expect a register-to-register vpbroadcastw.
define <2 x i64> @test_mm_broadcastw_epi16(<2 x i64> %a0) {
; CHECK-LABEL: test_mm_broadcastw_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> zeroinitializer
  %res1 = bitcast <8 x i16> %res0 to <2 x i64>
  ret <2 x i64> %res1
}
; Merge-masked word broadcast: word 0 of %a2 is splat to 8 lanes and selected
; per lane by the i8 mask %a1, with %a0 supplying unselected lanes.
define <2 x i64> @test_mm_mask_broadcastw_epi16(<2 x i64> %a0, i8 %a1, <2 x i64> %a2) {
; X86-LABEL: test_mm_mask_broadcastw_epi16:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpbroadcastw %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_broadcastw_epi16:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %arg2 = bitcast <2 x i64> %a2 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg2, <8 x i16> undef, <8 x i32> zeroinitializer
  %res1 = select <8 x i1> %arg1, <8 x i16> %res0, <8 x i16> %arg0
  %res2 = bitcast <8 x i16> %res1 to <2 x i64>
  ret <2 x i64> %res2
}
; Zero-masked word broadcast: word 0 of %a1 is splat to 8 lanes and lanes not
; selected by %a0 become zero ({%k1} {z} on vpbroadcastw).
define <2 x i64> @test_mm_maskz_broadcastw_epi16(i8 %a0, <2 x i64> %a1) {
; X86-LABEL: test_mm_maskz_broadcastw_epi16:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpbroadcastw %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_broadcastw_epi16:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg1, <8 x i16> undef, <8 x i32> zeroinitializer
  %res1 = select <8 x i1> %arg0, <8 x i16> %res0, <8 x i16> zeroinitializer
  %res2 = bitcast <8 x i16> %res1 to <2 x i64>
  ret <2 x i64> %res2
}
; Splat of word 0 of the 128-bit %a0 across 16 x i16 lanes (widening
; zero-index shufflevector); the checks expect vpbroadcastw from xmm to ymm.
define <4 x i64> @test_mm256_broadcastw_epi16(<2 x i64> %a0) {
; CHECK-LABEL: test_mm256_broadcastw_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <16 x i32> zeroinitializer
  %res1 = bitcast <16 x i16> %res0 to <4 x i64>
  ret <4 x i64> %res1
}
; 256-bit merge-masked word broadcast: word 0 of %a2 is splat to 16 lanes and
; selected per lane by the i16 mask %a1, with %a0 supplying unselected lanes.
define <4 x i64> @test_mm256_mask_broadcastw_epi16(<4 x i64> %a0, i16 %a1, <2 x i64> %a2) {
; X86-LABEL: test_mm256_mask_broadcastw_epi16:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastw %xmm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_broadcastw_epi16:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %xmm1, %ymm0 {%k1}
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast i16 %a1 to <16 x i1>
  %arg2 = bitcast <2 x i64> %a2 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg2, <8 x i16> undef, <16 x i32> zeroinitializer
  %res1 = select <16 x i1> %arg1, <16 x i16> %res0, <16 x i16> %arg0
  %res2 = bitcast <16 x i16> %res1 to <4 x i64>
  ret <4 x i64> %res2
}
; 256-bit zero-masked word broadcast: word 0 of %a1 is splat to 16 lanes and
; lanes not selected by %a0 become zero ({%k1} {z} on vpbroadcastw).
define <4 x i64> @test_mm256_maskz_broadcastw_epi16(i16 %a0, <2 x i64> %a1) {
; X86-LABEL: test_mm256_maskz_broadcastw_epi16:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastw %xmm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_broadcastw_epi16:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %xmm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
  %arg0 = bitcast i16 %a0 to <16 x i1>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg1, <8 x i16> undef, <16 x i32> zeroinitializer
  %res1 = select <16 x i1> %arg0, <16 x i16> %res0, <16 x i16> zeroinitializer
  %res2 = bitcast <16 x i16> %res1 to <4 x i64>
  ret <4 x i64> %res2
}
; Truncates 8 x i16 to 8 x i8 and widens to 16 bytes with the upper 8 bytes
; taken from a zero vector; the checks expect a single 128-bit vpmovwb.
define <2 x i64> @test_mm_cvtepi16_epi8(<2 x i64> %__A) {
; CHECK-LABEL: test_mm_cvtepi16_epi8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmovwb %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %conv.i = trunc <8 x i16> %0 to <8 x i8>
  %shuf.i = shufflevector <8 x i8> %conv.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = bitcast <16 x i8> %shuf.i to <2 x i64>
  ret <2 x i64> %1
}
; Truncates 16 x i16 (256-bit) to 16 x i8 (128-bit); the checks expect
; vpmovwb from ymm to xmm followed by vzeroupper.
define <2 x i64> @test_mm256_cvtepi16_epi8(<4 x i64> %__A) {
; CHECK-LABEL: test_mm256_cvtepi16_epi8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmovwb %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %conv.i = trunc <16 x i16> %0 to <16 x i8>
  %1 = bitcast <16 x i8> %conv.i to <2 x i64>
  ret <2 x i64> %1
}
; Merge-masked truncation: 16 x i16 is truncated to 16 x i8 and selected per
; lane by %__M, with %__O supplying unselected lanes (masked vpmovwb).
define <2 x i64> @test_mm256_mask_cvtepi16_epi8(<2 x i64> %__O, i16 zeroext %__M, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_cvtepi16_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpmovwb %ymm1, %xmm0 {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_cvtepi16_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpmovwb %ymm1, %xmm0 {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %conv.i.i = trunc <16 x i16> %0 to <16 x i8>
  %1 = bitcast <2 x i64> %__O to <16 x i8>
  %2 = bitcast i16 %__M to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %conv.i.i, <16 x i8> %1
  %4 = bitcast <16 x i8> %3 to <2 x i64>
  ret <2 x i64> %4
}
; Zero-masked truncation: 16 x i16 is truncated to 16 x i8 and lanes not
; selected by %__M become zero ({%k1} {z} on vpmovwb).
define <2 x i64> @test_mm256_maskz_cvtepi16_epi8(i16 zeroext %__M, <4 x i64> %__A) {
; X86-LABEL: test_mm256_maskz_cvtepi16_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpmovwb %ymm0, %xmm0 {%k1} {z}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_cvtepi16_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpmovwb %ymm0, %xmm0 {%k1} {z}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %conv.i.i = trunc <16 x i16> %0 to <16 x i8>
  %1 = bitcast i16 %__M to <16 x i1>
  %2 = select <16 x i1> %1, <16 x i8> %conv.i.i, <16 x i8> zeroinitializer
  %3 = bitcast <16 x i8> %2 to <2 x i64>
  ret <2 x i64> %3
}
; "mask2" two-source word permute: the vpermi2var.hi.128 result is selected by
; %__U with the index operand %__I as pass-through, so the checks expect
; vpermi2w writing the index register and a vmovdqa into the return register.
define <2 x i64> @test_mm_mask2_permutex2var_epi16(<2 x i64> %__A, <2 x i64> %__I, i8 zeroext %__U, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask2_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpermi2w %xmm2, %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask2_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermi2w %xmm2, %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__I to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> %1
  %6 = bitcast <8 x i16> %5 to <2 x i64>
  ret <2 x i64> %6
}
; 256-bit "mask2" two-source word permute: the vpermi2var.hi.256 result is
; selected by %__U with the index operand %__I as pass-through (vpermi2w
; followed by a vmovdqa into the return register).
define <4 x i64> @test_mm256_mask2_permutex2var_epi16(<4 x i64> %__A, <4 x i64> %__I, i16 zeroext %__U, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask2_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermi2w %ymm2, %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask2_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermi2w %ymm2, %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__I to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> %1
  %6 = bitcast <16 x i16> %5 to <4 x i64>
  ret <4 x i64> %6
}
; Unmasked 128-bit two-source word permute through the vpermi2var.hi.128
; intrinsic; the checks expect the vpermt2w form overwriting the first source.
define <2 x i64> @test_mm_permutex2var_epi16(<2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_permutex2var_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermt2w %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__I to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
  %4 = bitcast <8 x i16> %3 to <2 x i64>
  ret <2 x i64> %4
}
; Merge-masked 128-bit two-source word permute: the intrinsic result is
; selected by %__U with the first source %__A as pass-through (masked vpermt2w).
define <2 x i64> @test_mm_mask_permutex2var_epi16(<2 x i64> %__A, i8 zeroext %__U, <2 x i64> %__I, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpermt2w %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2w %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__I to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> %0
  %6 = bitcast <8 x i16> %5 to <2 x i64>
  ret <2 x i64> %6
}
; Zero-masked 128-bit two-source word permute: lanes of the intrinsic result
; not selected by %__U become zero ({%k1} {z} on vpermt2w).
define <2 x i64> @test_mm_maskz_permutex2var_epi16(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpermt2w %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2w %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__I to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> zeroinitializer
  %6 = bitcast <8 x i16> %5 to <2 x i64>
  ret <2 x i64> %6
}
; Unmasked 256-bit two-source word permute through the vpermi2var.hi.256
; intrinsic; the checks expect vpermt2w on ymm registers.
define <4 x i64> @test_mm256_permutex2var_epi16(<4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_permutex2var_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermt2w %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__I to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2)
  %4 = bitcast <16 x i16> %3 to <4 x i64>
  ret <4 x i64> %4
}
; Merge-masked 256-bit two-source word permute: the intrinsic result is
; selected by %__U with the first source %__A as pass-through (masked vpermt2w).
define <4 x i64> @test_mm256_mask_permutex2var_epi16(<4 x i64> %__A, i16 zeroext %__U, <4 x i64> %__I, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2w %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2w %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__I to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> %0
  %6 = bitcast <16 x i16> %5 to <4 x i64>
  ret <4 x i64> %6
}
; Zero-masked 256-bit two-source word permute: lanes of the intrinsic result
; not selected by %__U become zero ({%k1} {z} on vpermt2w).
define <4 x i64> @test_mm256_maskz_permutex2var_epi16(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2w %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2w %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__I to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> zeroinitializer
  %6 = bitcast <16 x i16> %5 to <4 x i64>
  ret <4 x i64> %6
}
; Two-source word-permute intrinsics called by the permutex2var tests in this
; file (128-bit and 256-bit forms; each takes three same-typed i16 vectors).
declare <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>)
declare <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>)