; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i686-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512bw-builtins.c
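
; For reference, the kunpackd/kunpackw tests below mirror a C intrinsic
; pattern along these lines (a sketch of the assumed usage, not a verbatim
; copy of the clang test named above):
;
;   __mmask64 test_mm512_kunpackd(__m512i A, __m512i B, __m512i C,
;                                 __m512i D, __m512i E, __m512i F) {
;     return _mm512_mask_cmpneq_epi8_mask(
;         _mm512_kunpackd(_mm512_cmpneq_epi8_mask(B, A),
;                         _mm512_cmpneq_epi8_mask(C, D)),
;         E, F);
;   }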
define i64 @test_mm512_kunpackd(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D, <8 x i64> %__E, <8 x i64> %__F) {
; X86-LABEL: test_mm512_kunpackd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %ebp, -8
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    .cfi_def_cfa_register %ebp
; X86-NEXT:    andl $-64, %esp
; X86-NEXT:    subl $64, %esp
; X86-NEXT:    vmovdqa64 136(%ebp), %zmm3
; X86-NEXT:    vpcmpneqb %zmm0, %zmm1, %k0
; X86-NEXT:    vpcmpneqb 8(%ebp), %zmm2, %k1
; X86-NEXT:    vpcmpneqb 72(%ebp), %zmm3, %k2
; X86-NEXT:    kandd %k0, %k2, %k0
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    kshiftrq $32, %k2, %k0
; X86-NEXT:    kandd %k1, %k0, %k0
; X86-NEXT:    kmovd %k0, %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    .cfi_def_cfa %esp, 4
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_kunpackd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpcmpneqb %zmm0, %zmm1, %k0
; X64-NEXT:    vpcmpneqb %zmm3, %zmm2, %k1
; X64-NEXT:    kunpckdq %k0, %k1, %k1
; X64-NEXT:    vpcmpneqb %zmm5, %zmm4, %k0 {%k1}
; X64-NEXT:    kmovq %k0, %rax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__E to <64 x i8>
  %1 = bitcast <8 x i64> %__F to <64 x i8>
  %2 = bitcast <8 x i64> %__B to <64 x i8>
  %3 = bitcast <8 x i64> %__A to <64 x i8>
  %4 = icmp ne <64 x i8> %2, %3
  %5 = bitcast <8 x i64> %__C to <64 x i8>
  %6 = bitcast <8 x i64> %__D to <64 x i8>
  %7 = icmp ne <64 x i8> %5, %6
  %8 = shufflevector <64 x i1> %4, <64 x i1> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %9 = shufflevector <64 x i1> %7, <64 x i1> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %10 = shufflevector <32 x i1> %8, <32 x i1> %9, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %11 = icmp ne <64 x i8> %0, %1
  %12 = and <64 x i1> %11, %10
  %13 = bitcast <64 x i1> %12 to i64
  ret i64 %13
}

define i32 @test_mm512_kunpackw(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D, <8 x i64> %__E, <8 x i64> %__F) {
; X86-LABEL: test_mm512_kunpackw:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %ebp, -8
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    .cfi_def_cfa_register %ebp
; X86-NEXT:    andl $-64, %esp
; X86-NEXT:    subl $64, %esp
; X86-NEXT:    vmovdqa64 136(%ebp), %zmm3
; X86-NEXT:    vpcmpneqw %zmm0, %zmm1, %k0
; X86-NEXT:    vpcmpneqw 8(%ebp), %zmm2, %k1
; X86-NEXT:    kunpckwd %k0, %k1, %k1
; X86-NEXT:    vpcmpneqw 72(%ebp), %zmm3, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    .cfi_def_cfa %esp, 4
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_kunpackw:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpcmpneqw %zmm0, %zmm1, %k0
; X64-NEXT:    vpcmpneqw %zmm3, %zmm2, %k1
; X64-NEXT:    kunpckwd %k0, %k1, %k1
; X64-NEXT:    vpcmpneqw %zmm5, %zmm4, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__E to <32 x i16>
  %1 = bitcast <8 x i64> %__F to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = bitcast <8 x i64> %__A to <32 x i16>
  %4 = icmp ne <32 x i16> %2, %3
  %5 = bitcast <8 x i64> %__C to <32 x i16>
  %6 = bitcast <8 x i64> %__D to <32 x i16>
  %7 = icmp ne <32 x i16> %5, %6
  %8 = shufflevector <32 x i1> %4, <32 x i1> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %9 = shufflevector <32 x i1> %7, <32 x i1> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %10 = shufflevector <16 x i1> %8, <16 x i1> %9, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %11 = icmp ne <32 x i16> %0, %1
  %12 = and <32 x i1> %11, %10
  %13 = bitcast <32 x i1> %12 to i32
  ret i32 %13
}

define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext %__A) {
; X86-LABEL: test_mm512_mask_set1_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpbroadcastb %eax, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_set1_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpbroadcastb %esi, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <64 x i8> undef, i8 %__A, i32 0
  %vecinit63.i.i = shufflevector <64 x i8> %vecinit.i.i, <64 x i8> undef, <64 x i32> zeroinitializer
  %0 = bitcast <8 x i64> %__O to <64 x i8>
  %1 = bitcast i64 %__M to <64 x i1>
  %2 = select <64 x i1> %1, <64 x i8> %vecinit63.i.i, <64 x i8> %0
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) {
; X86-LABEL: test_mm512_maskz_set1_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpbroadcastb %eax, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_set1_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpbroadcastb %esi, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <64 x i8> undef, i8 %__A, i32 0
  %vecinit63.i.i = shufflevector <64 x i8> %vecinit.i.i, <64 x i8> undef, <64 x i32> zeroinitializer
  %0 = bitcast i64 %__M to <64 x i1>
  %1 = select <64 x i1> %0, <64 x i8> %vecinit63.i.i, <64 x i8> zeroinitializer
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_set1_epi16(<8 x i64> %__O, i32 %__M, i16 signext %__A) {
; X86-LABEL: test_mm512_mask_set1_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastw %eax, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_set1_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %esi, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <32 x i16> undef, i16 %__A, i32 0
  %vecinit31.i.i = shufflevector <32 x i16> %vecinit.i.i, <32 x i16> undef, <32 x i32> zeroinitializer
  %0 = bitcast <8 x i64> %__O to <32 x i16>
  %1 = bitcast i32 %__M to <32 x i1>
  %2 = select <32 x i1> %1, <32 x i16> %vecinit31.i.i, <32 x i16> %0
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_set1_epi16(i32 %__M, i16 signext %__A) {
; X86-LABEL: test_mm512_maskz_set1_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastw %eax, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_set1_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %esi, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <32 x i16> undef, i16 %__A, i32 0
  %vecinit31.i.i = shufflevector <32 x i16> %vecinit.i.i, <32 x i16> undef, <32 x i32> zeroinitializer
  %0 = bitcast i32 %__M to <32 x i1>
  %1 = select <32 x i1> %0, <32 x i16> %vecinit31.i.i, <32 x i16> zeroinitializer
  %2 = bitcast <32 x i16> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_broadcastb_epi8(<2 x i64> %a0) {
; CHECK-LABEL: test_mm512_broadcastb_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <64 x i32> zeroinitializer
  %res1 = bitcast <64 x i8> %res0 to <8 x i64>
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_mask_broadcastb_epi8(<8 x i64> %a0, i64* %a1, <2 x i64> %a2) {
; X86-LABEL: test_mm512_mask_broadcastb_epi8:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovq (%eax), %k1
; X86-NEXT:    vpbroadcastb %xmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_broadcastb_epi8:
; X64:       # %bb.0:
; X64-NEXT:    kmovq (%rdi), %k1
; X64-NEXT:    vpbroadcastb %xmm1, %zmm0 {%k1}
; X64-NEXT:    retq
  %arg0 = bitcast <8 x i64> %a0 to <64 x i8>
  %bc1 = bitcast i64* %a1 to <64 x i1>*
  %arg1 = load <64 x i1>, <64 x i1>* %bc1
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg2, <16 x i8> undef, <64 x i32> zeroinitializer
  %res1 = select <64 x i1> %arg1, <64 x i8> %res0, <64 x i8> %arg0
  %res2 = bitcast <64 x i8> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_maskz_broadcastb_epi8(i64* %a0, <2 x i64> %a1) {
; X86-LABEL: test_mm512_maskz_broadcastb_epi8:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovq (%eax), %k1
; X86-NEXT:    vpbroadcastb %xmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_broadcastb_epi8:
; X64:       # %bb.0:
; X64-NEXT:    kmovq (%rdi), %k1
; X64-NEXT:    vpbroadcastb %xmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
  %bc0 = bitcast i64* %a0 to <64 x i1>*
  %arg0 = load <64 x i1>, <64 x i1>* %bc0
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg1, <16 x i8> undef, <64 x i32> zeroinitializer
  %res1 = select <64 x i1> %arg0, <64 x i8> %res0, <64 x i8> zeroinitializer
  %res2 = bitcast <64 x i8> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_broadcastw_epi16(<2 x i64> %a0) {
; CHECK-LABEL: test_mm512_broadcastw_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <32 x i32> zeroinitializer
  %res1 = bitcast <32 x i16> %res0 to <8 x i64>
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_mask_broadcastw_epi16(<8 x i64> %a0, i32 %a1, <2 x i64> %a2) {
; X86-LABEL: test_mm512_mask_broadcastw_epi16:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastw %xmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_broadcastw_epi16:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %xmm1, %zmm0 {%k1}
; X64-NEXT:    retq
  %arg0 = bitcast <8 x i64> %a0 to <32 x i16>
  %arg1 = bitcast i32 %a1 to <32 x i1>
  %arg2 = bitcast <2 x i64> %a2 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg2, <8 x i16> undef, <32 x i32> zeroinitializer
  %res1 = select <32 x i1> %arg1, <32 x i16> %res0, <32 x i16> %arg0
  %res2 = bitcast <32 x i16> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_maskz_broadcastw_epi16(i32 %a0, <2 x i64> %a1) {
; X86-LABEL: test_mm512_maskz_broadcastw_epi16:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastw %xmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_broadcastw_epi16:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %xmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
  %arg0 = bitcast i32 %a0 to <32 x i1>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg1, <8 x i16> undef, <32 x i32> zeroinitializer
  %res1 = select <32 x i1> %arg0, <32 x i16> %res0, <32 x i16> zeroinitializer
  %res2 = bitcast <32 x i16> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_bslli_epi128(<8 x i64> %a0) {
; CHECK-LABEL: test_mm512_bslli_epi128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslldq {{.*#+}} zmm0 = zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7,8,9,10],zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23,24,25,26],zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39,40,41,42],zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55,56,57,58]
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <8 x i64> %a0 to <64 x i8>
  %res0 = shufflevector <64 x i8> zeroinitializer, <64 x i8> %arg0, <64 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 27, i32 28, i32 29, i32 30, i32 31, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 43, i32 44, i32 45, i32 46, i32 47, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 59, i32 60, i32 61, i32 62, i32 63, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122>
  %res1 = bitcast <64 x i8> %res0 to <8 x i64>
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_bsrli_epi128(<8 x i64> %a0) {
; CHECK-LABEL: test_mm512_bsrli_epi128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrldq {{.*#+}} zmm0 = zmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zmm0[21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zmm0[37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,zero,zmm0[53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero,zero
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <8 x i64> %a0 to <64 x i8>
  %res0 = shufflevector <64 x i8> %arg0, <64 x i8> zeroinitializer, <64 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 64, i32 65, i32 66, i32 67, i32 68, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 80, i32 81, i32 82, i32 83, i32 84, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 96, i32 97, i32 98, i32 99, i32 100, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 112, i32 113, i32 114, i32 115, i32 116>
  %res1 = bitcast <64 x i8> %res0 to <8 x i64>
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_unpackhi_epi8(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_mm512_unpackhi_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpunpckhbw {{.*#+}} zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <8 x i64> %a0 to <64 x i8>
  %arg1 = bitcast <8 x i64> %a1 to <64 x i8>
  %res0 = shufflevector <64 x i8> %arg0, <64 x i8> %arg1, <64 x i32> <i32 8, i32 72, i32 9, i32 73, i32 10, i32 74, i32 11, i32 75, i32 12, i32 76, i32 13, i32 77, i32 14, i32 78, i32 15, i32 79, i32 24, i32 88, i32 25, i32 89, i32 26, i32 90, i32 27, i32 91, i32 28, i32 92, i32 29, i32 93, i32 30, i32 94, i32 31, i32 95, i32 40, i32 104, i32 41, i32 105, i32 42, i32 106, i32 43, i32 107, i32 44, i32 108, i32 45, i32 109, i32 46, i32 110, i32 47, i32 111, i32 56, i32 120, i32 57, i32 121, i32 58, i32 122, i32 59, i32 123, i32 60, i32 124, i32 61, i32 125, i32 62, i32 126, i32 63, i32 127>
  %res1 = bitcast <64 x i8> %res0 to <8 x i64>
  ret <8 x i64> %res1
}

; TODO - improve support for i64 -> mmask64 on 32-bit targets
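; (On 32-bit targets an i64 mask passed by value arrives as two 32-bit GPR
; halves and is currently reassembled with kmovd+kunpckdq, as in
; test_mm512_mask_set1_epi8 above; a mask loaded from memory can use a single
; kmovq, as the tests below do.)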
define <8 x i64> @test_mm512_mask_unpackhi_epi8(<8 x i64> %a0, i64* %a1, <8 x i64> %a2, <8 x i64> %a3) {
; X86-LABEL: test_mm512_mask_unpackhi_epi8:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovq (%eax), %k1
; X86-NEXT:    vpunpckhbw {{.*#+}} zmm0 {%k1} = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_unpackhi_epi8:
; X64:       # %bb.0:
; X64-NEXT:    kmovq (%rdi), %k1
; X64-NEXT:    vpunpckhbw {{.*#+}} zmm0 {%k1} = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
; X64-NEXT:    retq
  %arg0 = bitcast <8 x i64> %a0 to <64 x i8>
  %arg1 = bitcast i64* %a1 to <64 x i1>*
  %sel1 = load <64 x i1>, <64 x i1>* %arg1
  %arg2 = bitcast <8 x i64> %a2 to <64 x i8>
  %arg3 = bitcast <8 x i64> %a3 to <64 x i8>
  %res0 = shufflevector <64 x i8> %arg2, <64 x i8> %arg3, <64 x i32> <i32 8, i32 72, i32 9, i32 73, i32 10, i32 74, i32 11, i32 75, i32 12, i32 76, i32 13, i32 77, i32 14, i32 78, i32 15, i32 79, i32 24, i32 88, i32 25, i32 89, i32 26, i32 90, i32 27, i32 91, i32 28, i32 92, i32 29, i32 93, i32 30, i32 94, i32 31, i32 95, i32 40, i32 104, i32 41, i32 105, i32 42, i32 106, i32 43, i32 107, i32 44, i32 108, i32 45, i32 109, i32 46, i32 110, i32 47, i32 111, i32 56, i32 120, i32 57, i32 121, i32 58, i32 122, i32 59, i32 123, i32 60, i32 124, i32 61, i32 125, i32 62, i32 126, i32 63, i32 127>
  %res1 = select <64 x i1> %sel1, <64 x i8> %res0, <64 x i8> %arg0
  %res2 = bitcast <64 x i8> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_maskz_unpackhi_epi8(i64* %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X86-LABEL: test_mm512_maskz_unpackhi_epi8:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovq (%eax), %k1
; X86-NEXT:    vpunpckhbw {{.*#+}} zmm0 {%k1} {z} = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_unpackhi_epi8:
; X64:       # %bb.0:
; X64-NEXT:    kmovq (%rdi), %k1
; X64-NEXT:    vpunpckhbw {{.*#+}} zmm0 {%k1} {z} = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
; X64-NEXT:    retq
  %arg0 = bitcast i64* %a0 to <64 x i1>*
  %sel0 = load <64 x i1>, <64 x i1>* %arg0
  %arg1 = bitcast <8 x i64> %a1 to <64 x i8>
  %arg2 = bitcast <8 x i64> %a2 to <64 x i8>
  %res0 = shufflevector <64 x i8> %arg1, <64 x i8> %arg2, <64 x i32> <i32 8, i32 72, i32 9, i32 73, i32 10, i32 74, i32 11, i32 75, i32 12, i32 76, i32 13, i32 77, i32 14, i32 78, i32 15, i32 79, i32 24, i32 88, i32 25, i32 89, i32 26, i32 90, i32 27, i32 91, i32 28, i32 92, i32 29, i32 93, i32 30, i32 94, i32 31, i32 95, i32 40, i32 104, i32 41, i32 105, i32 42, i32 106, i32 43, i32 107, i32 44, i32 108, i32 45, i32 109, i32 46, i32 110, i32 47, i32 111, i32 56, i32 120, i32 57, i32 121, i32 58, i32 122, i32 59, i32 123, i32 60, i32 124, i32 61, i32 125, i32 62, i32 126, i32 63, i32 127>
  %res1 = select <64 x i1> %sel0, <64 x i8> %res0, <64 x i8> zeroinitializer
  %res2 = bitcast <64 x i8> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_unpackhi_epi16(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_mm512_unpackhi_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpunpckhwd {{.*#+}} zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <8 x i64> %a0 to <32 x i16>
  %arg1 = bitcast <8 x i64> %a1 to <32 x i16>
  %res0 = shufflevector <32 x i16> %arg0, <32 x i16> %arg1, <32 x i32> <i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
  %res1 = bitcast <32 x i16> %res0 to <8 x i64>
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_mask_unpackhi_epi16(<8 x i64> %a0, i32 %a1, <8 x i64> %a2, <8 x i64> %a3) {
; X86-LABEL: test_mm512_mask_unpackhi_epi16:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpunpckhwd {{.*#+}} zmm0 {%k1} = zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31]
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_unpackhi_epi16:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpunpckhwd {{.*#+}} zmm0 {%k1} = zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31]
; X64-NEXT:    retq
  %arg0 = bitcast <8 x i64> %a0 to <32 x i16>
  %arg1 = bitcast i32 %a1 to <32 x i1>
  %arg2 = bitcast <8 x i64> %a2 to <32 x i16>
  %arg3 = bitcast <8 x i64> %a3 to <32 x i16>
  %res0 = shufflevector <32 x i16> %arg2, <32 x i16> %arg3, <32 x i32> <i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
  %res1 = select <32 x i1> %arg1, <32 x i16> %res0, <32 x i16> %arg0
  %res2 = bitcast <32 x i16> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_maskz_unpackhi_epi16(i32 %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X86-LABEL: test_mm512_maskz_unpackhi_epi16:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpunpckhwd {{.*#+}} zmm0 {%k1} {z} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_unpackhi_epi16:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpunpckhwd {{.*#+}} zmm0 {%k1} {z} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
; X64-NEXT:    retq
  %arg0 = bitcast i32 %a0 to <32 x i1>
  %arg1 = bitcast <8 x i64> %a1 to <32 x i16>
  %arg2 = bitcast <8 x i64> %a2 to <32 x i16>
  %res0 = shufflevector <32 x i16> %arg1, <32 x i16> %arg2, <32 x i32> <i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
  %res1 = select <32 x i1> %arg0, <32 x i16> %res0, <32 x i16> zeroinitializer
  %res2 = bitcast <32 x i16> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_unpacklo_epi8(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_mm512_unpacklo_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpunpcklbw {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <8 x i64> %a0 to <64 x i8>
  %arg1 = bitcast <8 x i64> %a1 to <64 x i8>
  %res0 = shufflevector <64 x i8> %arg0, <64 x i8> %arg1, <64 x i32> <i32 0, i32 64, i32 1, i32 65, i32 2, i32 66, i32 3, i32 67, i32 4, i32 68, i32 5, i32 69, i32 6, i32 70, i32 7, i32 71, i32 16, i32 80, i32 17, i32 81, i32 18, i32 82, i32 19, i32 83, i32 20, i32 84, i32 21, i32 85, i32 22, i32 86, i32 23, i32 87, i32 32, i32 96, i32 33, i32 97, i32 34, i32 98, i32 35, i32 99, i32 36, i32 100, i32 37, i32 101, i32 38, i32 102, i32 39, i32 103, i32 48, i32 112, i32 49, i32 113, i32 50, i32 114, i32 51, i32 115, i32 52, i32 116, i32 53, i32 117, i32 54, i32 118, i32 55, i32 119>
  %res1 = bitcast <64 x i8> %res0 to <8 x i64>
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_mask_unpacklo_epi8(<8 x i64> %a0, i64* %a1, <8 x i64> %a2, <8 x i64> %a3) {
; X86-LABEL: test_mm512_mask_unpacklo_epi8:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovq (%eax), %k1
; X86-NEXT:    vpunpcklbw {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[32],zmm2[32],zmm1[33],zmm2[33],zmm1[34],zmm2[34],zmm1[35],zmm2[35],zmm1[36],zmm2[36],zmm1[37],zmm2[37],zmm1[38],zmm2[38],zmm1[39],zmm2[39],zmm1[48],zmm2[48],zmm1[49],zmm2[49],zmm1[50],zmm2[50],zmm1[51],zmm2[51],zmm1[52],zmm2[52],zmm1[53],zmm2[53],zmm1[54],zmm2[54],zmm1[55],zmm2[55]
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_unpacklo_epi8:
; X64:       # %bb.0:
; X64-NEXT:    kmovq (%rdi), %k1
; X64-NEXT:    vpunpcklbw {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[32],zmm2[32],zmm1[33],zmm2[33],zmm1[34],zmm2[34],zmm1[35],zmm2[35],zmm1[36],zmm2[36],zmm1[37],zmm2[37],zmm1[38],zmm2[38],zmm1[39],zmm2[39],zmm1[48],zmm2[48],zmm1[49],zmm2[49],zmm1[50],zmm2[50],zmm1[51],zmm2[51],zmm1[52],zmm2[52],zmm1[53],zmm2[53],zmm1[54],zmm2[54],zmm1[55],zmm2[55]
; X64-NEXT:    retq
  %arg0 = bitcast <8 x i64> %a0 to <64 x i8>
  %arg1 = bitcast i64* %a1 to <64 x i1>*
  %sel1 = load <64 x i1>, <64 x i1>* %arg1
  %arg2 = bitcast <8 x i64> %a2 to <64 x i8>
  %arg3 = bitcast <8 x i64> %a3 to <64 x i8>
  %res0 = shufflevector <64 x i8> %arg2, <64 x i8> %arg3, <64 x i32> <i32 0, i32 64, i32 1, i32 65, i32 2, i32 66, i32 3, i32 67, i32 4, i32 68, i32 5, i32 69, i32 6, i32 70, i32 7, i32 71, i32 16, i32 80, i32 17, i32 81, i32 18, i32 82, i32 19, i32 83, i32 20, i32 84, i32 21, i32 85, i32 22, i32 86, i32 23, i32 87, i32 32, i32 96, i32 33, i32 97, i32 34, i32 98, i32 35, i32 99, i32 36, i32 100, i32 37, i32 101, i32 38, i32 102, i32 39, i32 103, i32 48, i32 112, i32 49, i32 113, i32 50, i32 114, i32 51, i32 115, i32 52, i32 116, i32 53, i32 117, i32 54, i32 118, i32 55, i32 119>
  %res1 = select <64 x i1> %sel1, <64 x i8> %res0, <64 x i8> %arg0
  %res2 = bitcast <64 x i8> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_maskz_unpacklo_epi8(i64* %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X86-LABEL: test_mm512_maskz_unpacklo_epi8:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovq (%eax), %k1
; X86-NEXT:    vpunpcklbw {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_unpacklo_epi8:
; X64:       # %bb.0:
; X64-NEXT:    kmovq (%rdi), %k1
; X64-NEXT:    vpunpcklbw {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
; X64-NEXT:    retq
  %arg0 = bitcast i64* %a0 to <64 x i1>*
  %sel0 = load <64 x i1>, <64 x i1>* %arg0
  %arg1 = bitcast <8 x i64> %a1 to <64 x i8>
  %arg2 = bitcast <8 x i64> %a2 to <64 x i8>
  %res0 = shufflevector <64 x i8> %arg1, <64 x i8> %arg2, <64 x i32> <i32 0, i32 64, i32 1, i32 65, i32 2, i32 66, i32 3, i32 67, i32 4, i32 68, i32 5, i32 69, i32 6, i32 70, i32 7, i32 71, i32 16, i32 80, i32 17, i32 81, i32 18, i32 82, i32 19, i32 83, i32 20, i32 84, i32 21, i32 85, i32 22, i32 86, i32 23, i32 87, i32 32, i32 96, i32 33, i32 97, i32 34, i32 98, i32 35, i32 99, i32 36, i32 100, i32 37, i32 101, i32 38, i32 102, i32 39, i32 103, i32 48, i32 112, i32 49, i32 113, i32 50, i32 114, i32 51, i32 115, i32 52, i32 116, i32 53, i32 117, i32 54, i32 118, i32 55, i32 119>
  %res1 = select <64 x i1> %sel0, <64 x i8> %res0, <64 x i8> zeroinitializer
  %res2 = bitcast <64 x i8> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_unpacklo_epi16(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_mm512_unpacklo_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpunpcklwd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <8 x i64> %a0 to <32 x i16>
  %arg1 = bitcast <8 x i64> %a1 to <32 x i16>
  %res0 = shufflevector <32 x i16> %arg0, <32 x i16> %arg1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59>
  %res1 = bitcast <32 x i16> %res0 to <8 x i64>
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_mask_unpacklo_epi16(<8 x i64> %a0, i32 %a1, <8 x i64> %a2, <8 x i64> %a3) {
; X86-LABEL: test_mm512_mask_unpacklo_epi16:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpunpcklwd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27]
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_unpacklo_epi16:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpunpcklwd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27]
; X64-NEXT:    retq
  %arg0 = bitcast <8 x i64> %a0 to <32 x i16>
  %arg1 = bitcast i32 %a1 to <32 x i1>
  %arg2 = bitcast <8 x i64> %a2 to <32 x i16>
  %arg3 = bitcast <8 x i64> %a3 to <32 x i16>
  %res0 = shufflevector <32 x i16> %arg2, <32 x i16> %arg3, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59>
  %res1 = select <32 x i1> %arg1, <32 x i16> %res0, <32 x i16> %arg0
  %res2 = bitcast <32 x i16> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_maskz_unpacklo_epi16(i32 %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X86-LABEL: test_mm512_maskz_unpacklo_epi16:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpunpcklwd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_unpacklo_epi16:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpunpcklwd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
; X64-NEXT:    retq
  %arg0 = bitcast i32 %a0 to <32 x i1>
  %arg1 = bitcast <8 x i64> %a1 to <32 x i16>
  %arg2 = bitcast <8 x i64> %a2 to <32 x i16>
  %res0 = shufflevector <32 x i16> %arg1, <32 x i16> %arg2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59>
  %res1 = select <32 x i1> %arg0, <32 x i16> %res0, <32 x i16> zeroinitializer
  %res2 = bitcast <32 x i16> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define i64 @test_mm512_test_epi8_mask(<8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_test_epi8_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vptestmb %zmm0, %zmm1, %k0
; X86-NEXT:    kshiftrq $32, %k0, %k1
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    kmovd %k1, %edx
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_test_epi8_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vptestmb %zmm0, %zmm1, %k0
; X64-NEXT:    kmovq %k0, %rax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %and1.i.i = and <8 x i64> %__B, %__A
  %0 = bitcast <8 x i64> %and1.i.i to <64 x i8>
  %1 = icmp ne <64 x i8> %0, zeroinitializer
  %2 = bitcast <64 x i1> %1 to i64
  ret i64 %2
}

define i64 @test_mm512_mask_test_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_test_epi8_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vptestmb %zmm0, %zmm1, %k0
; X86-NEXT:    kshiftrq $32, %k0, %k1
; X86-NEXT:    kmovd %k1, %edx
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_test_epi8_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vptestmb %zmm0, %zmm1, %k0 {%k1}
; X64-NEXT:    kmovq %k0, %rax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %and1.i.i = and <8 x i64> %__B, %__A
  %0 = bitcast <8 x i64> %and1.i.i to <64 x i8>
  %1 = icmp ne <64 x i8> %0, zeroinitializer
  %2 = bitcast i64 %__U to <64 x i1>
  %3 = and <64 x i1> %1, %2
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define i32 @test_mm512_test_epi16_mask(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_test_epi16_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestmw %zmm0, %zmm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and1.i.i = and <8 x i64> %__B, %__A
  %0 = bitcast <8 x i64> %and1.i.i to <32 x i16>
  %1 = icmp ne <32 x i16> %0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to i32
  ret i32 %2
}

define i32 @test_mm512_mask_test_epi16_mask(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_test_epi16_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vptestmw %zmm0, %zmm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_test_epi16_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestmw %zmm0, %zmm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %and1.i.i = and <8 x i64> %__B, %__A
  %0 = bitcast <8 x i64> %and1.i.i to <32 x i16>
  %1 = icmp ne <32 x i16> %0, zeroinitializer
  %2 = bitcast i32 %__U to <32 x i1>
  %3 = and <32 x i1> %1, %2
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

define i64 @test_mm512_testn_epi8_mask(<8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_testn_epi8_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vptestnmb %zmm0, %zmm1, %k0
; X86-NEXT:    kshiftrq $32, %k0, %k1
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    kmovd %k1, %edx
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_testn_epi8_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vptestnmb %zmm0, %zmm1, %k0
; X64-NEXT:    kmovq %k0, %rax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %and1.i.i = and <8 x i64> %__B, %__A
  %0 = bitcast <8 x i64> %and1.i.i to <64 x i8>
  %1 = icmp eq <64 x i8> %0, zeroinitializer
  %2 = bitcast <64 x i1> %1 to i64
  ret i64 %2
}

define i64 @test_mm512_mask_testn_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_testn_epi8_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vptestnmb %zmm0, %zmm1, %k0
; X86-NEXT:    kshiftrq $32, %k0, %k1
; X86-NEXT:    kmovd %k1, %edx
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_testn_epi8_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vptestnmb %zmm0, %zmm1, %k0 {%k1}
; X64-NEXT:    kmovq %k0, %rax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %and1.i.i = and <8 x i64> %__B, %__A
  %0 = bitcast <8 x i64> %and1.i.i to <64 x i8>
  %1 = icmp eq <64 x i8> %0, zeroinitializer
  %2 = bitcast i64 %__U to <64 x i1>
  %3 = and <64 x i1> %1, %2
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define i32 @test_mm512_testn_epi16_mask(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_testn_epi16_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestnmw %zmm0, %zmm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and1.i.i = and <8 x i64> %__B, %__A
  %0 = bitcast <8 x i64> %and1.i.i to <32 x i16>
  %1 = icmp eq <32 x i16> %0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to i32
  ret i32 %2
}

define i32 @test_mm512_mask_testn_epi16_mask(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_testn_epi16_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vptestnmw %zmm0, %zmm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_testn_epi16_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestnmw %zmm0, %zmm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %and1.i.i = and <8 x i64> %__B, %__A
  %0 = bitcast <8 x i64> %and1.i.i to <32 x i16>
  %1 = icmp eq <32 x i16> %0, zeroinitializer
  %2 = bitcast i32 %__U to <32 x i1>
  %3 = and <32 x i1> %1, %2
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

define <4 x i64> @test_mm512_cvtepi16_epi8(<8 x i64> %__A) {
; CHECK-LABEL: test_mm512_cvtepi16_epi8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmovwb %zmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %conv.i = trunc <32 x i16> %0 to <32 x i8>
  %1 = bitcast <32 x i8> %conv.i to <4 x i64>
  ret <4 x i64> %1
}

define <4 x i64> @test_mm512_mask_cvtepi16_epi8(<4 x i64> %__O, i32 %__M, <8 x i64> %__A) {
; X86-LABEL: test_mm512_mask_cvtepi16_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpmovwb %zmm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_cvtepi16_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpmovwb %zmm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %conv.i.i = trunc <32 x i16> %0 to <32 x i8>
  %1 = bitcast <4 x i64> %__O to <32 x i8>
  %2 = bitcast i32 %__M to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %conv.i.i, <32 x i8> %1
  %4 = bitcast <32 x i8> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <4 x i64> @test_mm512_maskz_cvtepi16_epi8(i32 %__M, <8 x i64> %__A) {
; X86-LABEL: test_mm512_maskz_cvtepi16_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpmovwb %zmm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_cvtepi16_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpmovwb %zmm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %conv.i.i = trunc <32 x i16> %0 to <32 x i8>
  %1 = bitcast i32 %__M to <32 x i1>
  %2 = select <32 x i1> %1, <32 x i8> %conv.i.i, <32 x i8> zeroinitializer
  %3 = bitcast <32 x i8> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <8 x i64> @test_mm512_mask2_permutex2var_epi16(<8 x i64> %__A, <8 x i64> %__I, i32 %__U, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask2_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1 {%k1}
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask2_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1 {%k1}
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__I to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> %1
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_permutex2var_epi16(<8 x i64> %__A, <8 x i64> %__I, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_permutex2var_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermt2w %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__I to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_permutex2var_epi16(<8 x i64> %__A, i32 %__U, <8 x i64> %__I, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2w %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2w %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__I to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> %0
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_permutex2var_epi16(i32 %__U, <8 x i64> %__A, <8 x i64> %__I, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2w %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2w %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__I to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> zeroinitializer
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>)