; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i686-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512bw-builtins.c
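
; A rough C-level sketch of the pattern the two kunpack tests below mirror
; (an assumption based on the NOTE above, not copied from the clang test):
;
;   __mmask64 test_mm512_kunpackd(__m512i A, __m512i B, __m512i C,
;                                 __m512i D, __m512i E, __m512i F) {
;     __mmask64 m = _mm512_kunpackd(_mm512_cmpneq_epi8_mask(B, A),
;                                   _mm512_cmpneq_epi8_mask(C, D));
;     return _mm512_mask_cmpneq_epi8_mask(m, E, F);
;   }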
define i64 @test_mm512_kunpackd(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D, <8 x i64> %__E, <8 x i64> %__F) {
; X86-LABEL: test_mm512_kunpackd:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: .cfi_def_cfa_register %ebp
; X86-NEXT: andl $-64, %esp
; X86-NEXT: subl $64, %esp
; X86-NEXT: vmovdqa64 136(%ebp), %zmm3
; X86-NEXT: vpcmpneqb %zmm0, %zmm1, %k0
; X86-NEXT: vpcmpneqb 8(%ebp), %zmm2, %k1
; X86-NEXT: vpcmpneqb 72(%ebp), %zmm3, %k2
; X86-NEXT: kandd %k0, %k2, %k0
; X86-NEXT: kmovd %k0, %eax
; X86-NEXT: kshiftrq $32, %k2, %k0
; X86-NEXT: kandd %k1, %k0, %k0
; X86-NEXT: kmovd %k0, %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: .cfi_def_cfa %esp, 4
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_kunpackd:
; X64: # %bb.0: # %entry
; X64-NEXT: vpcmpneqb %zmm0, %zmm1, %k0
; X64-NEXT: vpcmpneqb %zmm3, %zmm2, %k1
; X64-NEXT: kunpckdq %k0, %k1, %k1
; X64-NEXT: vpcmpneqb %zmm5, %zmm4, %k0 {%k1}
; X64-NEXT: kmovq %k0, %rax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__E to <64 x i8>
  %1 = bitcast <8 x i64> %__F to <64 x i8>
  %2 = bitcast <8 x i64> %__B to <64 x i8>
  %3 = bitcast <8 x i64> %__A to <64 x i8>
  %4 = icmp ne <64 x i8> %2, %3
  %5 = bitcast <8 x i64> %__C to <64 x i8>
  %6 = bitcast <8 x i64> %__D to <64 x i8>
  %7 = icmp ne <64 x i8> %5, %6
  %8 = shufflevector <64 x i1> %4, <64 x i1> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %9 = shufflevector <64 x i1> %7, <64 x i1> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %10 = shufflevector <32 x i1> %8, <32 x i1> %9, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %11 = icmp ne <64 x i8> %0, %1
  %12 = and <64 x i1> %11, %10
  %13 = bitcast <64 x i1> %12 to i64
  ret i64 %13
}

define i32 @test_mm512_kunpackw(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D, <8 x i64> %__E, <8 x i64> %__F) {
; X86-LABEL: test_mm512_kunpackw:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: .cfi_def_cfa_register %ebp
; X86-NEXT: andl $-64, %esp
; X86-NEXT: subl $64, %esp
; X86-NEXT: vmovdqa64 136(%ebp), %zmm3
; X86-NEXT: vpcmpneqw %zmm0, %zmm1, %k0
; X86-NEXT: vpcmpneqw 8(%ebp), %zmm2, %k1
; X86-NEXT: kunpckwd %k0, %k1, %k1
; X86-NEXT: vpcmpneqw 72(%ebp), %zmm3, %k0 {%k1}
; X86-NEXT: kmovd %k0, %eax
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: .cfi_def_cfa %esp, 4
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_kunpackw:
; X64: # %bb.0: # %entry
; X64-NEXT: vpcmpneqw %zmm0, %zmm1, %k0
; X64-NEXT: vpcmpneqw %zmm3, %zmm2, %k1
; X64-NEXT: kunpckwd %k0, %k1, %k1
; X64-NEXT: vpcmpneqw %zmm5, %zmm4, %k0 {%k1}
; X64-NEXT: kmovd %k0, %eax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__E to <32 x i16>
  %1 = bitcast <8 x i64> %__F to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = bitcast <8 x i64> %__A to <32 x i16>
  %4 = icmp ne <32 x i16> %2, %3
  %5 = bitcast <8 x i64> %__C to <32 x i16>
  %6 = bitcast <8 x i64> %__D to <32 x i16>
  %7 = icmp ne <32 x i16> %5, %6
  %8 = shufflevector <32 x i1> %4, <32 x i1> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %9 = shufflevector <32 x i1> %7, <32 x i1> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %10 = shufflevector <16 x i1> %8, <16 x i1> %9, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %11 = icmp ne <32 x i16> %0, %1
  %12 = and <32 x i1> %11, %10
  %13 = bitcast <32 x i1> %12 to i32
  ret i32 %13
}

define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext %__A) {
; X86-LABEL: test_mm512_mask_set1_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kunpckdq %k1, %k0, %k1
; X86-NEXT: vpbroadcastb %eax, %zmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_mask_set1_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovq %rdi, %k1
; X64-NEXT: vpbroadcastb %esi, %zmm0 {%k1}
; X64-NEXT: retq
entry:
  %vecinit.i.i = insertelement <64 x i8> undef, i8 %__A, i32 0
  %vecinit63.i.i = shufflevector <64 x i8> %vecinit.i.i, <64 x i8> undef, <64 x i32> zeroinitializer
  %0 = bitcast <8 x i64> %__O to <64 x i8>
  %1 = bitcast i64 %__M to <64 x i1>
  %2 = select <64 x i1> %1, <64 x i8> %vecinit63.i.i, <64 x i8> %0
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) {
; X86-LABEL: test_mm512_maskz_set1_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kunpckdq %k1, %k0, %k1
; X86-NEXT: vpbroadcastb %eax, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_set1_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovq %rdi, %k1
; X64-NEXT: vpbroadcastb %esi, %zmm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %vecinit.i.i = insertelement <64 x i8> undef, i8 %__A, i32 0
  %vecinit63.i.i = shufflevector <64 x i8> %vecinit.i.i, <64 x i8> undef, <64 x i32> zeroinitializer
  %0 = bitcast i64 %__M to <64 x i1>
  %1 = select <64 x i1> %0, <64 x i8> %vecinit63.i.i, <64 x i8> zeroinitializer
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_set1_epi16(<8 x i64> %__O, i32 %__M, i16 signext %__A) {
; X86-LABEL: test_mm512_mask_set1_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpbroadcastw %eax, %zmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_mask_set1_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpbroadcastw %esi, %zmm0 {%k1}
; X64-NEXT: retq
entry:
  %vecinit.i.i = insertelement <32 x i16> undef, i16 %__A, i32 0
  %vecinit31.i.i = shufflevector <32 x i16> %vecinit.i.i, <32 x i16> undef, <32 x i32> zeroinitializer
  %0 = bitcast <8 x i64> %__O to <32 x i16>
  %1 = bitcast i32 %__M to <32 x i1>
  %2 = select <32 x i1> %1, <32 x i16> %vecinit31.i.i, <32 x i16> %0
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_set1_epi16(i32 %__M, i16 signext %__A) {
; X86-LABEL: test_mm512_maskz_set1_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpbroadcastw %eax, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_set1_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpbroadcastw %esi, %zmm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %vecinit.i.i = insertelement <32 x i16> undef, i16 %__A, i32 0
  %vecinit31.i.i = shufflevector <32 x i16> %vecinit.i.i, <32 x i16> undef, <32 x i32> zeroinitializer
  %0 = bitcast i32 %__M to <32 x i1>
  %1 = select <32 x i1> %0, <32 x i16> %vecinit31.i.i, <32 x i16> zeroinitializer
  %2 = bitcast <32 x i16> %1 to <8 x i64>
  ret <8 x i64> %2
}

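; Roughly equivalent intrinsic-level code for the four set1 tests above
; (a hedged sketch, assuming the usual immintrin.h names):
;
;   __m512i test_mm512_mask_set1_epi8(__m512i O, __mmask64 M, char A) {
;     return _mm512_mask_set1_epi8(O, M, A);
;   }
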
define <8 x i64> @test_mm512_broadcastb_epi8(<2 x i64> %a0) {
; CHECK-LABEL: test_mm512_broadcastb_epi8:
; CHECK: # %bb.0:
; CHECK-NEXT: vpbroadcastb %xmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <64 x i32> zeroinitializer
  %res1 = bitcast <64 x i8> %res0 to <8 x i64>
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_mask_broadcastb_epi8(<8 x i64> %a0, ptr %a1, <2 x i64> %a2) {
; X86-LABEL: test_mm512_mask_broadcastb_epi8:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovq (%eax), %k1
; X86-NEXT: vpbroadcastb %xmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_mask_broadcastb_epi8:
; X64: # %bb.0:
; X64-NEXT: kmovq (%rdi), %k1
; X64-NEXT: vpbroadcastb %xmm1, %zmm0 {%k1}
; X64-NEXT: retq
  %arg0 = bitcast <8 x i64> %a0 to <64 x i8>
  %arg1 = load <64 x i1>, ptr %a1
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg2, <16 x i8> undef, <64 x i32> zeroinitializer
  %res1 = select <64 x i1> %arg1, <64 x i8> %res0, <64 x i8> %arg0
  %res2 = bitcast <64 x i8> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_maskz_broadcastb_epi8(ptr %a0, <2 x i64> %a1) {
; X86-LABEL: test_mm512_maskz_broadcastb_epi8:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovq (%eax), %k1
; X86-NEXT: vpbroadcastb %xmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_broadcastb_epi8:
; X64: # %bb.0:
; X64-NEXT: kmovq (%rdi), %k1
; X64-NEXT: vpbroadcastb %xmm0, %zmm0 {%k1} {z}
; X64-NEXT: retq
  %arg0 = load <64 x i1>, ptr %a0
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg1, <16 x i8> undef, <64 x i32> zeroinitializer
  %res1 = select <64 x i1> %arg0, <64 x i8> %res0, <64 x i8> zeroinitializer
  %res2 = bitcast <64 x i8> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_broadcastw_epi16(<2 x i64> %a0) {
; CHECK-LABEL: test_mm512_broadcastw_epi16:
; CHECK: # %bb.0:
; CHECK-NEXT: vpbroadcastw %xmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <32 x i32> zeroinitializer
  %res1 = bitcast <32 x i16> %res0 to <8 x i64>
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_mask_broadcastw_epi16(<8 x i64> %a0, i32 %a1, <2 x i64> %a2) {
; X86-LABEL: test_mm512_mask_broadcastw_epi16:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpbroadcastw %xmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_mask_broadcastw_epi16:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpbroadcastw %xmm1, %zmm0 {%k1}
; X64-NEXT: retq
  %arg0 = bitcast <8 x i64> %a0 to <32 x i16>
  %arg1 = bitcast i32 %a1 to <32 x i1>
  %arg2 = bitcast <2 x i64> %a2 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg2, <8 x i16> undef, <32 x i32> zeroinitializer
  %res1 = select <32 x i1> %arg1, <32 x i16> %res0, <32 x i16> %arg0
  %res2 = bitcast <32 x i16> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_maskz_broadcastw_epi16(i32 %a0, <2 x i64> %a1) {
; X86-LABEL: test_mm512_maskz_broadcastw_epi16:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpbroadcastw %xmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_broadcastw_epi16:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpbroadcastw %xmm0, %zmm0 {%k1} {z}
; X64-NEXT: retq
  %arg0 = bitcast i32 %a0 to <32 x i1>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg1, <8 x i16> undef, <32 x i32> zeroinitializer
  %res1 = select <32 x i1> %arg0, <32 x i16> %res0, <32 x i16> zeroinitializer
  %res2 = bitcast <32 x i16> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

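; Roughly equivalent intrinsic-level code for the broadcast tests above (a
; sketch; the 64-bit byte mask is loaded from memory, hence kmovq (%rdi)):
;
;   __m512i test_mm512_mask_broadcastb_epi8(__m512i a0, __mmask64 *a1, __m128i a2) {
;     return _mm512_mask_broadcastb_epi8(a0, *a1, a2);
;   }
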
define <8 x i64> @test_mm512_bslli_epi128(<8 x i64> %a0) {
; CHECK-LABEL: test_mm512_bslli_epi128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsrldq {{.*#+}} zmm0 = zmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[43,44,45,46,47],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[59,60,61,62,63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <8 x i64> %a0 to <64 x i8>
  %res0 = shufflevector <64 x i8> %arg0, <64 x i8> zeroinitializer, <64 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 27, i32 28, i32 29, i32 30, i32 31, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 43, i32 44, i32 45, i32 46, i32 47, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 59, i32 60, i32 61, i32 62, i32 63, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122>
  %res1 = bitcast <64 x i8> %res0 to <8 x i64>
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_bsrli_epi128(<8 x i64> %a0) {
; CHECK-LABEL: test_mm512_bsrli_epi128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsrldq {{.*#+}} zmm0 = zmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zmm0[21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zmm0[37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,zero,zmm0[53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero,zero
; CHECK-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <8 x i64> %a0 to <64 x i8>
  %res0 = shufflevector <64 x i8> %arg0, <64 x i8> zeroinitializer, <64 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 64, i32 65, i32 66, i32 67, i32 68, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 80, i32 81, i32 82, i32 83, i32 84, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 96, i32 97, i32 98, i32 99, i32 100, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 112, i32 113, i32 114, i32 115, i32 116>
  %res1 = bitcast <64 x i8> %res0 to <8 x i64>
  ret <8 x i64> %res1
}

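; Both shuffles above select a run of bytes from the source followed by zeros
; within each 128-bit lane, i.e. a per-lane byte-wise right shift, which is
; why the autogenerated checks show vpsrldq for both tests. As a worked
; example for the second test, each result byte j (0 <= j < 16) of a lane is
;   result[j] = (j < 11) ? src[j + 5] : 0
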
define <8 x i64> @test_mm512_unpackhi_epi8(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_mm512_unpackhi_epi8:
; CHECK: # %bb.0:
; CHECK-NEXT: vpunpckhbw {{.*#+}} zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
; CHECK-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <8 x i64> %a0 to <64 x i8>
  %arg1 = bitcast <8 x i64> %a1 to <64 x i8>
  %res0 = shufflevector <64 x i8> %arg0, <64 x i8> %arg1, <64 x i32> <i32 8, i32 72, i32 9, i32 73, i32 10, i32 74, i32 11, i32 75, i32 12, i32 76, i32 13, i32 77, i32 14, i32 78, i32 15, i32 79, i32 24, i32 88, i32 25, i32 89, i32 26, i32 90, i32 27, i32 91, i32 28, i32 92, i32 29, i32 93, i32 30, i32 94, i32 31, i32 95, i32 40, i32 104, i32 41, i32 105, i32 42, i32 106, i32 43, i32 107, i32 44, i32 108, i32 45, i32 109, i32 46, i32 110, i32 47, i32 111, i32 56, i32 120, i32 57, i32 121, i32 58, i32 122, i32 59, i32 123, i32 60, i32 124, i32 61, i32 125, i32 62, i32 126, i32 63, i32 127>
  %res1 = bitcast <64 x i8> %res0 to <8 x i64>
  ret <8 x i64> %res1
}

; TODO - improve support for i64 -> mmask64 on 32-bit targets
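; Currently a 64-bit mask built from a scalar on a 32-bit target is assembled
; from two 32-bit halves, e.g. from the set1_epi8 tests above:
;   kmovd {{[0-9]+}}(%esp), %k0
;   kmovd {{[0-9]+}}(%esp), %k1
;   kunpckdq %k1, %k0, %k1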
define <8 x i64> @test_mm512_mask_unpackhi_epi8(<8 x i64> %a0, ptr %a1, <8 x i64> %a2, <8 x i64> %a3) {
; X86-LABEL: test_mm512_mask_unpackhi_epi8:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovq (%eax), %k1
; X86-NEXT: vpunpckhbw {{.*#+}} zmm0 {%k1} = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_mask_unpackhi_epi8:
; X64: # %bb.0:
; X64-NEXT: kmovq (%rdi), %k1
; X64-NEXT: vpunpckhbw {{.*#+}} zmm0 {%k1} = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
; X64-NEXT: retq
  %arg0 = bitcast <8 x i64> %a0 to <64 x i8>
  %sel1 = load <64 x i1>, ptr %a1
  %arg2 = bitcast <8 x i64> %a2 to <64 x i8>
  %arg3 = bitcast <8 x i64> %a3 to <64 x i8>
  %res0 = shufflevector <64 x i8> %arg2, <64 x i8> %arg3, <64 x i32> <i32 8, i32 72, i32 9, i32 73, i32 10, i32 74, i32 11, i32 75, i32 12, i32 76, i32 13, i32 77, i32 14, i32 78, i32 15, i32 79, i32 24, i32 88, i32 25, i32 89, i32 26, i32 90, i32 27, i32 91, i32 28, i32 92, i32 29, i32 93, i32 30, i32 94, i32 31, i32 95, i32 40, i32 104, i32 41, i32 105, i32 42, i32 106, i32 43, i32 107, i32 44, i32 108, i32 45, i32 109, i32 46, i32 110, i32 47, i32 111, i32 56, i32 120, i32 57, i32 121, i32 58, i32 122, i32 59, i32 123, i32 60, i32 124, i32 61, i32 125, i32 62, i32 126, i32 63, i32 127>
  %res1 = select <64 x i1> %sel1, <64 x i8> %res0, <64 x i8> %arg0
  %res2 = bitcast <64 x i8> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_maskz_unpackhi_epi8(ptr %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X86-LABEL: test_mm512_maskz_unpackhi_epi8:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovq (%eax), %k1
; X86-NEXT: vpunpckhbw {{.*#+}} zmm0 {%k1} {z} = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_unpackhi_epi8:
; X64: # %bb.0:
; X64-NEXT: kmovq (%rdi), %k1
; X64-NEXT: vpunpckhbw {{.*#+}} zmm0 {%k1} {z} = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
; X64-NEXT: retq
  %sel0 = load <64 x i1>, ptr %a0
  %arg1 = bitcast <8 x i64> %a1 to <64 x i8>
  %arg2 = bitcast <8 x i64> %a2 to <64 x i8>
  %res0 = shufflevector <64 x i8> %arg1, <64 x i8> %arg2, <64 x i32> <i32 8, i32 72, i32 9, i32 73, i32 10, i32 74, i32 11, i32 75, i32 12, i32 76, i32 13, i32 77, i32 14, i32 78, i32 15, i32 79, i32 24, i32 88, i32 25, i32 89, i32 26, i32 90, i32 27, i32 91, i32 28, i32 92, i32 29, i32 93, i32 30, i32 94, i32 31, i32 95, i32 40, i32 104, i32 41, i32 105, i32 42, i32 106, i32 43, i32 107, i32 44, i32 108, i32 45, i32 109, i32 46, i32 110, i32 47, i32 111, i32 56, i32 120, i32 57, i32 121, i32 58, i32 122, i32 59, i32 123, i32 60, i32 124, i32 61, i32 125, i32 62, i32 126, i32 63, i32 127>
  %res1 = select <64 x i1> %sel0, <64 x i8> %res0, <64 x i8> zeroinitializer
  %res2 = bitcast <64 x i8> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_unpackhi_epi16(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_mm512_unpackhi_epi16:
; CHECK: # %bb.0:
; CHECK-NEXT: vpunpckhwd {{.*#+}} zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
; CHECK-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <8 x i64> %a0 to <32 x i16>
  %arg1 = bitcast <8 x i64> %a1 to <32 x i16>
  %res0 = shufflevector <32 x i16> %arg0, <32 x i16> %arg1, <32 x i32> <i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
  %res1 = bitcast <32 x i16> %res0 to <8 x i64>
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_mask_unpackhi_epi16(<8 x i64> %a0, i32 %a1, <8 x i64> %a2, <8 x i64> %a3) {
; X86-LABEL: test_mm512_mask_unpackhi_epi16:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpunpckhwd {{.*#+}} zmm0 {%k1} = zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31]
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_mask_unpackhi_epi16:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpunpckhwd {{.*#+}} zmm0 {%k1} = zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31]
; X64-NEXT: retq
  %arg0 = bitcast <8 x i64> %a0 to <32 x i16>
  %arg1 = bitcast i32 %a1 to <32 x i1>
  %arg2 = bitcast <8 x i64> %a2 to <32 x i16>
  %arg3 = bitcast <8 x i64> %a3 to <32 x i16>
  %res0 = shufflevector <32 x i16> %arg2, <32 x i16> %arg3, <32 x i32> <i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
  %res1 = select <32 x i1> %arg1, <32 x i16> %res0, <32 x i16> %arg0
  %res2 = bitcast <32 x i16> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_maskz_unpackhi_epi16(i32 %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X86-LABEL: test_mm512_maskz_unpackhi_epi16:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpunpckhwd {{.*#+}} zmm0 {%k1} {z} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_unpackhi_epi16:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpunpckhwd {{.*#+}} zmm0 {%k1} {z} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
; X64-NEXT: retq
  %arg0 = bitcast i32 %a0 to <32 x i1>
  %arg1 = bitcast <8 x i64> %a1 to <32 x i16>
  %arg2 = bitcast <8 x i64> %a2 to <32 x i16>
  %res0 = shufflevector <32 x i16> %arg1, <32 x i16> %arg2, <32 x i32> <i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
  %res1 = select <32 x i1> %arg0, <32 x i16> %res0, <32 x i16> zeroinitializer
  %res2 = bitcast <32 x i16> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_unpacklo_epi8(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_mm512_unpacklo_epi8:
; CHECK: # %bb.0:
; CHECK-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
; CHECK-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <8 x i64> %a0 to <64 x i8>
  %arg1 = bitcast <8 x i64> %a1 to <64 x i8>
  %res0 = shufflevector <64 x i8> %arg0, <64 x i8> %arg1, <64 x i32> <i32 0, i32 64, i32 1, i32 65, i32 2, i32 66, i32 3, i32 67, i32 4, i32 68, i32 5, i32 69, i32 6, i32 70, i32 7, i32 71, i32 16, i32 80, i32 17, i32 81, i32 18, i32 82, i32 19, i32 83, i32 20, i32 84, i32 21, i32 85, i32 22, i32 86, i32 23, i32 87, i32 32, i32 96, i32 33, i32 97, i32 34, i32 98, i32 35, i32 99, i32 36, i32 100, i32 37, i32 101, i32 38, i32 102, i32 39, i32 103, i32 48, i32 112, i32 49, i32 113, i32 50, i32 114, i32 51, i32 115, i32 52, i32 116, i32 53, i32 117, i32 54, i32 118, i32 55, i32 119>
  %res1 = bitcast <64 x i8> %res0 to <8 x i64>
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_mask_unpacklo_epi8(<8 x i64> %a0, ptr %a1, <8 x i64> %a2, <8 x i64> %a3) {
; X86-LABEL: test_mm512_mask_unpacklo_epi8:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovq (%eax), %k1
; X86-NEXT: vpunpcklbw {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[32],zmm2[32],zmm1[33],zmm2[33],zmm1[34],zmm2[34],zmm1[35],zmm2[35],zmm1[36],zmm2[36],zmm1[37],zmm2[37],zmm1[38],zmm2[38],zmm1[39],zmm2[39],zmm1[48],zmm2[48],zmm1[49],zmm2[49],zmm1[50],zmm2[50],zmm1[51],zmm2[51],zmm1[52],zmm2[52],zmm1[53],zmm2[53],zmm1[54],zmm2[54],zmm1[55],zmm2[55]
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_mask_unpacklo_epi8:
; X64: # %bb.0:
; X64-NEXT: kmovq (%rdi), %k1
; X64-NEXT: vpunpcklbw {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[32],zmm2[32],zmm1[33],zmm2[33],zmm1[34],zmm2[34],zmm1[35],zmm2[35],zmm1[36],zmm2[36],zmm1[37],zmm2[37],zmm1[38],zmm2[38],zmm1[39],zmm2[39],zmm1[48],zmm2[48],zmm1[49],zmm2[49],zmm1[50],zmm2[50],zmm1[51],zmm2[51],zmm1[52],zmm2[52],zmm1[53],zmm2[53],zmm1[54],zmm2[54],zmm1[55],zmm2[55]
; X64-NEXT: retq
  %arg0 = bitcast <8 x i64> %a0 to <64 x i8>
  %sel1 = load <64 x i1>, ptr %a1
  %arg2 = bitcast <8 x i64> %a2 to <64 x i8>
  %arg3 = bitcast <8 x i64> %a3 to <64 x i8>
  %res0 = shufflevector <64 x i8> %arg2, <64 x i8> %arg3, <64 x i32> <i32 0, i32 64, i32 1, i32 65, i32 2, i32 66, i32 3, i32 67, i32 4, i32 68, i32 5, i32 69, i32 6, i32 70, i32 7, i32 71, i32 16, i32 80, i32 17, i32 81, i32 18, i32 82, i32 19, i32 83, i32 20, i32 84, i32 21, i32 85, i32 22, i32 86, i32 23, i32 87, i32 32, i32 96, i32 33, i32 97, i32 34, i32 98, i32 35, i32 99, i32 36, i32 100, i32 37, i32 101, i32 38, i32 102, i32 39, i32 103, i32 48, i32 112, i32 49, i32 113, i32 50, i32 114, i32 51, i32 115, i32 52, i32 116, i32 53, i32 117, i32 54, i32 118, i32 55, i32 119>
  %res1 = select <64 x i1> %sel1, <64 x i8> %res0, <64 x i8> %arg0
  %res2 = bitcast <64 x i8> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_maskz_unpacklo_epi8(ptr %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X86-LABEL: test_mm512_maskz_unpacklo_epi8:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovq (%eax), %k1
; X86-NEXT: vpunpcklbw {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_unpacklo_epi8:
; X64: # %bb.0:
; X64-NEXT: kmovq (%rdi), %k1
; X64-NEXT: vpunpcklbw {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
; X64-NEXT: retq
  %sel0 = load <64 x i1>, ptr %a0
  %arg1 = bitcast <8 x i64> %a1 to <64 x i8>
  %arg2 = bitcast <8 x i64> %a2 to <64 x i8>
  %res0 = shufflevector <64 x i8> %arg1, <64 x i8> %arg2, <64 x i32> <i32 0, i32 64, i32 1, i32 65, i32 2, i32 66, i32 3, i32 67, i32 4, i32 68, i32 5, i32 69, i32 6, i32 70, i32 7, i32 71, i32 16, i32 80, i32 17, i32 81, i32 18, i32 82, i32 19, i32 83, i32 20, i32 84, i32 21, i32 85, i32 22, i32 86, i32 23, i32 87, i32 32, i32 96, i32 33, i32 97, i32 34, i32 98, i32 35, i32 99, i32 36, i32 100, i32 37, i32 101, i32 38, i32 102, i32 39, i32 103, i32 48, i32 112, i32 49, i32 113, i32 50, i32 114, i32 51, i32 115, i32 52, i32 116, i32 53, i32 117, i32 54, i32 118, i32 55, i32 119>
  %res1 = select <64 x i1> %sel0, <64 x i8> %res0, <64 x i8> zeroinitializer
  %res2 = bitcast <64 x i8> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_unpacklo_epi16(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_mm512_unpacklo_epi16:
; CHECK: # %bb.0:
; CHECK-NEXT: vpunpcklwd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
; CHECK-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <8 x i64> %a0 to <32 x i16>
  %arg1 = bitcast <8 x i64> %a1 to <32 x i16>
  %res0 = shufflevector <32 x i16> %arg0, <32 x i16> %arg1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59>
  %res1 = bitcast <32 x i16> %res0 to <8 x i64>
  ret <8 x i64> %res1
}

define <8 x i64> @test_mm512_mask_unpacklo_epi16(<8 x i64> %a0, i32 %a1, <8 x i64> %a2, <8 x i64> %a3) {
; X86-LABEL: test_mm512_mask_unpacklo_epi16:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpunpcklwd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27]
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_mask_unpacklo_epi16:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpunpcklwd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27]
; X64-NEXT: retq
  %arg0 = bitcast <8 x i64> %a0 to <32 x i16>
  %arg1 = bitcast i32 %a1 to <32 x i1>
  %arg2 = bitcast <8 x i64> %a2 to <32 x i16>
  %arg3 = bitcast <8 x i64> %a3 to <32 x i16>
  %res0 = shufflevector <32 x i16> %arg2, <32 x i16> %arg3, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59>
  %res1 = select <32 x i1> %arg1, <32 x i16> %res0, <32 x i16> %arg0
  %res2 = bitcast <32 x i16> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

define <8 x i64> @test_mm512_maskz_unpacklo_epi16(i32 %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X86-LABEL: test_mm512_maskz_unpacklo_epi16:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpunpcklwd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_unpacklo_epi16:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpunpcklwd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
; X64-NEXT: retq
  %arg0 = bitcast i32 %a0 to <32 x i1>
  %arg1 = bitcast <8 x i64> %a1 to <32 x i16>
  %arg2 = bitcast <8 x i64> %a2 to <32 x i16>
  %res0 = shufflevector <32 x i16> %arg1, <32 x i16> %arg2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59>
  %res1 = select <32 x i1> %arg0, <32 x i16> %res0, <32 x i16> zeroinitializer
  %res2 = bitcast <32 x i16> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

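; Roughly equivalent intrinsic-level code for the unpack tests above (a sketch):
;
;   __m512i test_mm512_mask_unpacklo_epi16(__m512i a0, __mmask32 a1,
;                                          __m512i a2, __m512i a3) {
;     return _mm512_mask_unpacklo_epi16(a0, a1, a2, a3);
;   }
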
define i64 @test_mm512_test_epi8_mask(<8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_test_epi8_mask:
; X86: # %bb.0: # %entry
; X86-NEXT: vptestmb %zmm0, %zmm1, %k0
; X86-NEXT: kshiftrq $32, %k0, %k1
; X86-NEXT: kmovd %k0, %eax
; X86-NEXT: kmovd %k1, %edx
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_test_epi8_mask:
; X64: # %bb.0: # %entry
; X64-NEXT: vptestmb %zmm0, %zmm1, %k0
; X64-NEXT: kmovq %k0, %rax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
  %and1.i.i = and <8 x i64> %__B, %__A
  %0 = bitcast <8 x i64> %and1.i.i to <64 x i8>
  %1 = icmp ne <64 x i8> %0, zeroinitializer
  %2 = bitcast <64 x i1> %1 to i64
  ret i64 %2
}

define i64 @test_mm512_mask_test_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_test_epi8_mask:
; X86: # %bb.0: # %entry
; X86-NEXT: vptestmb %zmm0, %zmm1, %k0
; X86-NEXT: kshiftrq $32, %k0, %k1
; X86-NEXT: kmovd %k1, %edx
; X86-NEXT: kmovd %k0, %eax
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_mask_test_epi8_mask:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovq %rdi, %k1
; X64-NEXT: vptestmb %zmm0, %zmm1, %k0 {%k1}
; X64-NEXT: kmovq %k0, %rax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
  %and1.i.i = and <8 x i64> %__B, %__A
  %0 = bitcast <8 x i64> %and1.i.i to <64 x i8>
  %1 = icmp ne <64 x i8> %0, zeroinitializer
  %2 = bitcast i64 %__U to <64 x i1>
  %3 = and <64 x i1> %1, %2
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define i32 @test_mm512_test_epi16_mask(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_test_epi16_mask:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vptestmw %zmm0, %zmm1, %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %and1.i.i = and <8 x i64> %__B, %__A
  %0 = bitcast <8 x i64> %and1.i.i to <32 x i16>
  %1 = icmp ne <32 x i16> %0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to i32
  ret i32 %2
}

define i32 @test_mm512_mask_test_epi16_mask(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_test_epi16_mask:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vptestmw %zmm0, %zmm1, %k0 {%k1}
; X86-NEXT: kmovd %k0, %eax
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_mask_test_epi16_mask:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vptestmw %zmm0, %zmm1, %k0 {%k1}
; X64-NEXT: kmovd %k0, %eax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
  %and1.i.i = and <8 x i64> %__B, %__A
  %0 = bitcast <8 x i64> %and1.i.i to <32 x i16>
  %1 = icmp ne <32 x i16> %0, zeroinitializer
  %2 = bitcast i32 %__U to <32 x i1>
  %3 = and <32 x i1> %1, %2
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

define i64 @test_mm512_testn_epi8_mask(<8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_testn_epi8_mask:
; X86: # %bb.0: # %entry
; X86-NEXT: vptestnmb %zmm0, %zmm1, %k0
; X86-NEXT: kshiftrq $32, %k0, %k1
; X86-NEXT: kmovd %k0, %eax
; X86-NEXT: kmovd %k1, %edx
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_testn_epi8_mask:
; X64: # %bb.0: # %entry
; X64-NEXT: vptestnmb %zmm0, %zmm1, %k0
; X64-NEXT: kmovq %k0, %rax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
  %and1.i.i = and <8 x i64> %__B, %__A
  %0 = bitcast <8 x i64> %and1.i.i to <64 x i8>
  %1 = icmp eq <64 x i8> %0, zeroinitializer
  %2 = bitcast <64 x i1> %1 to i64
  ret i64 %2
}

define i64 @test_mm512_mask_testn_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_testn_epi8_mask:
; X86: # %bb.0: # %entry
; X86-NEXT: vptestnmb %zmm0, %zmm1, %k0
; X86-NEXT: kshiftrq $32, %k0, %k1
; X86-NEXT: kmovd %k1, %edx
; X86-NEXT: kmovd %k0, %eax
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_mask_testn_epi8_mask:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovq %rdi, %k1
; X64-NEXT: vptestnmb %zmm0, %zmm1, %k0 {%k1}
; X64-NEXT: kmovq %k0, %rax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
  %and1.i.i = and <8 x i64> %__B, %__A
  %0 = bitcast <8 x i64> %and1.i.i to <64 x i8>
  %1 = icmp eq <64 x i8> %0, zeroinitializer
  %2 = bitcast i64 %__U to <64 x i1>
  %3 = and <64 x i1> %1, %2
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define i32 @test_mm512_testn_epi16_mask(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_testn_epi16_mask:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vptestnmw %zmm0, %zmm1, %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %and1.i.i = and <8 x i64> %__B, %__A
  %0 = bitcast <8 x i64> %and1.i.i to <32 x i16>
  %1 = icmp eq <32 x i16> %0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to i32
  ret i32 %2
}

define i32 @test_mm512_mask_testn_epi16_mask(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_testn_epi16_mask:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vptestnmw %zmm0, %zmm1, %k0 {%k1}
; X86-NEXT: kmovd %k0, %eax
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_mask_testn_epi16_mask:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vptestnmw %zmm0, %zmm1, %k0 {%k1}
; X64-NEXT: kmovd %k0, %eax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
  %and1.i.i = and <8 x i64> %__B, %__A
  %0 = bitcast <8 x i64> %and1.i.i to <32 x i16>
  %1 = icmp eq <32 x i16> %0, zeroinitializer
  %2 = bitcast i32 %__U to <32 x i1>
  %3 = and <32 x i1> %1, %2
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

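; The test/testn tests compute ((A & B) != 0) or ((A & B) == 0) per element,
; which is why the IR is an `and` followed by `icmp ne/eq zeroinitializer`.
; Roughly (a sketch):
;
;   __mmask32 test_mm512_mask_test_epi16_mask(__mmask32 U, __m512i A, __m512i B) {
;     return _mm512_mask_test_epi16_mask(U, A, B);
;   }
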
define <4 x i64> @test_mm512_cvtepi16_epi8(<8 x i64> %__A) {
; CHECK-LABEL: test_mm512_cvtepi16_epi8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpmovwb %zmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %conv.i = trunc <32 x i16> %0 to <32 x i8>
  %1 = bitcast <32 x i8> %conv.i to <4 x i64>
  ret <4 x i64> %1
}

define <4 x i64> @test_mm512_mask_cvtepi16_epi8(<4 x i64> %__O, i32 %__M, <8 x i64> %__A) {
; X86-LABEL: test_mm512_mask_cvtepi16_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpmovwb %zmm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_mask_cvtepi16_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpmovwb %zmm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %conv.i.i = trunc <32 x i16> %0 to <32 x i8>
  %1 = bitcast <4 x i64> %__O to <32 x i8>
  %2 = bitcast i32 %__M to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %conv.i.i, <32 x i8> %1
  %4 = bitcast <32 x i8> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <4 x i64> @test_mm512_maskz_cvtepi16_epi8(i32 %__M, <8 x i64> %__A) {
; X86-LABEL: test_mm512_maskz_cvtepi16_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpmovwb %zmm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_cvtepi16_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpmovwb %zmm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %conv.i.i = trunc <32 x i16> %0 to <32 x i8>
  %1 = bitcast i32 %__M to <32 x i1>
  %2 = select <32 x i1> %1, <32 x i8> %conv.i.i, <32 x i8> zeroinitializer
  %3 = bitcast <32 x i8> %2 to <4 x i64>
  ret <4 x i64> %3
}

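; vpmovwb truncates each 16-bit element to 8 bits; roughly (a sketch):
;
;   __m256i test_mm512_mask_cvtepi16_epi8(__m256i O, __mmask32 M, __m512i A) {
;     return _mm512_mask_cvtepi16_epi8(O, M, A);
;   }
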
define <8 x i64> @test_mm512_mask2_permutex2var_epi16(<8 x i64> %__A, <8 x i64> %__I, i32 %__U, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask2_permutex2var_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 {%k1}
; X86-NEXT: vmovdqa64 %zmm1, %zmm0
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_mask2_permutex2var_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 {%k1}
; X64-NEXT: vmovdqa64 %zmm1, %zmm0
; X64-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__I to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> %1
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_permutex2var_epi16(<8 x i64> %__A, <8 x i64> %__I, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_permutex2var_epi16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpermt2w %zmm2, %zmm1, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__I to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_permutex2var_epi16(<8 x i64> %__A, i32 %__U, <8 x i64> %__I, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_permutex2var_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpermt2w %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_mask_permutex2var_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpermt2w %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__I to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> %0
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_permutex2var_epi16(i32 %__U, <8 x i64> %__A, <8 x i64> %__I, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_permutex2var_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpermt2w %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_permutex2var_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpermt2w %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__I to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> zeroinitializer
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

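; vpermt2w/vpermi2w select 16-bit elements from the concatenation of two
; source vectors using each index's low bits; roughly (a sketch):
;
;   __m512i test_mm512_permutex2var_epi16(__m512i A, __m512i I, __m512i B) {
;     return _mm512_permutex2var_epi16(A, I, B);
;   }
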
declare <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>)