1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512dq,avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; Shift an 8-lane compare mask up by one lane: lane 0 of %b is taken from the
; zero vector (shuffle index 8) and lanes 1-7 from %a lanes 0-6. The result is
; ANDed with the %y compare and bitcast to i8. The SKX run expects kshiftlb $1.
5 define i8 @kshiftl_v8i1_1(<8 x i64> %x, <8 x i64> %y) {
6 ; KNL-LABEL: kshiftl_v8i1_1:
8 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k1
9 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
10 ; KNL-NEXT: movb $-2, %al
11 ; KNL-NEXT: kmovw %eax, %k1
12 ; KNL-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z}
13 ; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1
14 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
15 ; KNL-NEXT: kmovw %k0, %eax
16 ; KNL-NEXT: # kill: def $al killed $al killed $eax
17 ; KNL-NEXT: vzeroupper
20 ; SKX-LABEL: kshiftl_v8i1_1:
22 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k0
23 ; SKX-NEXT: kshiftlb $1, %k0, %k1
24 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k0 {%k1}
25 ; SKX-NEXT: kmovd %k0, %eax
26 ; SKX-NEXT: # kill: def $al killed $al killed $eax
27 ; SKX-NEXT: vzeroupper
29 %a = icmp eq <8 x i64> %x, zeroinitializer
30 %b = shufflevector <8 x i1> %a, <8 x i1> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
31 %c = icmp eq <8 x i64> %y, zeroinitializer
32 %d = and <8 x i1> %b, %c
33 %e = bitcast <8 x i1> %d to i8
; Shift a 16-lane compare mask up by one lane: lane 0 of %b comes from the zero
; vector (shuffle index 16), lanes 1-15 from %a lanes 0-14. Both runs expect a
; single kshiftlw $1 feeding the masked compare of %y.
37 define i16 @kshiftl_v16i1_1(<16 x i32> %x, <16 x i32> %y) {
38 ; KNL-LABEL: kshiftl_v16i1_1:
40 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
41 ; KNL-NEXT: kshiftlw $1, %k0, %k1
42 ; KNL-NEXT: vptestnmd %zmm1, %zmm1, %k0 {%k1}
43 ; KNL-NEXT: kmovw %k0, %eax
44 ; KNL-NEXT: # kill: def $ax killed $ax killed $eax
45 ; KNL-NEXT: vzeroupper
48 ; SKX-LABEL: kshiftl_v16i1_1:
50 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
51 ; SKX-NEXT: kshiftlw $1, %k0, %k1
52 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k0 {%k1}
53 ; SKX-NEXT: kmovd %k0, %eax
54 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax
55 ; SKX-NEXT: vzeroupper
57 %a = icmp eq <16 x i32> %x, zeroinitializer
58 %b = shufflevector <16 x i1> %a, <16 x i1> zeroinitializer, <16 x i32> <i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
59 %c = icmp eq <16 x i32> %y, zeroinitializer
60 %d = and <16 x i1> %b, %c
61 %e = bitcast <16 x i1> %d to i16
; Shift a 32-lane compare mask up by one lane: lane 0 of %b comes from the zero
; vector (shuffle index 32), lanes 1-31 from %a lanes 0-30. The SKX run expects
; one kshiftld $1; the avx512f-only run expects the work done in 16-lane pieces
; (valignd plus kshiftlw $1) that are recombined with shll $16 / orl.
66 define i32 @kshiftl_v32i1_1(<32 x i16> %x, <32 x i16> %y) {
67 ; KNL-LABEL: kshiftl_v32i1_1:
69 ; KNL-NEXT: vpxor %xmm4, %xmm4, %xmm4
70 ; KNL-NEXT: vpcmpeqw %ymm4, %ymm1, %ymm1
71 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
72 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
73 ; KNL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
74 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
75 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k2
76 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
77 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
78 ; KNL-NEXT: valignd {{.*#+}} zmm0 = zmm0[15],zmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
79 ; KNL-NEXT: kshiftlw $1, %k2, %k1
80 ; KNL-NEXT: vpcmpeqw %ymm4, %ymm3, %ymm1
81 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
82 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k2
83 ; KNL-NEXT: vpcmpeqw %ymm4, %ymm2, %ymm1
84 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
85 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k1}
86 ; KNL-NEXT: kmovw %k0, %ecx
87 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k2}
88 ; KNL-NEXT: kmovw %k0, %eax
89 ; KNL-NEXT: shll $16, %eax
90 ; KNL-NEXT: orl %ecx, %eax
91 ; KNL-NEXT: vzeroupper
94 ; SKX-LABEL: kshiftl_v32i1_1:
96 ; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k0
97 ; SKX-NEXT: kshiftld $1, %k0, %k1
98 ; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k0 {%k1}
99 ; SKX-NEXT: kmovd %k0, %eax
100 ; SKX-NEXT: vzeroupper
102 %a = icmp eq <32 x i16> %x, zeroinitializer
103 %b = shufflevector <32 x i1> %a, <32 x i1> zeroinitializer, <32 x i32> <i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
104 %c = icmp eq <32 x i16> %y, zeroinitializer
105 %d = and <32 x i1> %b, %c
106 %e = bitcast <32 x i1> %d to i32
; Shift a 64-lane compare mask up by one lane: lane 0 of %b comes from the zero
; vector (shuffle index 64), lanes 1-63 from %a lanes 0-62. The SKX run expects
; one kshiftlq $1; the avx512f-only run expects four 16-lane pieces that are
; merged into the i64 result with shll $16 / orl / shlq $32 / orq.
110 define i64 @kshiftl_v64i1_1(<64 x i8> %x, <64 x i8> %y) {
111 ; KNL-LABEL: kshiftl_v64i1_1:
113 ; KNL-NEXT: vpxor %xmm4, %xmm4, %xmm4
114 ; KNL-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm0
115 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm5
116 ; KNL-NEXT: vptestmd %zmm5, %zmm5, %k1
117 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
118 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
119 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k2
120 ; KNL-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm0
121 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
122 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
123 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k3
124 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
125 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k4
126 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z}
127 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k3} {z}
128 ; KNL-NEXT: valignd {{.*#+}} zmm1 = zmm0[15],zmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
129 ; KNL-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z}
130 ; KNL-NEXT: valignd {{.*#+}} zmm0 = zmm5[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
131 ; KNL-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k1} {z}
132 ; KNL-NEXT: valignd {{.*#+}} zmm5 = zmm6[15],zmm5[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
133 ; KNL-NEXT: kshiftlw $1, %k1, %k3
134 ; KNL-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm3
135 ; KNL-NEXT: vextracti128 $1, %ymm3, %xmm6
136 ; KNL-NEXT: vpmovsxbd %xmm6, %zmm6
137 ; KNL-NEXT: vptestmd %zmm6, %zmm6, %k1
138 ; KNL-NEXT: vpmovsxbd %xmm3, %zmm3
139 ; KNL-NEXT: vptestmd %zmm3, %zmm3, %k2
140 ; KNL-NEXT: vpcmpeqb %ymm4, %ymm2, %ymm2
141 ; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3
142 ; KNL-NEXT: vpmovsxbd %xmm3, %zmm3
143 ; KNL-NEXT: vptestmd %zmm3, %zmm3, %k4
144 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
145 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 {%k3}
146 ; KNL-NEXT: kmovw %k0, %eax
147 ; KNL-NEXT: vptestmd %zmm5, %zmm5, %k0 {%k4}
148 ; KNL-NEXT: kmovw %k0, %ecx
149 ; KNL-NEXT: shll $16, %ecx
150 ; KNL-NEXT: orl %eax, %ecx
151 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k2}
152 ; KNL-NEXT: kmovw %k0, %edx
153 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k1}
154 ; KNL-NEXT: kmovw %k0, %eax
155 ; KNL-NEXT: shll $16, %eax
156 ; KNL-NEXT: orl %edx, %eax
157 ; KNL-NEXT: shlq $32, %rax
158 ; KNL-NEXT: orq %rcx, %rax
159 ; KNL-NEXT: vzeroupper
162 ; SKX-LABEL: kshiftl_v64i1_1:
164 ; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k0
165 ; SKX-NEXT: kshiftlq $1, %k0, %k1
166 ; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k0 {%k1}
167 ; SKX-NEXT: kmovq %k0, %rax
168 ; SKX-NEXT: vzeroupper
170 %a = icmp eq <64 x i8> %x, zeroinitializer
171 %b = shufflevector <64 x i1> %a, <64 x i1> zeroinitializer, <64 x i32> <i32 64, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
172 %c = icmp eq <64 x i8> %y, zeroinitializer
173 %d = and <64 x i1> %b, %c
174 %e = bitcast <64 x i1> %d to i64
; Shift an 8-lane compare mask up by seven lanes: the shuffle operands are
; (zeroinitializer, %a), so lanes 0-6 of %b come from the zero vector and lane 7
; is %a lane 0 (shuffle index 8). The SKX run expects kshiftlb $7.
177 define i8 @kshiftl_v8i1_7(<8 x i64> %x, <8 x i64> %y) {
178 ; KNL-LABEL: kshiftl_v8i1_7:
180 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k1
181 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
182 ; KNL-NEXT: movb $-128, %al
183 ; KNL-NEXT: kmovw %eax, %k1
184 ; KNL-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z}
185 ; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1
186 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
187 ; KNL-NEXT: kmovw %k0, %eax
188 ; KNL-NEXT: # kill: def $al killed $al killed $eax
189 ; KNL-NEXT: vzeroupper
192 ; SKX-LABEL: kshiftl_v8i1_7:
194 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k0
195 ; SKX-NEXT: kshiftlb $7, %k0, %k1
196 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k0 {%k1}
197 ; SKX-NEXT: kmovd %k0, %eax
198 ; SKX-NEXT: # kill: def $al killed $al killed $eax
199 ; SKX-NEXT: vzeroupper
201 %a = icmp eq <8 x i64> %x, zeroinitializer
202 %b = shufflevector <8 x i1> zeroinitializer, <8 x i1> %a, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
203 %c = icmp eq <8 x i64> %y, zeroinitializer
204 %d = and <8 x i1> %b, %c
205 %e = bitcast <8 x i1> %d to i8
; Shift a 16-lane compare mask up by fifteen lanes: with operands
; (zeroinitializer, %a), lanes 0-14 of %b are zero and lane 15 is %a lane 0
; (shuffle index 16). Both runs expect kshiftlw $15.
209 define i16 @kshiftl_v16i1_15(<16 x i32> %x, <16 x i32> %y) {
210 ; KNL-LABEL: kshiftl_v16i1_15:
212 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
213 ; KNL-NEXT: kshiftlw $15, %k0, %k1
214 ; KNL-NEXT: vptestnmd %zmm1, %zmm1, %k0 {%k1}
215 ; KNL-NEXT: kmovw %k0, %eax
216 ; KNL-NEXT: # kill: def $ax killed $ax killed $eax
217 ; KNL-NEXT: vzeroupper
220 ; SKX-LABEL: kshiftl_v16i1_15:
222 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
223 ; SKX-NEXT: kshiftlw $15, %k0, %k1
224 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k0 {%k1}
225 ; SKX-NEXT: kmovd %k0, %eax
226 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax
227 ; SKX-NEXT: vzeroupper
229 %a = icmp eq <16 x i32> %x, zeroinitializer
230 %b = shufflevector <16 x i1> zeroinitializer, <16 x i1> %a, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
231 %c = icmp eq <16 x i32> %y, zeroinitializer
232 %d = and <16 x i1> %b, %c
233 %e = bitcast <16 x i1> %d to i16
; Shift a 32-lane compare mask up by thirty-one lanes: with operands
; (zeroinitializer, %a), lanes 0-30 of %b are zero and lane 31 is %a lane 0
; (shuffle index 32). The SKX run expects kshiftld $31; the avx512f-only run
; expects a single 16-lane kshiftlw $15 whose result is moved up with shll $16.
237 define i32 @kshiftl_v32i1_31(<32 x i16> %x, <32 x i16> %y) {
238 ; KNL-LABEL: kshiftl_v32i1_31:
240 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
241 ; KNL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
242 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
243 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
244 ; KNL-NEXT: kshiftlw $15, %k0, %k1
245 ; KNL-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm0
246 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
247 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
248 ; KNL-NEXT: kmovw %k0, %eax
249 ; KNL-NEXT: shll $16, %eax
250 ; KNL-NEXT: vzeroupper
253 ; SKX-LABEL: kshiftl_v32i1_31:
255 ; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k0
256 ; SKX-NEXT: kshiftld $31, %k0, %k1
257 ; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k0 {%k1}
258 ; SKX-NEXT: kmovd %k0, %eax
259 ; SKX-NEXT: vzeroupper
261 %a = icmp eq <32 x i16> %x, zeroinitializer
262 %b = shufflevector <32 x i1> zeroinitializer, <32 x i1> %a, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32>
263 %c = icmp eq <32 x i16> %y, zeroinitializer
264 %d = and <32 x i1> %b, %c
265 %e = bitcast <32 x i1> %d to i32
; Shift a 64-lane compare mask up by sixty-three lanes: with operands
; (zeroinitializer, %a), lanes 0-62 of %b are zero and lane 63 is %a lane 0
; (shuffle index 64). The SKX run expects kshiftlq $63; the avx512f-only run
; expects a single 16-lane kshiftlw $15 whose result is moved up with shlq $48.
269 define i64 @kshiftl_v64i1_63(<64 x i8> %x, <64 x i8> %y) {
270 ; KNL-LABEL: kshiftl_v64i1_63:
272 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
273 ; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
274 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
275 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
276 ; KNL-NEXT: kshiftlw $15, %k0, %k1
277 ; KNL-NEXT: vextracti128 $1, %ymm3, %xmm0
278 ; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
279 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
280 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
281 ; KNL-NEXT: kmovw %k0, %eax
282 ; KNL-NEXT: movzwl %ax, %eax
283 ; KNL-NEXT: shlq $48, %rax
284 ; KNL-NEXT: vzeroupper
287 ; SKX-LABEL: kshiftl_v64i1_63:
289 ; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k0
290 ; SKX-NEXT: kshiftlq $63, %k0, %k1
291 ; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k0 {%k1}
292 ; SKX-NEXT: kmovq %k0, %rax
293 ; SKX-NEXT: vzeroupper
295 %a = icmp eq <64 x i8> %x, zeroinitializer
296 %b = shufflevector <64 x i1> zeroinitializer, <64 x i1> %a, <64 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64>
297 %c = icmp eq <64 x i8> %y, zeroinitializer
298 %d = and <64 x i1> %b, %c
299 %e = bitcast <64 x i1> %d to i64
; Shift an 8-lane compare mask down by one lane: lanes 0-6 of %b take %a lanes
; 1-7 and lane 7 comes from the zero vector (shuffle index 8). The SKX run
; expects kshiftrb $1; the avx512f-only run expects a vpermi2q lane permute.
303 define i8 @kshiftr_v8i1_1(<8 x i64> %x, <8 x i64> %y) {
304 ; KNL-LABEL: kshiftr_v8i1_1:
306 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k1
307 ; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
308 ; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
309 ; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,2,3,4,5,6,7,15]
310 ; KNL-NEXT: vpermi2q %zmm0, %zmm2, %zmm3
311 ; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1
312 ; KNL-NEXT: vptestmq %zmm3, %zmm3, %k0 {%k1}
313 ; KNL-NEXT: kmovw %k0, %eax
314 ; KNL-NEXT: # kill: def $al killed $al killed $eax
315 ; KNL-NEXT: vzeroupper
318 ; SKX-LABEL: kshiftr_v8i1_1:
320 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k0
321 ; SKX-NEXT: kshiftrb $1, %k0, %k1
322 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k0 {%k1}
323 ; SKX-NEXT: kmovd %k0, %eax
324 ; SKX-NEXT: # kill: def $al killed $al killed $eax
325 ; SKX-NEXT: vzeroupper
327 %a = icmp eq <8 x i64> %x, zeroinitializer
328 %b = shufflevector <8 x i1> %a, <8 x i1> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
329 %c = icmp eq <8 x i64> %y, zeroinitializer
330 %d = and <8 x i1> %b, %c
331 %e = bitcast <8 x i1> %d to i8
; Shift a 16-lane compare mask down by one lane: lanes 0-14 of %b take %a lanes
; 1-15 and lane 15 comes from the zero vector (shuffle index 16). Both runs
; expect kshiftrw $1.
335 define i16 @kshiftr_v16i1_1(<16 x i32> %x, <16 x i32> %y) {
336 ; KNL-LABEL: kshiftr_v16i1_1:
338 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
339 ; KNL-NEXT: kshiftrw $1, %k0, %k1
340 ; KNL-NEXT: vptestnmd %zmm1, %zmm1, %k0 {%k1}
341 ; KNL-NEXT: kmovw %k0, %eax
342 ; KNL-NEXT: # kill: def $ax killed $ax killed $eax
343 ; KNL-NEXT: vzeroupper
346 ; SKX-LABEL: kshiftr_v16i1_1:
348 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
349 ; SKX-NEXT: kshiftrw $1, %k0, %k1
350 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k0 {%k1}
351 ; SKX-NEXT: kmovd %k0, %eax
352 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax
353 ; SKX-NEXT: vzeroupper
355 %a = icmp eq <16 x i32> %x, zeroinitializer
356 %b = shufflevector <16 x i1> %a, <16 x i1> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
357 %c = icmp eq <16 x i32> %y, zeroinitializer
358 %d = and <16 x i1> %b, %c
359 %e = bitcast <16 x i1> %d to i16
; Shift a 32-lane compare mask down by one lane: lanes 0-30 of %b take %a lanes
; 1-31 and lane 31 comes from the zero vector (shuffle index 32). The SKX run
; expects kshiftrd $1; the avx512f-only run expects 16-lane pieces (valignd plus
; kshiftrw $1) recombined with shll $16 / orl.
363 define i32 @kshiftr_v32i1_1(<32 x i16> %x, <32 x i16> %y) {
364 ; KNL-LABEL: kshiftr_v32i1_1:
366 ; KNL-NEXT: vpxor %xmm4, %xmm4, %xmm4
367 ; KNL-NEXT: vpcmpeqw %ymm4, %ymm1, %ymm1
368 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
369 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
370 ; KNL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
371 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
372 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k2
373 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
374 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
375 ; KNL-NEXT: valignd {{.*#+}} zmm0 = zmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0]
376 ; KNL-NEXT: kshiftrw $1, %k1, %k1
377 ; KNL-NEXT: vpcmpeqw %ymm4, %ymm2, %ymm1
378 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
379 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k2
380 ; KNL-NEXT: vpcmpeqw %ymm4, %ymm3, %ymm1
381 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
382 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k1}
383 ; KNL-NEXT: kmovw %k0, %ecx
384 ; KNL-NEXT: shll $16, %ecx
385 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k2}
386 ; KNL-NEXT: kmovw %k0, %eax
387 ; KNL-NEXT: orl %ecx, %eax
388 ; KNL-NEXT: vzeroupper
391 ; SKX-LABEL: kshiftr_v32i1_1:
393 ; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k0
394 ; SKX-NEXT: kshiftrd $1, %k0, %k1
395 ; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k0 {%k1}
396 ; SKX-NEXT: kmovd %k0, %eax
397 ; SKX-NEXT: vzeroupper
399 %a = icmp eq <32 x i16> %x, zeroinitializer
400 %b = shufflevector <32 x i1> %a, <32 x i1> zeroinitializer, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32>
401 %c = icmp eq <32 x i16> %y, zeroinitializer
402 %d = and <32 x i1> %b, %c
403 %e = bitcast <32 x i1> %d to i32
; Shift a 64-lane compare mask down by one lane: lanes 0-62 of %b take %a lanes
; 1-63 and lane 63 comes from the zero vector (shuffle index 64). The SKX run
; expects kshiftrq $1; the avx512f-only run expects four 16-lane pieces merged
; into the i64 result with shll $16 / orl / shlq $32 / orq.
407 define i64 @kshiftr_v64i1_1(<64 x i8> %x, <64 x i8> %y) {
408 ; KNL-LABEL: kshiftr_v64i1_1:
410 ; KNL-NEXT: vpxor %xmm4, %xmm4, %xmm4
411 ; KNL-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm1
412 ; KNL-NEXT: vextracti128 $1, %ymm1, %xmm5
413 ; KNL-NEXT: vpmovsxbd %xmm5, %zmm5
414 ; KNL-NEXT: vptestmd %zmm5, %zmm5, %k1
415 ; KNL-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm0
416 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm5
417 ; KNL-NEXT: vptestmd %zmm5, %zmm5, %k2
418 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
419 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k3
420 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
421 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
422 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k4
423 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z}
424 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k3} {z}
425 ; KNL-NEXT: valignd {{.*#+}} zmm5 = zmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0]
426 ; KNL-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z}
427 ; KNL-NEXT: valignd {{.*#+}} zmm0 = zmm6[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0]
428 ; KNL-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k1} {z}
429 ; KNL-NEXT: valignd {{.*#+}} zmm1 = zmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm6[0]
430 ; KNL-NEXT: kshiftrw $1, %k1, %k3
431 ; KNL-NEXT: vpcmpeqb %ymm4, %ymm2, %ymm2
432 ; KNL-NEXT: vextracti128 $1, %ymm2, %xmm6
433 ; KNL-NEXT: vpmovsxbd %xmm6, %zmm6
434 ; KNL-NEXT: vptestmd %zmm6, %zmm6, %k1
435 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
436 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k2
437 ; KNL-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm2
438 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm3
439 ; KNL-NEXT: vptestmd %zmm3, %zmm3, %k4
440 ; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2
441 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
442 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 {%k3}
443 ; KNL-NEXT: kmovw %k0, %eax
444 ; KNL-NEXT: shll $16, %eax
445 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k4}
446 ; KNL-NEXT: kmovw %k0, %ecx
447 ; KNL-NEXT: orl %eax, %ecx
448 ; KNL-NEXT: shlq $32, %rcx
449 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k2}
450 ; KNL-NEXT: kmovw %k0, %edx
451 ; KNL-NEXT: vptestmd %zmm5, %zmm5, %k0 {%k1}
452 ; KNL-NEXT: kmovw %k0, %eax
453 ; KNL-NEXT: shll $16, %eax
454 ; KNL-NEXT: orl %edx, %eax
455 ; KNL-NEXT: orq %rcx, %rax
456 ; KNL-NEXT: vzeroupper
459 ; SKX-LABEL: kshiftr_v64i1_1:
461 ; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k0
462 ; SKX-NEXT: kshiftrq $1, %k0, %k1
463 ; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k0 {%k1}
464 ; SKX-NEXT: kmovq %k0, %rax
465 ; SKX-NEXT: vzeroupper
467 %a = icmp eq <64 x i8> %x, zeroinitializer
468 %b = shufflevector <64 x i1> %a, <64 x i1> zeroinitializer, <64 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64>
469 %c = icmp eq <64 x i8> %y, zeroinitializer
470 %d = and <64 x i1> %b, %c
471 %e = bitcast <64 x i1> %d to i64
; Named as a shift-right-by-7 test on an 8-lane compare mask.
; NOTE(review): as written, the shuffle operands are (%a, zeroinitializer), so
; lane 0 of %b comes from the zero vector (shuffle index 15) and lanes 1-7 take
; %a lanes 0-6. That is a shift UP by one lane, and the assertions below agree
; (the SKX run expects kshiftlb $1). The sibling tests kshiftr_v16i1_15,
; kshiftr_v32i1_31 and kshiftr_v64i1_63 pass zeroinitializer as the FIRST
; operand so that lane 0 receives the top lane of %a; the operands here look
; swapped. TODO confirm intent; fixing the IR requires regenerating the
; assertions with utils/update_llc_test_checks.py.
475 define i8 @kshiftr_v8i1_7(<8 x i64> %x, <8 x i64> %y) {
476 ; KNL-LABEL: kshiftr_v8i1_7:
478 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k1
479 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
480 ; KNL-NEXT: movb $-2, %al
481 ; KNL-NEXT: kmovw %eax, %k1
482 ; KNL-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z}
483 ; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1
484 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
485 ; KNL-NEXT: kmovw %k0, %eax
486 ; KNL-NEXT: # kill: def $al killed $al killed $eax
487 ; KNL-NEXT: vzeroupper
490 ; SKX-LABEL: kshiftr_v8i1_7:
492 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k0
493 ; SKX-NEXT: kshiftlb $1, %k0, %k1
494 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k0 {%k1}
495 ; SKX-NEXT: kmovd %k0, %eax
496 ; SKX-NEXT: # kill: def $al killed $al killed $eax
497 ; SKX-NEXT: vzeroupper
499 %a = icmp eq <8 x i64> %x, zeroinitializer
500 %b = shufflevector <8 x i1> %a, <8 x i1> zeroinitializer, <8 x i32> <i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
501 %c = icmp eq <8 x i64> %y, zeroinitializer
502 %d = and <8 x i1> %b, %c
503 %e = bitcast <8 x i1> %d to i8
; Shift a 16-lane compare mask down by fifteen lanes: with operands
; (zeroinitializer, %a), lane 0 of %b is %a lane 15 (shuffle index 31) and lanes
; 1-15 come from the zero vector. Both runs expect kshiftrw $15.
507 define i16 @kshiftr_v16i1_15(<16 x i32> %x, <16 x i32> %y) {
508 ; KNL-LABEL: kshiftr_v16i1_15:
510 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
511 ; KNL-NEXT: kshiftrw $15, %k0, %k1
512 ; KNL-NEXT: vptestnmd %zmm1, %zmm1, %k0 {%k1}
513 ; KNL-NEXT: kmovw %k0, %eax
514 ; KNL-NEXT: # kill: def $ax killed $ax killed $eax
515 ; KNL-NEXT: vzeroupper
518 ; SKX-LABEL: kshiftr_v16i1_15:
520 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
521 ; SKX-NEXT: kshiftrw $15, %k0, %k1
522 ; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k0 {%k1}
523 ; SKX-NEXT: kmovd %k0, %eax
524 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax
525 ; SKX-NEXT: vzeroupper
527 %a = icmp eq <16 x i32> %x, zeroinitializer
528 %b = shufflevector <16 x i1> zeroinitializer, <16 x i1> %a, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
529 %c = icmp eq <16 x i32> %y, zeroinitializer
530 %d = and <16 x i1> %b, %c
531 %e = bitcast <16 x i1> %d to i16
; Shift a 32-lane compare mask down by thirty-one lanes: with operands
; (zeroinitializer, %a), lane 0 of %b is %a lane 31 (shuffle index 63) and lanes
; 1-31 come from the zero vector. The SKX run expects kshiftrd $31; the
; avx512f-only run expects a single 16-lane kshiftrw $15.
535 define i32 @kshiftr_v32i1_31(<32 x i16> %x, <32 x i16> %y) {
536 ; KNL-LABEL: kshiftr_v32i1_31:
538 ; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
539 ; KNL-NEXT: vpcmpeqw %ymm0, %ymm1, %ymm1
540 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
541 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
542 ; KNL-NEXT: kshiftrw $15, %k0, %k1
543 ; KNL-NEXT: vpcmpeqw %ymm0, %ymm2, %ymm0
544 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
545 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
546 ; KNL-NEXT: kmovw %k0, %eax
547 ; KNL-NEXT: vzeroupper
550 ; SKX-LABEL: kshiftr_v32i1_31:
552 ; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k0
553 ; SKX-NEXT: kshiftrd $31, %k0, %k1
554 ; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k0 {%k1}
555 ; SKX-NEXT: kmovd %k0, %eax
556 ; SKX-NEXT: vzeroupper
558 %a = icmp eq <32 x i16> %x, zeroinitializer
559 %b = shufflevector <32 x i1> zeroinitializer, <32 x i1> %a, <32 x i32> <i32 63, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
560 %c = icmp eq <32 x i16> %y, zeroinitializer
561 %d = and <32 x i1> %b, %c
562 %e = bitcast <32 x i1> %d to i32
; Shift a 64-lane compare mask down by sixty-three lanes: with operands
; (zeroinitializer, %a), lane 0 of %b is %a lane 63 (shuffle index 127) and
; lanes 1-63 come from the zero vector. The SKX run expects kshiftrq $63; the
; avx512f-only run expects a single 16-lane kshiftrw $15.
566 define i64 @kshiftr_v64i1_63(<64 x i8> %x, <64 x i8> %y) {
567 ; KNL-LABEL: kshiftr_v64i1_63:
569 ; KNL-NEXT: vextracti128 $1, %ymm1, %xmm0
570 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
571 ; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
572 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
573 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
574 ; KNL-NEXT: kshiftrw $15, %k0, %k1
575 ; KNL-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm0
576 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
577 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
578 ; KNL-NEXT: kmovw %k0, %eax
579 ; KNL-NEXT: movzwl %ax, %eax
580 ; KNL-NEXT: vzeroupper
583 ; SKX-LABEL: kshiftr_v64i1_63:
585 ; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k0
586 ; SKX-NEXT: kshiftrq $63, %k0, %k1
587 ; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k0 {%k1}
588 ; SKX-NEXT: kmovq %k0, %rax
589 ; SKX-NEXT: vzeroupper
591 %a = icmp eq <64 x i8> %x, zeroinitializer
592 %b = shufflevector <64 x i1> zeroinitializer, <64 x i1> %a, <64 x i32> <i32 127, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
593 %c = icmp eq <64 x i8> %y, zeroinitializer
594 %d = and <64 x i1> %b, %c
595 %e = bitcast <64 x i1> %d to i64
; Shift-up-by-one pattern with undef lanes in the shuffle mask: lane 0 comes
; from the zero vector (index 8), lanes 1 and 5 are undef, and lanes 2, 3, 4, 6
; and 7 take %a lanes 1, 2, 3, 5 and 6. The SKX run still expects kshiftlb $1;
; the avx512f-only run expects a vpermi2q permute followed by vpsllq $63.
599 define i8 @kshiftl_v8i1_zu123u56(<8 x i64> %x, <8 x i64> %y) {
600 ; KNL-LABEL: kshiftl_v8i1_zu123u56:
602 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k1
603 ; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
604 ; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
605 ; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = <8,u,1,2,3,u,5,6>
606 ; KNL-NEXT: vpermi2q %zmm0, %zmm2, %zmm3
607 ; KNL-NEXT: vpsllq $63, %zmm3, %zmm0
608 ; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1
609 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
610 ; KNL-NEXT: kmovw %k0, %eax
611 ; KNL-NEXT: # kill: def $al killed $al killed $eax
612 ; KNL-NEXT: vzeroupper
615 ; SKX-LABEL: kshiftl_v8i1_zu123u56:
617 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k0
618 ; SKX-NEXT: kshiftlb $1, %k0, %k1
619 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k0 {%k1}
620 ; SKX-NEXT: kmovd %k0, %eax
621 ; SKX-NEXT: # kill: def $al killed $al killed $eax
622 ; SKX-NEXT: vzeroupper
624 %a = icmp eq <8 x i64> %x, zeroinitializer
625 %b = shufflevector <8 x i1> %a, <8 x i1> zeroinitializer, <8 x i32> <i32 8, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 5, i32 6>
626 %c = icmp eq <8 x i64> %y, zeroinitializer
627 %d = and <8 x i1> %b, %c
628 %e = bitcast <8 x i1> %d to i8
; Shift-up-by-one pattern where the shifted-in lane is undef rather than zero:
; the second shuffle operand is undef, lane 0 of the mask is undef, and lanes
; 1-7 take %a lanes 0-6. The SKX run expects kshiftlb $1; the avx512f-only run
; expects a valignq rotate followed by vpsllq $63.
632 define i8 @kshiftl_v8i1_u0123456(<8 x i64> %x, <8 x i64> %y) {
633 ; KNL-LABEL: kshiftl_v8i1_u0123456:
635 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k1
636 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
637 ; KNL-NEXT: valignq {{.*#+}} zmm0 = zmm0[7,0,1,2,3,4,5,6]
638 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
639 ; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1
640 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
641 ; KNL-NEXT: kmovw %k0, %eax
642 ; KNL-NEXT: # kill: def $al killed $al killed $eax
643 ; KNL-NEXT: vzeroupper
646 ; SKX-LABEL: kshiftl_v8i1_u0123456:
648 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k0
649 ; SKX-NEXT: kshiftlb $1, %k0, %k1
650 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k0 {%k1}
651 ; SKX-NEXT: kmovd %k0, %eax
652 ; SKX-NEXT: # kill: def $al killed $al killed $eax
653 ; SKX-NEXT: vzeroupper
655 %a = icmp eq <8 x i64> %x, zeroinitializer
656 %b = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
657 %c = icmp eq <8 x i64> %y, zeroinitializer
658 %d = and <8 x i1> %b, %c
659 %e = bitcast <8 x i1> %d to i8
; Shift-down-by-one pattern with undef lanes in the shuffle mask: lanes 0, 2, 4,
; 5 and 6 take %a lanes 1, 3, 5, 6 and 7, lanes 1 and 3 are undef, and lane 7
; comes from the zero vector (index 8). The SKX run still expects kshiftrb $1;
; the avx512f-only run expects a vpermi2q permute followed by vpsllq $63.
663 define i8 @kshiftr_v8i1_1u3u567z(<8 x i64> %x, <8 x i64> %y) {
664 ; KNL-LABEL: kshiftr_v8i1_1u3u567z:
666 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k1
667 ; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
668 ; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
669 ; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = <1,u,3,u,5,6,7,15>
670 ; KNL-NEXT: vpermi2q %zmm0, %zmm2, %zmm3
671 ; KNL-NEXT: vpsllq $63, %zmm3, %zmm0
672 ; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1
673 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
674 ; KNL-NEXT: kmovw %k0, %eax
675 ; KNL-NEXT: # kill: def $al killed $al killed $eax
676 ; KNL-NEXT: vzeroupper
679 ; SKX-LABEL: kshiftr_v8i1_1u3u567z:
681 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k0
682 ; SKX-NEXT: kshiftrb $1, %k0, %k1
683 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k0 {%k1}
684 ; SKX-NEXT: kmovd %k0, %eax
685 ; SKX-NEXT: # kill: def $al killed $al killed $eax
686 ; SKX-NEXT: vzeroupper
688 %a = icmp eq <8 x i64> %x, zeroinitializer
689 %b = shufflevector <8 x i1> %a, <8 x i1> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 undef, i32 5, i32 6, i32 7, i32 8>
690 %c = icmp eq <8 x i64> %y, zeroinitializer
691 %d = and <8 x i1> %b, %c
692 %e = bitcast <8 x i1> %d to i8
; Shift-down-by-two pattern where the shifted-in lanes are undef: lanes 0-5 of
; %b take %a lanes 2-7, while lanes 6 and 7 use indices 8 and 10, which select
; from the undef second operand. The SKX run expects kshiftrb $2; the
; avx512f-only run expects a vshufi64x2 rotate followed by vpsllq $63.
696 define i8 @kshiftr_v8i1_234567uu(<8 x i64> %x, <8 x i64> %y) {
697 ; KNL-LABEL: kshiftr_v8i1_234567uu:
699 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k1
700 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
701 ; KNL-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,4,5,6,7,0,1]
702 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
703 ; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1
704 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
705 ; KNL-NEXT: kmovw %k0, %eax
706 ; KNL-NEXT: # kill: def $al killed $al killed $eax
707 ; KNL-NEXT: vzeroupper
710 ; SKX-LABEL: kshiftr_v8i1_234567uu:
712 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k0
713 ; SKX-NEXT: kshiftrb $2, %k0, %k1
714 ; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k0 {%k1}
715 ; SKX-NEXT: kmovd %k0, %eax
716 ; SKX-NEXT: # kill: def $al killed $al killed $eax
717 ; SKX-NEXT: vzeroupper
719 %a = icmp eq <8 x i64> %x, zeroinitializer
720 %b = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 10>
721 %c = icmp eq <8 x i64> %y, zeroinitializer
722 %d = and <8 x i1> %b, %c
723 %e = bitcast <8 x i1> %d to i8