1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-peephole -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=VLX
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-peephole -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=NoVLX
; Byte-wise equality compare of two 128-bit register operands viewed as <16 x i8>;
; the 16-lane i1 result is widened to 32 lanes and bitcast to an i32 mask.
; With avx512bw/avx512vl the expected code is vpcmpeqb into a k-register plus kmovd;
; with avx512f only it is an xmm compare, vpmovsxbd to zmm, vptestmd and kmovw.
5 define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6 ; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
7 ; VLX: # %bb.0: # %entry
8 ; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
9 ; VLX-NEXT: kmovd %k0, %eax
12 ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
13 ; NoVLX: # %bb.0: # %entry
14 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
15 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
16 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
17 ; NoVLX-NEXT: kmovw %k0, %eax
18 ; NoVLX-NEXT: vzeroupper
21 %0 = bitcast <2 x i64> %__a to <16 x i8>
22 %1 = bitcast <2 x i64> %__b to <16 x i8>
23 %2 = icmp eq <16 x i8> %0, %1
; Shuffle indices 16-31 select from the zeroinitializer operand, so the upper 16 mask lanes are zero.
24 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
25 %4 = bitcast <32 x i1> %3 to i32
; Memory-operand variant of the <16 x i8> equality compare widened to a 32-bit mask:
; the second operand is loaded through %__b, and the expected assembly folds that
; load into vpcmpeqb as (%rdi) in both run configurations.
29 define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
30 ; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
31 ; VLX: # %bb.0: # %entry
32 ; VLX-NEXT: vpcmpeqb (%rdi), %xmm0, %k0
33 ; VLX-NEXT: kmovd %k0, %eax
36 ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
37 ; NoVLX: # %bb.0: # %entry
38 ; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
39 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
40 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
41 ; NoVLX-NEXT: kmovw %k0, %eax
42 ; NoVLX-NEXT: vzeroupper
45 %0 = bitcast <2 x i64> %__a to <16 x i8>
46 %load = load <2 x i64>, <2 x i64>* %__b
47 %1 = bitcast <2 x i64> %load to <16 x i8>
48 %2 = icmp eq <16 x i8> %0, %1
; Shuffle indices 16-31 select from the zeroinitializer operand, so the upper 16 mask lanes are zero.
49 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
50 %4 = bitcast <32 x i1> %3 to i32
; Masked <16 x i8> equality compare: the compare result is ANDed with %__u
; (reinterpreted as <16 x i1>), widened to 32 lanes and bitcast to i32.
; With avx512bw/avx512vl the mask is expected to become a k1 write-mask on vpcmpeqb;
; with avx512f only it is applied afterwards as a scalar andl with %edi.
54 define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
55 ; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
56 ; VLX: # %bb.0: # %entry
57 ; VLX-NEXT: kmovd %edi, %k1
58 ; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
59 ; VLX-NEXT: kmovd %k0, %eax
62 ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
63 ; NoVLX: # %bb.0: # %entry
64 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
65 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
66 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
67 ; NoVLX-NEXT: kmovw %k0, %eax
68 ; NoVLX-NEXT: andl %edi, %eax
69 ; NoVLX-NEXT: vzeroupper
72 %0 = bitcast <2 x i64> %__a to <16 x i8>
73 %1 = bitcast <2 x i64> %__b to <16 x i8>
74 %2 = icmp eq <16 x i8> %0, %1
; Apply the caller-supplied 16-bit mask lane by lane.
75 %3 = bitcast i16 %__u to <16 x i1>
76 %4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select from the zeroinitializer operand, so the upper 16 mask lanes are zero.
77 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
78 %6 = bitcast <32 x i1> %5 to i32
; Masked, memory-operand variant of the <16 x i8> equality compare widened to i32:
; the second operand is loaded through %__b (expected to fold into vpcmpeqb as (%rsi))
; and the result is ANDed with %__u reinterpreted as <16 x i1>.
82 define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
83 ; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
84 ; VLX: # %bb.0: # %entry
85 ; VLX-NEXT: kmovd %edi, %k1
86 ; VLX-NEXT: vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
87 ; VLX-NEXT: kmovd %k0, %eax
90 ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
91 ; NoVLX: # %bb.0: # %entry
92 ; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
93 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
94 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
95 ; NoVLX-NEXT: kmovw %k0, %eax
96 ; NoVLX-NEXT: andl %edi, %eax
97 ; NoVLX-NEXT: vzeroupper
100 %0 = bitcast <2 x i64> %__a to <16 x i8>
101 %load = load <2 x i64>, <2 x i64>* %__b
102 %1 = bitcast <2 x i64> %load to <16 x i8>
103 %2 = icmp eq <16 x i8> %0, %1
; Apply the caller-supplied 16-bit mask lane by lane.
104 %3 = bitcast i16 %__u to <16 x i1>
105 %4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select from the zeroinitializer operand, so the upper 16 mask lanes are zero.
106 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
107 %6 = bitcast <32 x i1> %5 to i32
; Byte-wise equality compare of two 128-bit register operands viewed as <16 x i8>;
; the 16-lane i1 result is widened to 64 lanes and bitcast to an i64 mask.
; With avx512bw/avx512vl the expected code is vpcmpeqb into a k-register plus kmovq;
; with avx512f only it is an xmm compare, vpmovsxbd to zmm, vptestmd and kmovw.
112 define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
113 ; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
114 ; VLX: # %bb.0: # %entry
115 ; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
116 ; VLX-NEXT: kmovq %k0, %rax
119 ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
120 ; NoVLX: # %bb.0: # %entry
121 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
122 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
123 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
124 ; NoVLX-NEXT: kmovw %k0, %eax
125 ; NoVLX-NEXT: vzeroupper
128 %0 = bitcast <2 x i64> %__a to <16 x i8>
129 %1 = bitcast <2 x i64> %__b to <16 x i8>
130 %2 = icmp eq <16 x i8> %0, %1
; Every shuffle index past the first 16 is in the range 16-31, i.e. it selects from the
; zeroinitializer operand, so the upper 48 mask lanes are zero.
131 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
132 %4 = bitcast <64 x i1> %3 to i64
; Memory-operand variant of the <16 x i8> equality compare widened to a 64-bit mask:
; the second operand is loaded through %__b, and the expected assembly folds that
; load into vpcmpeqb as (%rdi) in both run configurations.
136 define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
137 ; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
138 ; VLX: # %bb.0: # %entry
139 ; VLX-NEXT: vpcmpeqb (%rdi), %xmm0, %k0
140 ; VLX-NEXT: kmovq %k0, %rax
143 ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
144 ; NoVLX: # %bb.0: # %entry
145 ; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
146 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
147 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
148 ; NoVLX-NEXT: kmovw %k0, %eax
149 ; NoVLX-NEXT: vzeroupper
152 %0 = bitcast <2 x i64> %__a to <16 x i8>
153 %load = load <2 x i64>, <2 x i64>* %__b
154 %1 = bitcast <2 x i64> %load to <16 x i8>
155 %2 = icmp eq <16 x i8> %0, %1
; Every shuffle index past the first 16 is in the range 16-31, i.e. it selects from the
; zeroinitializer operand, so the upper 48 mask lanes are zero.
156 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
157 %4 = bitcast <64 x i1> %3 to i64
; Masked <16 x i8> equality compare: the compare result is ANDed with %__u
; (reinterpreted as <16 x i1>), widened to 64 lanes and bitcast to i64.
; With avx512bw/avx512vl the mask is expected to become a k1 write-mask on vpcmpeqb;
; with avx512f only it is applied afterwards as a scalar andl with %edi.
161 define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
162 ; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
163 ; VLX: # %bb.0: # %entry
164 ; VLX-NEXT: kmovd %edi, %k1
165 ; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
166 ; VLX-NEXT: kmovq %k0, %rax
169 ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
170 ; NoVLX: # %bb.0: # %entry
171 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
172 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
173 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
174 ; NoVLX-NEXT: kmovw %k0, %eax
175 ; NoVLX-NEXT: andl %edi, %eax
176 ; NoVLX-NEXT: vzeroupper
179 %0 = bitcast <2 x i64> %__a to <16 x i8>
180 %1 = bitcast <2 x i64> %__b to <16 x i8>
181 %2 = icmp eq <16 x i8> %0, %1
; Apply the caller-supplied 16-bit mask lane by lane.
182 %3 = bitcast i16 %__u to <16 x i1>
183 %4 = and <16 x i1> %2, %3
; Every shuffle index past the first 16 is in the range 16-31, i.e. it selects from the
; zeroinitializer operand, so the upper 48 mask lanes are zero.
184 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
185 %6 = bitcast <64 x i1> %5 to i64
; Masked, memory-operand variant of the <16 x i8> equality compare widened to i64:
; the second operand is loaded through %__b (expected to fold into vpcmpeqb as (%rsi))
; and the result is ANDed with %__u reinterpreted as <16 x i1>.
189 define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
190 ; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
191 ; VLX: # %bb.0: # %entry
192 ; VLX-NEXT: kmovd %edi, %k1
193 ; VLX-NEXT: vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
194 ; VLX-NEXT: kmovq %k0, %rax
197 ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
198 ; NoVLX: # %bb.0: # %entry
199 ; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
200 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
201 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
202 ; NoVLX-NEXT: kmovw %k0, %eax
203 ; NoVLX-NEXT: andl %edi, %eax
204 ; NoVLX-NEXT: vzeroupper
207 %0 = bitcast <2 x i64> %__a to <16 x i8>
208 %load = load <2 x i64>, <2 x i64>* %__b
209 %1 = bitcast <2 x i64> %load to <16 x i8>
210 %2 = icmp eq <16 x i8> %0, %1
; Apply the caller-supplied 16-bit mask lane by lane.
211 %3 = bitcast i16 %__u to <16 x i1>
212 %4 = and <16 x i1> %2, %3
; Every shuffle index past the first 16 is in the range 16-31, i.e. it selects from the
; zeroinitializer operand, so the upper 48 mask lanes are zero.
213 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
214 %6 = bitcast <64 x i1> %5 to i64
; Byte-wise equality compare of two 256-bit register operands viewed as <32 x i8>;
; the 32-lane i1 result is widened to 64 lanes and bitcast to an i64 mask.
; With avx512bw/avx512vl the expected code is a ymm vpcmpeqb into a k-register plus kmovq.
; With avx512f only, each 128-bit half of the ymm compare result is sign-extended and
; tested separately, and the two 16-bit masks are merged with shll $16 / orl.
219 define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
220 ; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
221 ; VLX: # %bb.0: # %entry
222 ; VLX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0
223 ; VLX-NEXT: kmovq %k0, %rax
224 ; VLX-NEXT: vzeroupper
227 ; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
228 ; NoVLX: # %bb.0: # %entry
229 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
230 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
231 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
232 ; NoVLX-NEXT: kmovw %k0, %ecx
233 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
234 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
235 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
236 ; NoVLX-NEXT: kmovw %k0, %eax
237 ; NoVLX-NEXT: shll $16, %eax
238 ; NoVLX-NEXT: orl %ecx, %eax
239 ; NoVLX-NEXT: vzeroupper
242 %0 = bitcast <4 x i64> %__a to <32 x i8>
243 %1 = bitcast <4 x i64> %__b to <32 x i8>
244 %2 = icmp eq <32 x i8> %0, %1
; Shuffle indices 32-63 select from the zeroinitializer operand, so the upper 32 mask lanes are zero.
245 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
246 %4 = bitcast <64 x i1> %3 to i64
; Memory-operand variant of the <32 x i8> equality compare widened to a 64-bit mask:
; the second operand is loaded through %__b, and the expected assembly folds that
; load into the ymm vpcmpeqb as (%rdi) in both run configurations.
250 define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
251 ; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
252 ; VLX: # %bb.0: # %entry
253 ; VLX-NEXT: vpcmpeqb (%rdi), %ymm0, %k0
254 ; VLX-NEXT: kmovq %k0, %rax
255 ; VLX-NEXT: vzeroupper
258 ; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
259 ; NoVLX: # %bb.0: # %entry
260 ; NoVLX-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0
261 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
262 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
263 ; NoVLX-NEXT: kmovw %k0, %ecx
264 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
265 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
266 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
267 ; NoVLX-NEXT: kmovw %k0, %eax
268 ; NoVLX-NEXT: shll $16, %eax
269 ; NoVLX-NEXT: orl %ecx, %eax
270 ; NoVLX-NEXT: vzeroupper
273 %0 = bitcast <4 x i64> %__a to <32 x i8>
274 %load = load <4 x i64>, <4 x i64>* %__b
275 %1 = bitcast <4 x i64> %load to <32 x i8>
276 %2 = icmp eq <32 x i8> %0, %1
; Shuffle indices 32-63 select from the zeroinitializer operand, so the upper 32 mask lanes are zero.
277 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
278 %4 = bitcast <64 x i1> %3 to i64
; Masked <32 x i8> equality compare: the compare result is ANDed with %__u
; (reinterpreted as <32 x i1>), widened to 64 lanes and bitcast to i64.
; With avx512bw/avx512vl the mask is expected to become a k1 write-mask on the ymm vpcmpeqb.
; With avx512f only, each 16-lane half is ANDed with the matching 16 bits of %edi
; (shrl $16 exposes the high half) before the halves are merged with shll $16 / orl.
282 define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
283 ; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
284 ; VLX: # %bb.0: # %entry
285 ; VLX-NEXT: kmovd %edi, %k1
286 ; VLX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1}
287 ; VLX-NEXT: kmovq %k0, %rax
288 ; VLX-NEXT: vzeroupper
291 ; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
292 ; NoVLX: # %bb.0: # %entry
293 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
294 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
295 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
296 ; NoVLX-NEXT: kmovw %k0, %eax
297 ; NoVLX-NEXT: andl %edi, %eax
298 ; NoVLX-NEXT: shrl $16, %edi
299 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
300 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
301 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
302 ; NoVLX-NEXT: kmovw %k0, %ecx
303 ; NoVLX-NEXT: andl %edi, %ecx
304 ; NoVLX-NEXT: shll $16, %ecx
305 ; NoVLX-NEXT: movzwl %ax, %eax
306 ; NoVLX-NEXT: orl %ecx, %eax
307 ; NoVLX-NEXT: vzeroupper
310 %0 = bitcast <4 x i64> %__a to <32 x i8>
311 %1 = bitcast <4 x i64> %__b to <32 x i8>
312 %2 = icmp eq <32 x i8> %0, %1
; Apply the caller-supplied 32-bit mask lane by lane.
313 %3 = bitcast i32 %__u to <32 x i1>
314 %4 = and <32 x i1> %2, %3
; Shuffle indices 32-63 select from the zeroinitializer operand, so the upper 32 mask lanes are zero.
315 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
316 %6 = bitcast <64 x i1> %5 to i64
; Masked, memory-operand variant of the <32 x i8> equality compare widened to i64:
; the second operand is loaded through %__b (expected to fold into vpcmpeqb as (%rsi))
; and the result is ANDed with %__u reinterpreted as <32 x i1>.
320 define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
321 ; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
322 ; VLX: # %bb.0: # %entry
323 ; VLX-NEXT: kmovd %edi, %k1
324 ; VLX-NEXT: vpcmpeqb (%rsi), %ymm0, %k0 {%k1}
325 ; VLX-NEXT: kmovq %k0, %rax
326 ; VLX-NEXT: vzeroupper
329 ; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
330 ; NoVLX: # %bb.0: # %entry
331 ; NoVLX-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
332 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
333 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
334 ; NoVLX-NEXT: kmovw %k0, %eax
335 ; NoVLX-NEXT: andl %edi, %eax
336 ; NoVLX-NEXT: shrl $16, %edi
337 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
338 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
339 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
340 ; NoVLX-NEXT: kmovw %k0, %ecx
341 ; NoVLX-NEXT: andl %edi, %ecx
342 ; NoVLX-NEXT: shll $16, %ecx
343 ; NoVLX-NEXT: movzwl %ax, %eax
344 ; NoVLX-NEXT: orl %ecx, %eax
345 ; NoVLX-NEXT: vzeroupper
348 %0 = bitcast <4 x i64> %__a to <32 x i8>
349 %load = load <4 x i64>, <4 x i64>* %__b
350 %1 = bitcast <4 x i64> %load to <32 x i8>
351 %2 = icmp eq <32 x i8> %0, %1
; Apply the caller-supplied 32-bit mask lane by lane.
352 %3 = bitcast i32 %__u to <32 x i1>
353 %4 = and <32 x i1> %2, %3
; Shuffle indices 32-63 select from the zeroinitializer operand, so the upper 32 mask lanes are zero.
354 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
355 %6 = bitcast <64 x i1> %5 to i64
; Word-wise equality compare of two 128-bit register operands viewed as <8 x i16>;
; the 8-lane i1 result is widened to 16 lanes and bitcast to an i16 mask.
; With avx512bw/avx512vl the expected code is vpcmpeqw into a k-register plus kmovd;
; with avx512f only it is an xmm compare, vpmovsxwq to zmm, vptestmq and kmovw.
360 define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
361 ; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
362 ; VLX: # %bb.0: # %entry
363 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
364 ; VLX-NEXT: kmovd %k0, %eax
365 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
368 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
369 ; NoVLX: # %bb.0: # %entry
370 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
371 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
372 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
373 ; NoVLX-NEXT: kmovw %k0, %eax
374 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
375 ; NoVLX-NEXT: vzeroupper
378 %0 = bitcast <2 x i64> %__a to <8 x i16>
379 %1 = bitcast <2 x i64> %__b to <8 x i16>
380 %2 = icmp eq <8 x i16> %0, %1
; Shuffle indices 8-15 select from the zeroinitializer operand, so the upper 8 mask lanes are zero.
381 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
382 %4 = bitcast <16 x i1> %3 to i16
; Memory-operand variant of the <8 x i16> equality compare widened to a 16-bit mask:
; the second operand is loaded through %__b, and the expected assembly folds that
; load into vpcmpeqw as (%rdi) in both run configurations.
386 define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
387 ; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
388 ; VLX: # %bb.0: # %entry
389 ; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
390 ; VLX-NEXT: kmovd %k0, %eax
391 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
394 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
395 ; NoVLX: # %bb.0: # %entry
396 ; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
397 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
398 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
399 ; NoVLX-NEXT: kmovw %k0, %eax
400 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
401 ; NoVLX-NEXT: vzeroupper
404 %0 = bitcast <2 x i64> %__a to <8 x i16>
405 %load = load <2 x i64>, <2 x i64>* %__b
406 %1 = bitcast <2 x i64> %load to <8 x i16>
407 %2 = icmp eq <8 x i16> %0, %1
; Shuffle indices 8-15 select from the zeroinitializer operand, so the upper 8 mask lanes are zero.
408 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
409 %4 = bitcast <16 x i1> %3 to i16
; Masked <8 x i16> equality compare: the compare result is ANDed with %__u
; (reinterpreted as <8 x i1>), widened to 16 lanes and bitcast to i16.
; With avx512bw/avx512vl the mask is expected to become a k1 write-mask on vpcmpeqw;
; with avx512f only it becomes a k1 write-mask on the vptestmq that forms the result.
413 define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
414 ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
415 ; VLX: # %bb.0: # %entry
416 ; VLX-NEXT: kmovd %edi, %k1
417 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
418 ; VLX-NEXT: kmovd %k0, %eax
419 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
422 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
423 ; NoVLX: # %bb.0: # %entry
424 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
425 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
426 ; NoVLX-NEXT: kmovw %edi, %k1
427 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
428 ; NoVLX-NEXT: kmovw %k0, %eax
429 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
430 ; NoVLX-NEXT: vzeroupper
433 %0 = bitcast <2 x i64> %__a to <8 x i16>
434 %1 = bitcast <2 x i64> %__b to <8 x i16>
435 %2 = icmp eq <8 x i16> %0, %1
; Apply the caller-supplied 8-bit mask lane by lane.
436 %3 = bitcast i8 %__u to <8 x i1>
437 %4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select from the zeroinitializer operand, so the upper 8 mask lanes are zero.
438 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
439 %6 = bitcast <16 x i1> %5 to i16
; Masked, memory-operand variant of the <8 x i16> equality compare widened to i16:
; the second operand is loaded through %__b (expected to fold into vpcmpeqw as (%rsi))
; and the result is ANDed with %__u reinterpreted as <8 x i1>.
443 define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
444 ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
445 ; VLX: # %bb.0: # %entry
446 ; VLX-NEXT: kmovd %edi, %k1
447 ; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
448 ; VLX-NEXT: kmovd %k0, %eax
449 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
452 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
453 ; NoVLX: # %bb.0: # %entry
454 ; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
455 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
456 ; NoVLX-NEXT: kmovw %edi, %k1
457 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
458 ; NoVLX-NEXT: kmovw %k0, %eax
459 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
460 ; NoVLX-NEXT: vzeroupper
463 %0 = bitcast <2 x i64> %__a to <8 x i16>
464 %load = load <2 x i64>, <2 x i64>* %__b
465 %1 = bitcast <2 x i64> %load to <8 x i16>
466 %2 = icmp eq <8 x i16> %0, %1
; Apply the caller-supplied 8-bit mask lane by lane.
467 %3 = bitcast i8 %__u to <8 x i1>
468 %4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select from the zeroinitializer operand, so the upper 8 mask lanes are zero.
469 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
470 %6 = bitcast <16 x i1> %5 to i16
; Word-wise equality compare of two 128-bit register operands viewed as <8 x i16>;
; the 8-lane i1 result is widened to 32 lanes and bitcast to an i32 mask.
; With avx512bw/avx512vl the expected code is vpcmpeqw into a k-register plus kmovd;
; with avx512f only it is an xmm compare, vpmovsxwq to zmm, vptestmq and kmovw.
475 define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
476 ; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
477 ; VLX: # %bb.0: # %entry
478 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
479 ; VLX-NEXT: kmovd %k0, %eax
482 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
483 ; NoVLX: # %bb.0: # %entry
484 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
485 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
486 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
487 ; NoVLX-NEXT: kmovw %k0, %eax
488 ; NoVLX-NEXT: vzeroupper
491 %0 = bitcast <2 x i64> %__a to <8 x i16>
492 %1 = bitcast <2 x i64> %__b to <8 x i16>
493 %2 = icmp eq <8 x i16> %0, %1
; Every shuffle index past the first 8 is in the range 8-15, i.e. it selects from the
; zeroinitializer operand, so the upper 24 mask lanes are zero.
494 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
495 %4 = bitcast <32 x i1> %3 to i32
; Memory-operand variant of the <8 x i16> equality compare widened to a 32-bit mask:
; the second operand is loaded through %__b, and the expected assembly folds that
; load into vpcmpeqw as (%rdi) in both run configurations.
499 define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
500 ; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
501 ; VLX: # %bb.0: # %entry
502 ; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
503 ; VLX-NEXT: kmovd %k0, %eax
506 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
507 ; NoVLX: # %bb.0: # %entry
508 ; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
509 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
510 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
511 ; NoVLX-NEXT: kmovw %k0, %eax
512 ; NoVLX-NEXT: vzeroupper
515 %0 = bitcast <2 x i64> %__a to <8 x i16>
516 %load = load <2 x i64>, <2 x i64>* %__b
517 %1 = bitcast <2 x i64> %load to <8 x i16>
518 %2 = icmp eq <8 x i16> %0, %1
; Every shuffle index past the first 8 is in the range 8-15, i.e. it selects from the
; zeroinitializer operand, so the upper 24 mask lanes are zero.
519 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
520 %4 = bitcast <32 x i1> %3 to i32
; Masked <8 x i16> equality compare: the compare result is ANDed with %__u
; (reinterpreted as <8 x i1>), widened to 32 lanes and bitcast to i32.
; With avx512bw/avx512vl the mask is expected to become a k1 write-mask on vpcmpeqw;
; with avx512f only it becomes a k1 write-mask on the vptestmq that forms the result.
524 define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
525 ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
526 ; VLX: # %bb.0: # %entry
527 ; VLX-NEXT: kmovd %edi, %k1
528 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
529 ; VLX-NEXT: kmovd %k0, %eax
532 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
533 ; NoVLX: # %bb.0: # %entry
534 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
535 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
536 ; NoVLX-NEXT: kmovw %edi, %k1
537 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
538 ; NoVLX-NEXT: kmovw %k0, %eax
539 ; NoVLX-NEXT: vzeroupper
542 %0 = bitcast <2 x i64> %__a to <8 x i16>
543 %1 = bitcast <2 x i64> %__b to <8 x i16>
544 %2 = icmp eq <8 x i16> %0, %1
; Apply the caller-supplied 8-bit mask lane by lane.
545 %3 = bitcast i8 %__u to <8 x i1>
546 %4 = and <8 x i1> %2, %3
; Every shuffle index past the first 8 is in the range 8-15, i.e. it selects from the
; zeroinitializer operand, so the upper 24 mask lanes are zero.
547 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
548 %6 = bitcast <32 x i1> %5 to i32
; Masked, memory-operand variant of the <8 x i16> equality compare widened to i32:
; the second operand is loaded through %__b (expected to fold into vpcmpeqw as (%rsi))
; and the result is ANDed with %__u reinterpreted as <8 x i1>.
552 define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
553 ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
554 ; VLX: # %bb.0: # %entry
555 ; VLX-NEXT: kmovd %edi, %k1
556 ; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
557 ; VLX-NEXT: kmovd %k0, %eax
560 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
561 ; NoVLX: # %bb.0: # %entry
562 ; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
563 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
564 ; NoVLX-NEXT: kmovw %edi, %k1
565 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
566 ; NoVLX-NEXT: kmovw %k0, %eax
567 ; NoVLX-NEXT: vzeroupper
570 %0 = bitcast <2 x i64> %__a to <8 x i16>
571 %load = load <2 x i64>, <2 x i64>* %__b
572 %1 = bitcast <2 x i64> %load to <8 x i16>
573 %2 = icmp eq <8 x i16> %0, %1
; Apply the caller-supplied 8-bit mask lane by lane.
574 %3 = bitcast i8 %__u to <8 x i1>
575 %4 = and <8 x i1> %2, %3
; Every shuffle index past the first 8 is in the range 8-15, i.e. it selects from the
; zeroinitializer operand, so the upper 24 mask lanes are zero.
576 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
577 %6 = bitcast <32 x i1> %5 to i32
; Word-wise equality compare of two 128-bit register operands viewed as <8 x i16>;
; the 8-lane i1 result is widened to 64 lanes and bitcast to an i64 mask.
; With avx512bw/avx512vl the expected code is vpcmpeqw into a k-register plus kmovq;
; with avx512f only it is an xmm compare, vpmovsxwq to zmm, vptestmq and kmovw.
582 define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
583 ; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
584 ; VLX: # %bb.0: # %entry
585 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
586 ; VLX-NEXT: kmovq %k0, %rax
589 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
590 ; NoVLX: # %bb.0: # %entry
591 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
592 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
593 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
594 ; NoVLX-NEXT: kmovw %k0, %eax
595 ; NoVLX-NEXT: vzeroupper
598 %0 = bitcast <2 x i64> %__a to <8 x i16>
599 %1 = bitcast <2 x i64> %__b to <8 x i16>
600 %2 = icmp eq <8 x i16> %0, %1
; Every shuffle index past the first 8 is in the range 8-15, i.e. it selects from the
; zeroinitializer operand, so the upper 56 mask lanes are zero.
601 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
602 %4 = bitcast <64 x i1> %3 to i64
; Memory-operand variant of the <8 x i16> equality compare widened to a 64-bit mask:
; the second operand is loaded through %__b, and the expected assembly folds that
; load into vpcmpeqw as (%rdi) in both run configurations.
606 define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
607 ; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
608 ; VLX: # %bb.0: # %entry
609 ; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
610 ; VLX-NEXT: kmovq %k0, %rax
613 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
614 ; NoVLX: # %bb.0: # %entry
615 ; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
616 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
617 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
618 ; NoVLX-NEXT: kmovw %k0, %eax
619 ; NoVLX-NEXT: vzeroupper
622 %0 = bitcast <2 x i64> %__a to <8 x i16>
623 %load = load <2 x i64>, <2 x i64>* %__b
624 %1 = bitcast <2 x i64> %load to <8 x i16>
625 %2 = icmp eq <8 x i16> %0, %1
; Every shuffle index past the first 8 is in the range 8-15, i.e. it selects from the
; zeroinitializer operand, so the upper 56 mask lanes are zero.
626 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
627 %4 = bitcast <64 x i1> %3 to i64
; Masked <8 x i16> equality compare: the compare result is ANDed with %__u
; (reinterpreted as <8 x i1>), widened to 64 lanes and bitcast to i64.
; With avx512bw/avx512vl the mask is expected to become a k1 write-mask on vpcmpeqw;
; with avx512f only it becomes a k1 write-mask on the vptestmq that forms the result.
631 define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
632 ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
633 ; VLX: # %bb.0: # %entry
634 ; VLX-NEXT: kmovd %edi, %k1
635 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
636 ; VLX-NEXT: kmovq %k0, %rax
639 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
640 ; NoVLX: # %bb.0: # %entry
641 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
642 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
643 ; NoVLX-NEXT: kmovw %edi, %k1
644 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
645 ; NoVLX-NEXT: kmovw %k0, %eax
646 ; NoVLX-NEXT: vzeroupper
649 %0 = bitcast <2 x i64> %__a to <8 x i16>
650 %1 = bitcast <2 x i64> %__b to <8 x i16>
651 %2 = icmp eq <8 x i16> %0, %1
; Apply the caller-supplied 8-bit mask lane by lane.
652 %3 = bitcast i8 %__u to <8 x i1>
653 %4 = and <8 x i1> %2, %3
; Every shuffle index past the first 8 is in the range 8-15, i.e. it selects from the
; zeroinitializer operand, so the upper 56 mask lanes are zero.
654 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
655 %6 = bitcast <64 x i1> %5 to i64
; Masked, memory-operand variant of the <8 x i16> equality compare widened to i64:
; the second operand is loaded through %__b (expected to fold into vpcmpeqw as (%rsi))
; and the result is ANDed with %__u reinterpreted as <8 x i1>.
659 define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
660 ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
661 ; VLX: # %bb.0: # %entry
662 ; VLX-NEXT: kmovd %edi, %k1
663 ; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
664 ; VLX-NEXT: kmovq %k0, %rax
667 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
668 ; NoVLX: # %bb.0: # %entry
669 ; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
670 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
671 ; NoVLX-NEXT: kmovw %edi, %k1
672 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
673 ; NoVLX-NEXT: kmovw %k0, %eax
674 ; NoVLX-NEXT: vzeroupper
677 %0 = bitcast <2 x i64> %__a to <8 x i16>
678 %load = load <2 x i64>, <2 x i64>* %__b
679 %1 = bitcast <2 x i64> %load to <8 x i16>
680 %2 = icmp eq <8 x i16> %0, %1
; Apply the caller-supplied 8-bit mask lane by lane.
681 %3 = bitcast i8 %__u to <8 x i1>
682 %4 = and <8 x i1> %2, %3
; Every shuffle index past the first 8 is in the range 8-15, i.e. it selects from the
; zeroinitializer operand, so the upper 56 mask lanes are zero.
683 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
684 %6 = bitcast <64 x i1> %5 to i64
; Word-wise equality compare of two 256-bit register operands viewed as <16 x i16>;
; the 16-lane i1 result is widened to 32 lanes and bitcast to an i32 mask.
; With avx512bw/avx512vl the expected code is a ymm vpcmpeqw into a k-register plus kmovd;
; with avx512f only it is a ymm compare, vpmovsxwd to zmm, vptestmd and kmovw.
689 define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
690 ; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
691 ; VLX: # %bb.0: # %entry
692 ; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
693 ; VLX-NEXT: kmovd %k0, %eax
694 ; VLX-NEXT: vzeroupper
697 ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
698 ; NoVLX: # %bb.0: # %entry
699 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
700 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
701 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
702 ; NoVLX-NEXT: kmovw %k0, %eax
703 ; NoVLX-NEXT: vzeroupper
706 %0 = bitcast <4 x i64> %__a to <16 x i16>
707 %1 = bitcast <4 x i64> %__b to <16 x i16>
708 %2 = icmp eq <16 x i16> %0, %1
; Shuffle indices 16-31 select from the zeroinitializer operand, so the upper 16 mask lanes are zero.
709 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
710 %4 = bitcast <32 x i1> %3 to i32
; Unmasked i16 equality compare of %__a against a <4 x i64> loaded from %__b:
; the <16 x i1> result is widened to <32 x i1> (shuffle indices >= 16 select
; lanes of the zeroinitializer operand) and bitcast to i32.
; Both runs expect the load to be folded into vpcmpeqw as a (%rdi) operand.
714 define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
715 ; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
716 ; VLX: # %bb.0: # %entry
717 ; VLX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0
718 ; VLX-NEXT: kmovd %k0, %eax
719 ; VLX-NEXT: vzeroupper
722 ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
723 ; NoVLX: # %bb.0: # %entry
724 ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
725 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
726 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
727 ; NoVLX-NEXT: kmovw %k0, %eax
728 ; NoVLX-NEXT: vzeroupper
731 %0 = bitcast <4 x i64> %__a to <16 x i16>
732 %load = load <4 x i64>, <4 x i64>* %__b
733 %1 = bitcast <4 x i64> %load to <16 x i16>
734 %2 = icmp eq <16 x i16> %0, %1
735 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
736 %4 = bitcast <32 x i1> %3 to i32
; Masked i16 equality compare of two 256-bit register operands: the <16 x i1>
; result is ANDed with the bits of %__u, widened to <32 x i1> (shuffle indices
; >= 16 select lanes of the zeroinitializer operand) and bitcast to i32.
; The VLX run expects the mask applied via {%k1} on vpcmpeqw; the NoVLX run
; expects it applied afterwards in a GPR with andl.
740 define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
741 ; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
742 ; VLX: # %bb.0: # %entry
743 ; VLX-NEXT: kmovd %edi, %k1
744 ; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
745 ; VLX-NEXT: kmovd %k0, %eax
746 ; VLX-NEXT: vzeroupper
749 ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
750 ; NoVLX: # %bb.0: # %entry
751 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
752 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
753 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
754 ; NoVLX-NEXT: kmovw %k0, %eax
755 ; NoVLX-NEXT: andl %edi, %eax
756 ; NoVLX-NEXT: vzeroupper
759 %0 = bitcast <4 x i64> %__a to <16 x i16>
760 %1 = bitcast <4 x i64> %__b to <16 x i16>
761 %2 = icmp eq <16 x i16> %0, %1
762 %3 = bitcast i16 %__u to <16 x i1>
763 %4 = and <16 x i1> %2, %3
764 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
765 %6 = bitcast <32 x i1> %5 to i32
; Masked i16 equality compare of %__a against a <4 x i64> loaded from %__b:
; the <16 x i1> result is ANDed with the bits of %__u, widened to <32 x i1>
; (shuffle indices >= 16 select lanes of the zeroinitializer operand) and
; bitcast to i32.
; The VLX run expects a masked vpcmpeqw with a folded (%rsi) operand; the
; NoVLX run expects the mask applied afterwards in a GPR with andl.
769 define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
770 ; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
771 ; VLX: # %bb.0: # %entry
772 ; VLX-NEXT: kmovd %edi, %k1
773 ; VLX-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
774 ; VLX-NEXT: kmovd %k0, %eax
775 ; VLX-NEXT: vzeroupper
778 ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
779 ; NoVLX: # %bb.0: # %entry
780 ; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
781 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
782 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
783 ; NoVLX-NEXT: kmovw %k0, %eax
784 ; NoVLX-NEXT: andl %edi, %eax
785 ; NoVLX-NEXT: vzeroupper
788 %0 = bitcast <4 x i64> %__a to <16 x i16>
789 %load = load <4 x i64>, <4 x i64>* %__b
790 %1 = bitcast <4 x i64> %load to <16 x i16>
791 %2 = icmp eq <16 x i16> %0, %1
792 %3 = bitcast i16 %__u to <16 x i1>
793 %4 = and <16 x i1> %2, %3
794 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
795 %6 = bitcast <32 x i1> %5 to i32
; Unmasked i16 equality compare of two 256-bit register operands: the
; <16 x i1> result is widened to <64 x i1> (shuffle indices >= 16 select lanes
; of the zeroinitializer operand) and bitcast to i64.
; The VLX run expects the mask read out with kmovq; the NoVLX run expects a
; ymm compare, sign-extension to zmm, vptestmd and a 16-bit kmovw.
800 define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
801 ; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
802 ; VLX: # %bb.0: # %entry
803 ; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
804 ; VLX-NEXT: kmovq %k0, %rax
805 ; VLX-NEXT: vzeroupper
808 ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
809 ; NoVLX: # %bb.0: # %entry
810 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
811 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
812 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
813 ; NoVLX-NEXT: kmovw %k0, %eax
814 ; NoVLX-NEXT: vzeroupper
817 %0 = bitcast <4 x i64> %__a to <16 x i16>
818 %1 = bitcast <4 x i64> %__b to <16 x i16>
819 %2 = icmp eq <16 x i16> %0, %1
820 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
821 %4 = bitcast <64 x i1> %3 to i64
; Unmasked i16 equality compare of %__a against a <4 x i64> loaded from %__b:
; the <16 x i1> result is widened to <64 x i1> (shuffle indices >= 16 select
; lanes of the zeroinitializer operand) and bitcast to i64.
; Both runs expect the load to be folded into vpcmpeqw as a (%rdi) operand.
825 define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
826 ; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
827 ; VLX: # %bb.0: # %entry
828 ; VLX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0
829 ; VLX-NEXT: kmovq %k0, %rax
830 ; VLX-NEXT: vzeroupper
833 ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
834 ; NoVLX: # %bb.0: # %entry
835 ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
836 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
837 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
838 ; NoVLX-NEXT: kmovw %k0, %eax
839 ; NoVLX-NEXT: vzeroupper
842 %0 = bitcast <4 x i64> %__a to <16 x i16>
843 %load = load <4 x i64>, <4 x i64>* %__b
844 %1 = bitcast <4 x i64> %load to <16 x i16>
845 %2 = icmp eq <16 x i16> %0, %1
846 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
847 %4 = bitcast <64 x i1> %3 to i64
; Masked i16 equality compare of two 256-bit register operands: the <16 x i1>
; result is ANDed with the bits of %__u, widened to <64 x i1> (shuffle indices
; >= 16 select lanes of the zeroinitializer operand) and bitcast to i64.
; The VLX run expects the mask applied via {%k1} on vpcmpeqw; the NoVLX run
; expects it applied afterwards in a GPR with andl.
851 define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
852 ; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
853 ; VLX: # %bb.0: # %entry
854 ; VLX-NEXT: kmovd %edi, %k1
855 ; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
856 ; VLX-NEXT: kmovq %k0, %rax
857 ; VLX-NEXT: vzeroupper
860 ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
861 ; NoVLX: # %bb.0: # %entry
862 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
863 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
864 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
865 ; NoVLX-NEXT: kmovw %k0, %eax
866 ; NoVLX-NEXT: andl %edi, %eax
867 ; NoVLX-NEXT: vzeroupper
870 %0 = bitcast <4 x i64> %__a to <16 x i16>
871 %1 = bitcast <4 x i64> %__b to <16 x i16>
872 %2 = icmp eq <16 x i16> %0, %1
873 %3 = bitcast i16 %__u to <16 x i1>
874 %4 = and <16 x i1> %2, %3
875 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
876 %6 = bitcast <64 x i1> %5 to i64
; Masked i16 equality compare of %__a against a <4 x i64> loaded from %__b:
; the <16 x i1> result is ANDed with the bits of %__u, widened to <64 x i1>
; (shuffle indices >= 16 select lanes of the zeroinitializer operand) and
; bitcast to i64.
; The VLX run expects a masked vpcmpeqw with a folded (%rsi) operand; the
; NoVLX run expects the mask applied afterwards in a GPR with andl.
880 define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
881 ; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
882 ; VLX: # %bb.0: # %entry
883 ; VLX-NEXT: kmovd %edi, %k1
884 ; VLX-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
885 ; VLX-NEXT: kmovq %k0, %rax
886 ; VLX-NEXT: vzeroupper
889 ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
890 ; NoVLX: # %bb.0: # %entry
891 ; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
892 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
893 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
894 ; NoVLX-NEXT: kmovw %k0, %eax
895 ; NoVLX-NEXT: andl %edi, %eax
896 ; NoVLX-NEXT: vzeroupper
899 %0 = bitcast <4 x i64> %__a to <16 x i16>
900 %load = load <4 x i64>, <4 x i64>* %__b
901 %1 = bitcast <4 x i64> %load to <16 x i16>
902 %2 = icmp eq <16 x i16> %0, %1
903 %3 = bitcast i16 %__u to <16 x i1>
904 %4 = and <16 x i1> %2, %3
905 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
906 %6 = bitcast <64 x i1> %5 to i64
; Unmasked i16 equality compare of two 512-bit register operands: the
; <32 x i1> result is widened to <64 x i1> (shuffle indices >= 32 select lanes
; of the zeroinitializer operand) and bitcast to i64.
; The VLX run expects a single zmm vpcmpeqw; the NoVLX run expects the upper
; 256-bit halves extracted, each half compared and converted to a 16-bit mask,
; and the two masks merged with shll $16 / orl.
911 define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
912 ; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
913 ; VLX: # %bb.0: # %entry
914 ; VLX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
915 ; VLX-NEXT: kmovq %k0, %rax
916 ; VLX-NEXT: vzeroupper
919 ; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
920 ; NoVLX: # %bb.0: # %entry
921 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
922 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3
923 ; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2
924 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
925 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
926 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
927 ; NoVLX-NEXT: kmovw %k0, %ecx
928 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm0
929 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
930 ; NoVLX-NEXT: kmovw %k0, %eax
931 ; NoVLX-NEXT: shll $16, %eax
932 ; NoVLX-NEXT: orl %ecx, %eax
933 ; NoVLX-NEXT: vzeroupper
936 %0 = bitcast <8 x i64> %__a to <32 x i16>
937 %1 = bitcast <8 x i64> %__b to <32 x i16>
938 %2 = icmp eq <32 x i16> %0, %1
939 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
940 %4 = bitcast <64 x i1> %3 to i64
; Unmasked i16 equality compare of %__a against an <8 x i64> loaded from
; %__b: the <32 x i1> result is widened to <64 x i1> (shuffle indices >= 32
; select lanes of the zeroinitializer operand) and bitcast to i64.
; The VLX run expects a single zmm vpcmpeqw from memory; the NoVLX run expects
; two ymm compares against (%rdi) and 32(%rdi), merged with shll $16 / orl.
944 define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
945 ; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
946 ; VLX: # %bb.0: # %entry
947 ; VLX-NEXT: vpcmpeqw (%rdi), %zmm0, %k0
948 ; VLX-NEXT: kmovq %k0, %rax
949 ; VLX-NEXT: vzeroupper
952 ; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
953 ; NoVLX: # %bb.0: # %entry
954 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1
955 ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
956 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
957 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
958 ; NoVLX-NEXT: kmovw %k0, %ecx
959 ; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm1, %ymm0
960 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
961 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
962 ; NoVLX-NEXT: kmovw %k0, %eax
963 ; NoVLX-NEXT: shll $16, %eax
964 ; NoVLX-NEXT: orl %ecx, %eax
965 ; NoVLX-NEXT: vzeroupper
968 %0 = bitcast <8 x i64> %__a to <32 x i16>
969 %load = load <8 x i64>, <8 x i64>* %__b
970 %1 = bitcast <8 x i64> %load to <32 x i16>
971 %2 = icmp eq <32 x i16> %0, %1
972 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
973 %4 = bitcast <64 x i1> %3 to i64
; Masked i16 equality compare of two 512-bit register operands: the <32 x i1>
; result is ANDed with the bits of %__u, widened to <64 x i1> (shuffle indices
; >= 32 select lanes of the zeroinitializer operand) and bitcast to i64.
; The VLX run expects one masked zmm vpcmpeqw; the NoVLX run expects each
; 256-bit half compared separately, the low half ANDed with %edi and the high
; half ANDed with %edi shifted right by 16, then merged with shll $16 / orl.
977 define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
978 ; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
979 ; VLX: # %bb.0: # %entry
980 ; VLX-NEXT: kmovd %edi, %k1
981 ; VLX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
982 ; VLX-NEXT: kmovq %k0, %rax
983 ; VLX-NEXT: vzeroupper
986 ; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
987 ; NoVLX: # %bb.0: # %entry
988 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm2
989 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
990 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
991 ; NoVLX-NEXT: kmovw %k0, %eax
992 ; NoVLX-NEXT: andl %edi, %eax
993 ; NoVLX-NEXT: shrl $16, %edi
994 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
995 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
996 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
997 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
998 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
999 ; NoVLX-NEXT: kmovw %k0, %ecx
1000 ; NoVLX-NEXT: andl %edi, %ecx
1001 ; NoVLX-NEXT: shll $16, %ecx
1002 ; NoVLX-NEXT: movzwl %ax, %eax
1003 ; NoVLX-NEXT: orl %ecx, %eax
1004 ; NoVLX-NEXT: vzeroupper
1007 %0 = bitcast <8 x i64> %__a to <32 x i16>
1008 %1 = bitcast <8 x i64> %__b to <32 x i16>
1009 %2 = icmp eq <32 x i16> %0, %1
1010 %3 = bitcast i32 %__u to <32 x i1>
1011 %4 = and <32 x i1> %2, %3
1012 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
1013 %6 = bitcast <64 x i1> %5 to i64
; Masked i16 equality compare of %__a against an <8 x i64> loaded from %__b:
; the <32 x i1> result is ANDed with the bits of %__u, widened to <64 x i1>
; (shuffle indices >= 32 select lanes of the zeroinitializer operand) and
; bitcast to i64.
; The VLX run expects one masked zmm vpcmpeqw from memory; the NoVLX run
; expects two ymm compares against (%rsi) and 32(%rsi), each ANDed with the
; matching 16 bits of %edi, then merged with shll $16 / orl.
1017 define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
1018 ; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
1019 ; VLX: # %bb.0: # %entry
1020 ; VLX-NEXT: kmovd %edi, %k1
1021 ; VLX-NEXT: vpcmpeqw (%rsi), %zmm0, %k0 {%k1}
1022 ; VLX-NEXT: kmovq %k0, %rax
1023 ; VLX-NEXT: vzeroupper
1026 ; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
1027 ; NoVLX: # %bb.0: # %entry
1028 ; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm1
1029 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
1030 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
1031 ; NoVLX-NEXT: kmovw %k0, %eax
1032 ; NoVLX-NEXT: andl %edi, %eax
1033 ; NoVLX-NEXT: shrl $16, %edi
1034 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
1035 ; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm0, %ymm0
1036 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
1037 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1038 ; NoVLX-NEXT: kmovw %k0, %ecx
1039 ; NoVLX-NEXT: andl %edi, %ecx
1040 ; NoVLX-NEXT: shll $16, %ecx
1041 ; NoVLX-NEXT: movzwl %ax, %eax
1042 ; NoVLX-NEXT: orl %ecx, %eax
1043 ; NoVLX-NEXT: vzeroupper
1046 %0 = bitcast <8 x i64> %__a to <32 x i16>
1047 %load = load <8 x i64>, <8 x i64>* %__b
1048 %1 = bitcast <8 x i64> %load to <32 x i16>
1049 %2 = icmp eq <32 x i16> %0, %1
1050 %3 = bitcast i32 %__u to <32 x i1>
1051 %4 = and <32 x i1> %2, %3
1052 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
1053 %6 = bitcast <64 x i1> %5 to i64
; Unmasked i32 equality compare of two 128-bit register operands: the
; <4 x i1> result is widened to <8 x i1> (shuffle indices >= 4 select lanes of
; the zeroinitializer operand) and bitcast to i8.
; The VLX run expects an xmm vpcmpeqd into a mask register; the NoVLX run
; expects a zmm-wide compare whose mask is trimmed to its low 4 bits with
; kshiftlw/kshiftrw $12.
1058 define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1059 ; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
1060 ; VLX: # %bb.0: # %entry
1061 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
1062 ; VLX-NEXT: kmovd %k0, %eax
1063 ; VLX-NEXT: # kill: def $al killed $al killed $eax
1066 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
1067 ; NoVLX: # %bb.0: # %entry
1068 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1069 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1070 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1071 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1072 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1073 ; NoVLX-NEXT: kmovw %k0, %eax
1074 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
1075 ; NoVLX-NEXT: vzeroupper
1078 %0 = bitcast <2 x i64> %__a to <4 x i32>
1079 %1 = bitcast <2 x i64> %__b to <4 x i32>
1080 %2 = icmp eq <4 x i32> %0, %1
1081 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1082 %4 = bitcast <8 x i1> %3 to i8
; Unmasked i32 equality compare of %__a against a <2 x i64> loaded from %__b:
; the <4 x i1> result is widened to <8 x i1> (shuffle indices >= 4 select lanes
; of the zeroinitializer operand) and bitcast to i8.
; The VLX run expects the load folded into vpcmpeqd; the NoVLX run expects a
; separate vmovdqa load followed by a zmm-wide compare trimmed to 4 mask bits.
1086 define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1087 ; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
1088 ; VLX: # %bb.0: # %entry
1089 ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
1090 ; VLX-NEXT: kmovd %k0, %eax
1091 ; VLX-NEXT: # kill: def $al killed $al killed $eax
1094 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
1095 ; NoVLX: # %bb.0: # %entry
1096 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1097 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
1098 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1099 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1100 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1101 ; NoVLX-NEXT: kmovw %k0, %eax
1102 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
1103 ; NoVLX-NEXT: vzeroupper
1106 %0 = bitcast <2 x i64> %__a to <4 x i32>
1107 %load = load <2 x i64>, <2 x i64>* %__b
1108 %1 = bitcast <2 x i64> %load to <4 x i32>
1109 %2 = icmp eq <4 x i32> %0, %1
1110 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1111 %4 = bitcast <8 x i1> %3 to i8
; Masked i32 equality compare of two 128-bit register operands: the <4 x i1>
; result is ANDed with the low four bits of %__u (%extract.i takes lanes 0-3 of
; the <8 x i1> view), widened to <8 x i1> with zeroinitializer lanes, and
; bitcast to i8.
; Both runs expect the mask applied via {%k1} on vpcmpeqd; the NoVLX run
; additionally widens to zmm and trims the result to its low 4 mask bits.
1115 define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1116 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
1117 ; VLX: # %bb.0: # %entry
1118 ; VLX-NEXT: kmovd %edi, %k1
1119 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1120 ; VLX-NEXT: kmovd %k0, %eax
1121 ; VLX-NEXT: # kill: def $al killed $al killed $eax
1124 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
1125 ; NoVLX: # %bb.0: # %entry
1126 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1127 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1128 ; NoVLX-NEXT: kmovw %edi, %k1
1129 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1130 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1131 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1132 ; NoVLX-NEXT: kmovw %k0, %eax
1133 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
1134 ; NoVLX-NEXT: vzeroupper
1137 %0 = bitcast <2 x i64> %__a to <4 x i32>
1138 %1 = bitcast <2 x i64> %__b to <4 x i32>
1139 %2 = icmp eq <4 x i32> %0, %1
1140 %3 = bitcast i8 %__u to <8 x i1>
1141 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1142 %4 = and <4 x i1> %2, %extract.i
1143 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1144 %6 = bitcast <8 x i1> %5 to i8
; Masked i32 equality compare of %__a against a <2 x i64> loaded from %__b:
; the <4 x i1> result is ANDed with the low four bits of %__u, widened to
; <8 x i1> with zeroinitializer lanes, and bitcast to i8.
; The VLX run expects a masked vpcmpeqd with a folded (%rsi) operand; the
; NoVLX run expects a separate vmovdqa load, a masked zmm-wide compare, and the
; result trimmed to its low 4 mask bits.
1148 define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1149 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem:
1150 ; VLX: # %bb.0: # %entry
1151 ; VLX-NEXT: kmovd %edi, %k1
1152 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
1153 ; VLX-NEXT: kmovd %k0, %eax
1154 ; VLX-NEXT: # kill: def $al killed $al killed $eax
1157 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem:
1158 ; NoVLX: # %bb.0: # %entry
1159 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1160 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
1161 ; NoVLX-NEXT: kmovw %edi, %k1
1162 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1163 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1164 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1165 ; NoVLX-NEXT: kmovw %k0, %eax
1166 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
1167 ; NoVLX-NEXT: vzeroupper
1170 %0 = bitcast <2 x i64> %__a to <4 x i32>
1171 %load = load <2 x i64>, <2 x i64>* %__b
1172 %1 = bitcast <2 x i64> %load to <4 x i32>
1173 %2 = icmp eq <4 x i32> %0, %1
1174 %3 = bitcast i8 %__u to <8 x i1>
1175 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1176 %4 = and <4 x i1> %2, %extract.i
1177 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1178 %6 = bitcast <8 x i1> %5 to i8
; Unmasked i32 equality compare of %__a against a scalar i32 loaded from %__b
; and splatted to all four lanes (insertelement + all-zero shuffle mask): the
; <4 x i1> result is widened to <8 x i1> with zeroinitializer lanes and bitcast
; to i8.
; The VLX run expects an embedded {1to4} broadcast operand; the NoVLX run
; expects a {1to16} broadcast on a zmm compare, trimmed to 4 mask bits.
1183 define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
1184 ; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b:
1185 ; VLX: # %bb.0: # %entry
1186 ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
1187 ; VLX-NEXT: kmovd %k0, %eax
1188 ; VLX-NEXT: # kill: def $al killed $al killed $eax
1191 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b:
1192 ; NoVLX: # %bb.0: # %entry
1193 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1194 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
1195 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1196 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1197 ; NoVLX-NEXT: kmovw %k0, %eax
1198 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
1199 ; NoVLX-NEXT: vzeroupper
1202 %0 = bitcast <2 x i64> %__a to <4 x i32>
1203 %load = load i32, i32* %__b
1204 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1205 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1206 %2 = icmp eq <4 x i32> %0, %1
1207 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1208 %4 = bitcast <8 x i1> %3 to i8
; Masked i32 equality compare of %__a against a scalar i32 loaded from %__b
; and splatted to all four lanes: the <4 x i1> result is ANDed with the low
; four bits of %__u (mask is the first `and` operand here), widened to
; <8 x i1> with zeroinitializer lanes, and bitcast to i8.
; The VLX run expects a masked compare with a {1to4} broadcast operand; the
; NoVLX run expects a masked {1to16} zmm compare trimmed to 4 mask bits.
1212 define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
1213 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
1214 ; VLX: # %bb.0: # %entry
1215 ; VLX-NEXT: kmovd %edi, %k1
1216 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
1217 ; VLX-NEXT: kmovd %k0, %eax
1218 ; VLX-NEXT: # kill: def $al killed $al killed $eax
1221 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
1222 ; NoVLX: # %bb.0: # %entry
1223 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1224 ; NoVLX-NEXT: kmovw %edi, %k1
1225 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
1226 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1227 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1228 ; NoVLX-NEXT: kmovw %k0, %eax
1229 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
1230 ; NoVLX-NEXT: vzeroupper
1233 %0 = bitcast <2 x i64> %__a to <4 x i32>
1234 %load = load i32, i32* %__b
1235 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1236 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1237 %2 = icmp eq <4 x i32> %0, %1
1238 %3 = bitcast i8 %__u to <8 x i1>
1239 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1240 %4 = and <4 x i1> %extract.i, %2
1241 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1242 %6 = bitcast <8 x i1> %5 to i8
; Unmasked i32 equality compare of two 128-bit register operands: the
; <4 x i1> result is widened to <16 x i1> (shuffle indices >= 4 select lanes of
; the zeroinitializer operand) and bitcast to i16.
; The VLX run expects an xmm vpcmpeqd into a mask register; the NoVLX run
; expects a zmm-wide compare whose mask is trimmed to its low 4 bits with
; kshiftlw/kshiftrw $12.
1247 define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1248 ; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask:
1249 ; VLX: # %bb.0: # %entry
1250 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
1251 ; VLX-NEXT: kmovd %k0, %eax
1252 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1255 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask:
1256 ; NoVLX: # %bb.0: # %entry
1257 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1258 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1259 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1260 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1261 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1262 ; NoVLX-NEXT: kmovw %k0, %eax
1263 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1264 ; NoVLX-NEXT: vzeroupper
1267 %0 = bitcast <2 x i64> %__a to <4 x i32>
1268 %1 = bitcast <2 x i64> %__b to <4 x i32>
1269 %2 = icmp eq <4 x i32> %0, %1
1270 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1271 %4 = bitcast <16 x i1> %3 to i16
; Unmasked i32 equality compare of %__a against a <2 x i64> loaded from %__b:
; the <4 x i1> result is widened to <16 x i1> (shuffle indices >= 4 select
; lanes of the zeroinitializer operand) and bitcast to i16.
; The VLX run expects the load folded into vpcmpeqd; the NoVLX run expects a
; separate vmovdqa load followed by a zmm-wide compare trimmed to 4 mask bits.
1275 define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1276 ; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem:
1277 ; VLX: # %bb.0: # %entry
1278 ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
1279 ; VLX-NEXT: kmovd %k0, %eax
1280 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1283 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem:
1284 ; NoVLX: # %bb.0: # %entry
1285 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1286 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
1287 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1288 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1289 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1290 ; NoVLX-NEXT: kmovw %k0, %eax
1291 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1292 ; NoVLX-NEXT: vzeroupper
1295 %0 = bitcast <2 x i64> %__a to <4 x i32>
1296 %load = load <2 x i64>, <2 x i64>* %__b
1297 %1 = bitcast <2 x i64> %load to <4 x i32>
1298 %2 = icmp eq <4 x i32> %0, %1
1299 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1300 %4 = bitcast <16 x i1> %3 to i16
; Masked i32 equality compare of two 128-bit register operands: the <4 x i1>
; result is ANDed with the low four bits of %__u (%extract.i takes lanes 0-3 of
; the <8 x i1> view), widened to <16 x i1> with zeroinitializer lanes, and
; bitcast to i16.
; Both runs expect the mask applied via {%k1} on vpcmpeqd; the NoVLX run
; additionally widens to zmm and trims the result to its low 4 mask bits.
1304 define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1305 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
1306 ; VLX: # %bb.0: # %entry
1307 ; VLX-NEXT: kmovd %edi, %k1
1308 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1309 ; VLX-NEXT: kmovd %k0, %eax
1310 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1313 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
1314 ; NoVLX: # %bb.0: # %entry
1315 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1316 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1317 ; NoVLX-NEXT: kmovw %edi, %k1
1318 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1319 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1320 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1321 ; NoVLX-NEXT: kmovw %k0, %eax
1322 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1323 ; NoVLX-NEXT: vzeroupper
1326 %0 = bitcast <2 x i64> %__a to <4 x i32>
1327 %1 = bitcast <2 x i64> %__b to <4 x i32>
1328 %2 = icmp eq <4 x i32> %0, %1
1329 %3 = bitcast i8 %__u to <8 x i1>
1330 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1331 %4 = and <4 x i1> %2, %extract.i
1332 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1333 %6 = bitcast <16 x i1> %5 to i16
; Masked i32 equality compare of %__a against a <2 x i64> loaded from %__b:
; the <4 x i1> result is ANDed with the low four bits of %__u, widened to
; <16 x i1> with zeroinitializer lanes, and bitcast to i16.
; The VLX run expects a masked vpcmpeqd with a folded (%rsi) operand; the
; NoVLX run expects a separate vmovdqa load, a masked zmm-wide compare, and the
; result trimmed to its low 4 mask bits.
1337 define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1338 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem:
1339 ; VLX: # %bb.0: # %entry
1340 ; VLX-NEXT: kmovd %edi, %k1
1341 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
1342 ; VLX-NEXT: kmovd %k0, %eax
1343 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1346 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem:
1347 ; NoVLX: # %bb.0: # %entry
1348 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1349 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
1350 ; NoVLX-NEXT: kmovw %edi, %k1
1351 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1352 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1353 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1354 ; NoVLX-NEXT: kmovw %k0, %eax
1355 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1356 ; NoVLX-NEXT: vzeroupper
1359 %0 = bitcast <2 x i64> %__a to <4 x i32>
1360 %load = load <2 x i64>, <2 x i64>* %__b
1361 %1 = bitcast <2 x i64> %load to <4 x i32>
1362 %2 = icmp eq <4 x i32> %0, %1
1363 %3 = bitcast i8 %__u to <8 x i1>
1364 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1365 %4 = and <4 x i1> %2, %extract.i
1366 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1367 %6 = bitcast <16 x i1> %5 to i16
; Unmasked i32 equality compare of %__a against a scalar i32 loaded from %__b
; and splatted to all four lanes (insertelement + all-zero shuffle mask): the
; <4 x i1> result is widened to <16 x i1> with zeroinitializer lanes and
; bitcast to i16.
; The VLX run expects an embedded {1to4} broadcast operand; the NoVLX run
; expects a {1to16} broadcast on a zmm compare, trimmed to 4 mask bits.
1372 define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
1373 ; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b:
1374 ; VLX: # %bb.0: # %entry
1375 ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
1376 ; VLX-NEXT: kmovd %k0, %eax
1377 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1380 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b:
1381 ; NoVLX: # %bb.0: # %entry
1382 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1383 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
1384 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1385 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1386 ; NoVLX-NEXT: kmovw %k0, %eax
1387 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1388 ; NoVLX-NEXT: vzeroupper
1391 %0 = bitcast <2 x i64> %__a to <4 x i32>
1392 %load = load i32, i32* %__b
1393 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1394 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1395 %2 = icmp eq <4 x i32> %0, %1
1396 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1397 %4 = bitcast <16 x i1> %3 to i16
; Masked i32 equality compare of %__a against a scalar i32 loaded from %__b
; and splatted to all four lanes: the <4 x i1> result is ANDed with the low
; four bits of %__u (mask is the first `and` operand here), widened to
; <16 x i1> with zeroinitializer lanes, and bitcast to i16.
; The VLX run expects a masked compare with a {1to4} broadcast operand; the
; NoVLX run expects a masked {1to16} zmm compare trimmed to 4 mask bits.
1401 define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
1402 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
1403 ; VLX: # %bb.0: # %entry
1404 ; VLX-NEXT: kmovd %edi, %k1
1405 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
1406 ; VLX-NEXT: kmovd %k0, %eax
1407 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1410 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
1411 ; NoVLX: # %bb.0: # %entry
1412 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1413 ; NoVLX-NEXT: kmovw %edi, %k1
1414 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
1415 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1416 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1417 ; NoVLX-NEXT: kmovw %k0, %eax
1418 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1419 ; NoVLX-NEXT: vzeroupper
1422 %0 = bitcast <2 x i64> %__a to <4 x i32>
1423 %load = load i32, i32* %__b
1424 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1425 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1426 %2 = icmp eq <4 x i32> %0, %1
1427 %3 = bitcast i8 %__u to <8 x i1>
1428 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1429 %4 = and <4 x i1> %extract.i, %2
1430 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1431 %6 = bitcast <16 x i1> %5 to i16
; Register-register variant, unmasked: %__a and %__b are compared for equality
; as <4 x i32>. The <4 x i1> result is widened to <32 x i1> (shuffle indices
; 4-7 select lanes of the zeroinitializer operand, so elements 4-31 are zero)
; and bitcast to i32.
; Expected codegen: a 128-bit vpcmpeqd into %k0 with AVX512VL; with AVX512F
; only, both operands are treated as zmm and the upper 12 mask bits are
; cleared with the kshiftlw/kshiftrw $12 pair.
1436 define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1437 ; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
1438 ; VLX: # %bb.0: # %entry
1439 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
1440 ; VLX-NEXT: kmovd %k0, %eax
1443 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
1444 ; NoVLX: # %bb.0: # %entry
1445 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1446 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1447 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1448 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1449 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1450 ; NoVLX-NEXT: kmovw %k0, %eax
1451 ; NoVLX-NEXT: vzeroupper
1454 %0 = bitcast <2 x i64> %__a to <4 x i32>
1455 %1 = bitcast <2 x i64> %__b to <4 x i32>
1456 %2 = icmp eq <4 x i32> %0, %1
1457 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1458 %4 = bitcast <32 x i1> %3 to i32
; Full-vector load variant, unmasked: %__a is compared for equality with the
; <2 x i64> loaded from %__b, both viewed as <4 x i32>. The <4 x i1> result is
; widened to <32 x i1> with zero upper elements and bitcast to i32.
; Expected codegen: the AVX512VL run folds the load into vpcmpeqd; the
; AVX512F-only run loads into xmm1 with vmovdqa, compares in 512 bits and
; clears the upper 12 mask bits with the kshiftlw/kshiftrw $12 pair.
1462 define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1463 ; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
1464 ; VLX: # %bb.0: # %entry
1465 ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
1466 ; VLX-NEXT: kmovd %k0, %eax
1469 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
1470 ; NoVLX: # %bb.0: # %entry
1471 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1472 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
1473 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1474 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1475 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1476 ; NoVLX-NEXT: kmovw %k0, %eax
1477 ; NoVLX-NEXT: vzeroupper
1480 %0 = bitcast <2 x i64> %__a to <4 x i32>
1481 %load = load <2 x i64>, <2 x i64>* %__b
1482 %1 = bitcast <2 x i64> %load to <4 x i32>
1483 %2 = icmp eq <4 x i32> %0, %1
1484 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1485 %4 = bitcast <32 x i1> %3 to i32
; Register-register variant, masked: %__a and %__b are compared for equality
; as <4 x i32> and the result is ANDed with elements 0-3 of %__u viewed as
; <8 x i1>. The <4 x i1> value is widened to <32 x i1> with zero upper
; elements and bitcast to i32.
; Expected codegen: %__u is moved into %k1 and applied as the compare's
; write-mask; the AVX512F-only run additionally clears the upper 12 mask bits
; with the kshiftlw/kshiftrw $12 pair.
1489 define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1490 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
1491 ; VLX: # %bb.0: # %entry
1492 ; VLX-NEXT: kmovd %edi, %k1
1493 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1494 ; VLX-NEXT: kmovd %k0, %eax
1497 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
1498 ; NoVLX: # %bb.0: # %entry
1499 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1500 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1501 ; NoVLX-NEXT: kmovw %edi, %k1
1502 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1503 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1504 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1505 ; NoVLX-NEXT: kmovw %k0, %eax
1506 ; NoVLX-NEXT: vzeroupper
1509 %0 = bitcast <2 x i64> %__a to <4 x i32>
1510 %1 = bitcast <2 x i64> %__b to <4 x i32>
1511 %2 = icmp eq <4 x i32> %0, %1
1512 %3 = bitcast i8 %__u to <8 x i1>
1513 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1514 %4 = and <4 x i1> %2, %extract.i
1515 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1516 %6 = bitcast <32 x i1> %5 to i32
; Full-vector load variant, masked: %__a is compared for equality with the
; <2 x i64> loaded from %__b (both as <4 x i32>) and the result is ANDed with
; elements 0-3 of %__u viewed as <8 x i1>. The <4 x i1> value is widened to
; <32 x i1> with zero upper elements and bitcast to i32.
; Expected codegen: %__u goes to %k1 as write-mask; the AVX512VL run folds the
; load, the AVX512F-only run loads via vmovdqa, compares in 512 bits and clears
; the upper 12 mask bits with the kshiftlw/kshiftrw $12 pair.
1520 define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1521 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
1522 ; VLX: # %bb.0: # %entry
1523 ; VLX-NEXT: kmovd %edi, %k1
1524 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
1525 ; VLX-NEXT: kmovd %k0, %eax
1528 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
1529 ; NoVLX: # %bb.0: # %entry
1530 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1531 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
1532 ; NoVLX-NEXT: kmovw %edi, %k1
1533 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1534 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1535 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1536 ; NoVLX-NEXT: kmovw %k0, %eax
1537 ; NoVLX-NEXT: vzeroupper
1540 %0 = bitcast <2 x i64> %__a to <4 x i32>
1541 %load = load <2 x i64>, <2 x i64>* %__b
1542 %1 = bitcast <2 x i64> %load to <4 x i32>
1543 %2 = icmp eq <4 x i32> %0, %1
1544 %3 = bitcast i8 %__u to <8 x i1>
1545 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1546 %4 = and <4 x i1> %2, %extract.i
1547 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1548 %6 = bitcast <32 x i1> %5 to i32
; Broadcast-load variant, unmasked: one i32 loaded from %__b is splatted to
; <4 x i32> and compared for equality with %__a. The <4 x i1> result is widened
; to <32 x i1> with zero upper elements and bitcast to i32.
; Expected codegen: {1to4} embedded broadcast on xmm with AVX512VL; {1to16} on
; zmm plus the kshiftlw/kshiftrw $12 pair (clearing the upper 12 mask bits)
; with AVX512F only.
1553 define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
1554 ; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
1555 ; VLX: # %bb.0: # %entry
1556 ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
1557 ; VLX-NEXT: kmovd %k0, %eax
1560 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
1561 ; NoVLX: # %bb.0: # %entry
1562 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1563 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
1564 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1565 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1566 ; NoVLX-NEXT: kmovw %k0, %eax
1567 ; NoVLX-NEXT: vzeroupper
1570 %0 = bitcast <2 x i64> %__a to <4 x i32>
1571 %load = load i32, i32* %__b
1572 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1573 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1574 %2 = icmp eq <4 x i32> %0, %1
1575 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1576 %4 = bitcast <32 x i1> %3 to i32
; Broadcast-load variant, masked: one i32 loaded from %__b is splatted to
; <4 x i32>, compared for equality with %__a, and ANDed with elements 0-3 of
; %__u viewed as <8 x i1>. The <4 x i1> value is widened to <32 x i1> with
; zero upper elements and bitcast to i32.
; Expected codegen: %__u goes to %k1 as write-mask of a broadcast compare
; ({1to4} on xmm with AVX512VL; {1to16} on zmm plus the kshiftlw/kshiftrw $12
; pair with AVX512F only).
1580 define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
1581 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
1582 ; VLX: # %bb.0: # %entry
1583 ; VLX-NEXT: kmovd %edi, %k1
1584 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
1585 ; VLX-NEXT: kmovd %k0, %eax
1588 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
1589 ; NoVLX: # %bb.0: # %entry
1590 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1591 ; NoVLX-NEXT: kmovw %edi, %k1
1592 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
1593 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1594 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1595 ; NoVLX-NEXT: kmovw %k0, %eax
1596 ; NoVLX-NEXT: vzeroupper
1599 %0 = bitcast <2 x i64> %__a to <4 x i32>
1600 %load = load i32, i32* %__b
1601 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1602 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1603 %2 = icmp eq <4 x i32> %0, %1
1604 %3 = bitcast i8 %__u to <8 x i1>
1605 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1606 %4 = and <4 x i1> %extract.i, %2
1607 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1608 %6 = bitcast <32 x i1> %5 to i32
; Register-register variant, unmasked: %__a and %__b are compared for equality
; as <4 x i32>. The <4 x i1> result is widened to <64 x i1> (shuffle indices
; 4-7 select lanes of the zeroinitializer operand, so elements 4-63 are zero)
; and bitcast to i64.
; Expected codegen: a 128-bit vpcmpeqd read back with kmovq with AVX512VL;
; with AVX512F only, a 512-bit compare whose upper 12 mask bits are cleared by
; the kshiftlw/kshiftrw $12 pair before kmovw.
1613 define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1614 ; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
1615 ; VLX: # %bb.0: # %entry
1616 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
1617 ; VLX-NEXT: kmovq %k0, %rax
1620 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
1621 ; NoVLX: # %bb.0: # %entry
1622 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1623 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1624 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1625 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1626 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1627 ; NoVLX-NEXT: kmovw %k0, %eax
1628 ; NoVLX-NEXT: vzeroupper
1631 %0 = bitcast <2 x i64> %__a to <4 x i32>
1632 %1 = bitcast <2 x i64> %__b to <4 x i32>
1633 %2 = icmp eq <4 x i32> %0, %1
1634 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1635 %4 = bitcast <64 x i1> %3 to i64
; Full-vector load variant, unmasked: %__a is compared for equality with the
; <2 x i64> loaded from %__b, both viewed as <4 x i32>. The <4 x i1> result is
; widened to <64 x i1> with zero upper elements and bitcast to i64.
; Expected codegen: the AVX512VL run folds the load into vpcmpeqd and reads the
; mask with kmovq; the AVX512F-only run loads via vmovdqa, compares in 512 bits
; and clears the upper 12 mask bits with the kshiftlw/kshiftrw $12 pair.
1639 define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1640 ; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
1641 ; VLX: # %bb.0: # %entry
1642 ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
1643 ; VLX-NEXT: kmovq %k0, %rax
1646 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
1647 ; NoVLX: # %bb.0: # %entry
1648 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1649 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
1650 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1651 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1652 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1653 ; NoVLX-NEXT: kmovw %k0, %eax
1654 ; NoVLX-NEXT: vzeroupper
1657 %0 = bitcast <2 x i64> %__a to <4 x i32>
1658 %load = load <2 x i64>, <2 x i64>* %__b
1659 %1 = bitcast <2 x i64> %load to <4 x i32>
1660 %2 = icmp eq <4 x i32> %0, %1
1661 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1662 %4 = bitcast <64 x i1> %3 to i64
; Register-register variant, masked: %__a and %__b are compared for equality
; as <4 x i32> and the result is ANDed with elements 0-3 of %__u viewed as
; <8 x i1>. The <4 x i1> value is widened to <64 x i1> with zero upper
; elements and bitcast to i64.
; Expected codegen: %__u is moved into %k1 and applied as the compare's
; write-mask; the AVX512F-only run additionally clears the upper 12 mask bits
; with the kshiftlw/kshiftrw $12 pair.
1666 define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1667 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
1668 ; VLX: # %bb.0: # %entry
1669 ; VLX-NEXT: kmovd %edi, %k1
1670 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1671 ; VLX-NEXT: kmovq %k0, %rax
1674 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
1675 ; NoVLX: # %bb.0: # %entry
1676 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1677 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1678 ; NoVLX-NEXT: kmovw %edi, %k1
1679 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1680 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1681 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1682 ; NoVLX-NEXT: kmovw %k0, %eax
1683 ; NoVLX-NEXT: vzeroupper
1686 %0 = bitcast <2 x i64> %__a to <4 x i32>
1687 %1 = bitcast <2 x i64> %__b to <4 x i32>
1688 %2 = icmp eq <4 x i32> %0, %1
1689 %3 = bitcast i8 %__u to <8 x i1>
1690 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1691 %4 = and <4 x i1> %2, %extract.i
1692 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1693 %6 = bitcast <64 x i1> %5 to i64
; Full-vector load variant, masked: %__a is compared for equality with the
; <2 x i64> loaded from %__b (both as <4 x i32>) and the result is ANDed with
; elements 0-3 of %__u viewed as <8 x i1>. The <4 x i1> value is widened to
; <64 x i1> with zero upper elements and bitcast to i64.
; Expected codegen: %__u goes to %k1 as write-mask; the AVX512VL run folds the
; load, the AVX512F-only run loads via vmovdqa, compares in 512 bits and clears
; the upper 12 mask bits with the kshiftlw/kshiftrw $12 pair.
1697 define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1698 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem:
1699 ; VLX: # %bb.0: # %entry
1700 ; VLX-NEXT: kmovd %edi, %k1
1701 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
1702 ; VLX-NEXT: kmovq %k0, %rax
1705 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem:
1706 ; NoVLX: # %bb.0: # %entry
1707 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1708 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
1709 ; NoVLX-NEXT: kmovw %edi, %k1
1710 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1711 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1712 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1713 ; NoVLX-NEXT: kmovw %k0, %eax
1714 ; NoVLX-NEXT: vzeroupper
1717 %0 = bitcast <2 x i64> %__a to <4 x i32>
1718 %load = load <2 x i64>, <2 x i64>* %__b
1719 %1 = bitcast <2 x i64> %load to <4 x i32>
1720 %2 = icmp eq <4 x i32> %0, %1
1721 %3 = bitcast i8 %__u to <8 x i1>
1722 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1723 %4 = and <4 x i1> %2, %extract.i
1724 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1725 %6 = bitcast <64 x i1> %5 to i64
; Broadcast-load variant, unmasked: one i32 loaded from %__b is splatted to
; <4 x i32> and compared for equality with %__a. The <4 x i1> result is widened
; to <64 x i1> with zero upper elements and bitcast to i64.
; Expected codegen: {1to4} embedded broadcast on xmm with AVX512VL; {1to16} on
; zmm plus the kshiftlw/kshiftrw $12 pair (clearing the upper 12 mask bits)
; with AVX512F only.
1730 define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
1731 ; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b:
1732 ; VLX: # %bb.0: # %entry
1733 ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
1734 ; VLX-NEXT: kmovq %k0, %rax
1737 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b:
1738 ; NoVLX: # %bb.0: # %entry
1739 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1740 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
1741 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1742 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1743 ; NoVLX-NEXT: kmovw %k0, %eax
1744 ; NoVLX-NEXT: vzeroupper
1747 %0 = bitcast <2 x i64> %__a to <4 x i32>
1748 %load = load i32, i32* %__b
1749 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1750 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1751 %2 = icmp eq <4 x i32> %0, %1
1752 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1753 %4 = bitcast <64 x i1> %3 to i64
; Broadcast-load variant, masked: one i32 loaded from %__b is splatted to
; <4 x i32>, compared for equality with %__a, and ANDed with elements 0-3 of
; %__u viewed as <8 x i1>. The <4 x i1> value is widened to <64 x i1> with
; zero upper elements and bitcast to i64.
; Expected codegen: %__u goes to %k1 as write-mask of a broadcast compare
; ({1to4} on xmm with AVX512VL; {1to16} on zmm plus the kshiftlw/kshiftrw $12
; pair with AVX512F only).
1757 define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
1758 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b:
1759 ; VLX: # %bb.0: # %entry
1760 ; VLX-NEXT: kmovd %edi, %k1
1761 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
1762 ; VLX-NEXT: kmovq %k0, %rax
1765 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b:
1766 ; NoVLX: # %bb.0: # %entry
1767 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1768 ; NoVLX-NEXT: kmovw %edi, %k1
1769 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
1770 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1771 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1772 ; NoVLX-NEXT: kmovw %k0, %eax
1773 ; NoVLX-NEXT: vzeroupper
1776 %0 = bitcast <2 x i64> %__a to <4 x i32>
1777 %load = load i32, i32* %__b
1778 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1779 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1780 %2 = icmp eq <4 x i32> %0, %1
1781 %3 = bitcast i8 %__u to <8 x i1>
1782 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1783 %4 = and <4 x i1> %extract.i, %2
1784 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1785 %6 = bitcast <64 x i1> %5 to i64
; Register-register variant, unmasked, 256-bit operands: %__a and %__b are
; compared for equality as <8 x i32>. The <8 x i1> result is widened to
; <16 x i1> (shuffle indices 8-15 select lanes of the zeroinitializer operand,
; so elements 8-15 are zero) and bitcast to i16.
; Expected codegen: a ymm vpcmpeqd into %k0 with AVX512VL; with AVX512F only,
; both operands are treated as zmm and the upper 8 mask bits are cleared with
; the kshiftlw/kshiftrw $8 pair.
1790 define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
1791 ; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask:
1792 ; VLX: # %bb.0: # %entry
1793 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
1794 ; VLX-NEXT: kmovd %k0, %eax
1795 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1796 ; VLX-NEXT: vzeroupper
1799 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask:
1800 ; NoVLX: # %bb.0: # %entry
1801 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
1802 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1803 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1804 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
1805 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
1806 ; NoVLX-NEXT: kmovw %k0, %eax
1807 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1808 ; NoVLX-NEXT: vzeroupper
1811 %0 = bitcast <4 x i64> %__a to <8 x i32>
1812 %1 = bitcast <4 x i64> %__b to <8 x i32>
1813 %2 = icmp eq <8 x i32> %0, %1
1814 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1815 %4 = bitcast <16 x i1> %3 to i16
; Full-vector load variant, unmasked, 256-bit operands: %__a is compared for
; equality with the <4 x i64> loaded from %__b, both viewed as <8 x i32>. The
; <8 x i1> result is widened to <16 x i1> with zero upper elements and bitcast
; to i16.
; Expected codegen: the AVX512VL run folds the load into a ymm vpcmpeqd; the
; AVX512F-only run loads into ymm1 with vmovdqa, compares in 512 bits and
; clears the upper 8 mask bits with the kshiftlw/kshiftrw $8 pair.
1819 define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
1820 ; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem:
1821 ; VLX: # %bb.0: # %entry
1822 ; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
1823 ; VLX-NEXT: kmovd %k0, %eax
1824 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1825 ; VLX-NEXT: vzeroupper
1828 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem:
1829 ; NoVLX: # %bb.0: # %entry
1830 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1831 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
1832 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1833 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
1834 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
1835 ; NoVLX-NEXT: kmovw %k0, %eax
1836 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1837 ; NoVLX-NEXT: vzeroupper
1840 %0 = bitcast <4 x i64> %__a to <8 x i32>
1841 %load = load <4 x i64>, <4 x i64>* %__b
1842 %1 = bitcast <4 x i64> %load to <8 x i32>
1843 %2 = icmp eq <8 x i32> %0, %1
1844 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1845 %4 = bitcast <16 x i1> %3 to i16
; Register-register variant, masked, 256-bit operands: %__a and %__b are
; compared for equality as <8 x i32> and the result is ANDed with %__u viewed
; as <8 x i1>. The <8 x i1> value is widened to <16 x i1> with zero upper
; elements and bitcast to i16.
; Expected codegen: %__u is moved into %k1 and applied as the compare's
; write-mask; the AVX512F-only run additionally clears the upper 8 mask bits
; with the kshiftlw/kshiftrw $8 pair.
1849 define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
1850 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask:
1851 ; VLX: # %bb.0: # %entry
1852 ; VLX-NEXT: kmovd %edi, %k1
1853 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
1854 ; VLX-NEXT: kmovd %k0, %eax
1855 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1856 ; VLX-NEXT: vzeroupper
1859 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask:
1860 ; NoVLX: # %bb.0: # %entry
1861 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
1862 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1863 ; NoVLX-NEXT: kmovw %edi, %k1
1864 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1865 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
1866 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
1867 ; NoVLX-NEXT: kmovw %k0, %eax
1868 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1869 ; NoVLX-NEXT: vzeroupper
1872 %0 = bitcast <4 x i64> %__a to <8 x i32>
1873 %1 = bitcast <4 x i64> %__b to <8 x i32>
1874 %2 = icmp eq <8 x i32> %0, %1
1875 %3 = bitcast i8 %__u to <8 x i1>
1876 %4 = and <8 x i1> %2, %3
1877 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1878 %6 = bitcast <16 x i1> %5 to i16
; Full-vector load variant, masked, 256-bit operands: %__a is compared for
; equality with the <4 x i64> loaded from %__b (both as <8 x i32>) and the
; result is ANDed with %__u viewed as <8 x i1>. The <8 x i1> value is widened
; to <16 x i1> with zero upper elements and bitcast to i16.
; Expected codegen: %__u goes to %k1 as write-mask; the AVX512VL run folds the
; load, the AVX512F-only run loads via vmovdqa, compares in 512 bits and clears
; the upper 8 mask bits with the kshiftlw/kshiftrw $8 pair.
1882 define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
1883 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem:
1884 ; VLX: # %bb.0: # %entry
1885 ; VLX-NEXT: kmovd %edi, %k1
1886 ; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
1887 ; VLX-NEXT: kmovd %k0, %eax
1888 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1889 ; VLX-NEXT: vzeroupper
1892 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem:
1893 ; NoVLX: # %bb.0: # %entry
1894 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1895 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
1896 ; NoVLX-NEXT: kmovw %edi, %k1
1897 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1898 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
1899 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
1900 ; NoVLX-NEXT: kmovw %k0, %eax
1901 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1902 ; NoVLX-NEXT: vzeroupper
1905 %0 = bitcast <4 x i64> %__a to <8 x i32>
1906 %load = load <4 x i64>, <4 x i64>* %__b
1907 %1 = bitcast <4 x i64> %load to <8 x i32>
1908 %2 = icmp eq <8 x i32> %0, %1
1909 %3 = bitcast i8 %__u to <8 x i1>
1910 %4 = and <8 x i1> %2, %3
1911 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1912 %6 = bitcast <16 x i1> %5 to i16
; Broadcast-load variant, unmasked, 256-bit operand: one i32 loaded from %__b
; is splatted to <8 x i32> and compared for equality with %__a. The <8 x i1>
; result is widened to <16 x i1> with zero upper elements and bitcast to i16.
; Expected codegen: {1to8} embedded broadcast on ymm with AVX512VL; {1to16} on
; zmm plus the kshiftlw/kshiftrw $8 pair (clearing the upper 8 mask bits) with
; AVX512F only.
1917 define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
1918 ; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b:
1919 ; VLX: # %bb.0: # %entry
1920 ; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
1921 ; VLX-NEXT: kmovd %k0, %eax
1922 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1923 ; VLX-NEXT: vzeroupper
1926 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b:
1927 ; NoVLX: # %bb.0: # %entry
1928 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1929 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
1930 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
1931 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
1932 ; NoVLX-NEXT: kmovw %k0, %eax
1933 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1934 ; NoVLX-NEXT: vzeroupper
1937 %0 = bitcast <4 x i64> %__a to <8 x i32>
1938 %load = load i32, i32* %__b
1939 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
1940 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1941 %2 = icmp eq <8 x i32> %0, %1
1942 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1943 %4 = bitcast <16 x i1> %3 to i16
; Broadcast-load variant, masked, 256-bit operand: one i32 loaded from %__b is
; splatted to <8 x i32>, compared for equality with %__a, and ANDed with %__u
; viewed as <8 x i1>. The <8 x i1> value is widened to <16 x i1> with zero
; upper elements and bitcast to i16.
; Expected codegen: %__u goes to %k1 as write-mask of a broadcast compare
; ({1to8} on ymm with AVX512VL; {1to16} on zmm plus the kshiftlw/kshiftrw $8
; pair with AVX512F only).
1947 define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
1948 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b:
1949 ; VLX: # %bb.0: # %entry
1950 ; VLX-NEXT: kmovd %edi, %k1
1951 ; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
1952 ; VLX-NEXT: kmovd %k0, %eax
1953 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1954 ; VLX-NEXT: vzeroupper
1957 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b:
1958 ; NoVLX: # %bb.0: # %entry
1959 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1960 ; NoVLX-NEXT: kmovw %edi, %k1
1961 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
1962 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
1963 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
1964 ; NoVLX-NEXT: kmovw %k0, %eax
1965 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1966 ; NoVLX-NEXT: vzeroupper
1969 %0 = bitcast <4 x i64> %__a to <8 x i32>
1970 %load = load i32, i32* %__b
1971 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
1972 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1973 %2 = icmp eq <8 x i32> %0, %1
1974 %3 = bitcast i8 %__u to <8 x i1>
1975 %4 = and <8 x i1> %3, %2
1976 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1977 %6 = bitcast <16 x i1> %5 to i16
; Register-register variant, unmasked, 256-bit operands: %__a and %__b are
; compared for equality as <8 x i32>. The <8 x i1> result is widened to
; <32 x i1> (shuffle indices 8-15 select lanes of the zeroinitializer operand,
; so elements 8-31 are zero) and bitcast to i32.
; Expected codegen: a ymm vpcmpeqd into %k0 with AVX512VL; with AVX512F only,
; a 512-bit compare whose upper 8 mask bits are cleared by the
; kshiftlw/kshiftrw $8 pair.
1982 define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
1983 ; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask:
1984 ; VLX: # %bb.0: # %entry
1985 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
1986 ; VLX-NEXT: kmovd %k0, %eax
1987 ; VLX-NEXT: vzeroupper
1990 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask:
1991 ; NoVLX: # %bb.0: # %entry
1992 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
1993 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1994 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1995 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
1996 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
1997 ; NoVLX-NEXT: kmovw %k0, %eax
1998 ; NoVLX-NEXT: vzeroupper
2001 %0 = bitcast <4 x i64> %__a to <8 x i32>
2002 %1 = bitcast <4 x i64> %__b to <8 x i32>
2003 %2 = icmp eq <8 x i32> %0, %1
2004 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2005 %4 = bitcast <32 x i1> %3 to i32
; Full-vector load variant, unmasked, 256-bit operands: %__a is compared for
; equality with the <4 x i64> loaded from %__b, both viewed as <8 x i32>. The
; <8 x i1> result is widened to <32 x i1> with zero upper elements and bitcast
; to i32.
; Expected codegen: the AVX512VL run folds the load into a ymm vpcmpeqd; the
; AVX512F-only run loads into ymm1 with vmovdqa, compares in 512 bits and
; clears the upper 8 mask bits with the kshiftlw/kshiftrw $8 pair.
2009 define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
2010 ; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem:
2011 ; VLX: # %bb.0: # %entry
2012 ; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
2013 ; VLX-NEXT: kmovd %k0, %eax
2014 ; VLX-NEXT: vzeroupper
2017 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem:
2018 ; NoVLX: # %bb.0: # %entry
2019 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2020 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
2021 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2022 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2023 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2024 ; NoVLX-NEXT: kmovw %k0, %eax
2025 ; NoVLX-NEXT: vzeroupper
2028 %0 = bitcast <4 x i64> %__a to <8 x i32>
2029 %load = load <4 x i64>, <4 x i64>* %__b
2030 %1 = bitcast <4 x i64> %load to <8 x i32>
2031 %2 = icmp eq <8 x i32> %0, %1
2032 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2033 %4 = bitcast <32 x i1> %3 to i32
; Register-register variant, masked, 256-bit operands: %__a and %__b are
; compared for equality as <8 x i32> and the result is ANDed with %__u viewed
; as <8 x i1>. The <8 x i1> value is widened to <32 x i1> with zero upper
; elements and bitcast to i32.
; Expected codegen: %__u is moved into %k1 and applied as the compare's
; write-mask; the AVX512F-only run additionally clears the upper 8 mask bits
; with the kshiftlw/kshiftrw $8 pair.
2037 define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
2038 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask:
2039 ; VLX: # %bb.0: # %entry
2040 ; VLX-NEXT: kmovd %edi, %k1
2041 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
2042 ; VLX-NEXT: kmovd %k0, %eax
2043 ; VLX-NEXT: vzeroupper
2046 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask:
2047 ; NoVLX: # %bb.0: # %entry
2048 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
2049 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2050 ; NoVLX-NEXT: kmovw %edi, %k1
2051 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2052 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2053 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2054 ; NoVLX-NEXT: kmovw %k0, %eax
2055 ; NoVLX-NEXT: vzeroupper
2058 %0 = bitcast <4 x i64> %__a to <8 x i32>
2059 %1 = bitcast <4 x i64> %__b to <8 x i32>
2060 %2 = icmp eq <8 x i32> %0, %1
2061 %3 = bitcast i8 %__u to <8 x i1>
2062 %4 = and <8 x i1> %2, %3
2063 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2064 %6 = bitcast <32 x i1> %5 to i32
; Full-vector load variant, masked, 256-bit operands: %__a is compared for
; equality with the <4 x i64> loaded from %__b (both as <8 x i32>) and the
; result is ANDed with %__u viewed as <8 x i1>. The <8 x i1> value is widened
; to <32 x i1> with zero upper elements and bitcast to i32.
; Expected codegen: %__u goes to %k1 as write-mask; the AVX512VL run folds the
; load, the AVX512F-only run loads via vmovdqa, compares in 512 bits and clears
; the upper 8 mask bits with the kshiftlw/kshiftrw $8 pair.
2068 define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
2069 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem:
2070 ; VLX: # %bb.0: # %entry
2071 ; VLX-NEXT: kmovd %edi, %k1
2072 ; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
2073 ; VLX-NEXT: kmovd %k0, %eax
2074 ; VLX-NEXT: vzeroupper
2077 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem:
2078 ; NoVLX: # %bb.0: # %entry
2079 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2080 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
2081 ; NoVLX-NEXT: kmovw %edi, %k1
2082 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2083 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2084 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2085 ; NoVLX-NEXT: kmovw %k0, %eax
2086 ; NoVLX-NEXT: vzeroupper
2089 %0 = bitcast <4 x i64> %__a to <8 x i32>
2090 %load = load <4 x i64>, <4 x i64>* %__b
2091 %1 = bitcast <4 x i64> %load to <8 x i32>
2092 %2 = icmp eq <8 x i32> %0, %1
2093 %3 = bitcast i8 %__u to <8 x i1>
2094 %4 = and <8 x i1> %2, %3
2095 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2096 %6 = bitcast <32 x i1> %5 to i32
; Broadcast-load variant, unmasked, 256-bit operand: one i32 loaded from %__b
; is splatted to <8 x i32> and compared for equality with %__a. The <8 x i1>
; result is widened to <32 x i1> with zero upper elements and bitcast to i32.
; Expected codegen: {1to8} embedded broadcast on ymm with AVX512VL; {1to16} on
; zmm plus the kshiftlw/kshiftrw $8 pair (clearing the upper 8 mask bits) with
; AVX512F only.
2101 define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
2102 ; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b:
2103 ; VLX: # %bb.0: # %entry
2104 ; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
2105 ; VLX-NEXT: kmovd %k0, %eax
2106 ; VLX-NEXT: vzeroupper
2109 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b:
2110 ; NoVLX: # %bb.0: # %entry
2111 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2112 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2113 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2114 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2115 ; NoVLX-NEXT: kmovw %k0, %eax
2116 ; NoVLX-NEXT: vzeroupper
2119 %0 = bitcast <4 x i64> %__a to <8 x i32>
2120 %load = load i32, i32* %__b
2121 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2122 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2123 %2 = icmp eq <8 x i32> %0, %1
2124 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2125 %4 = bitcast <32 x i1> %3 to i32
; Masked variant of the broadcast compare: the i8 %__u is bitcast to <8 x i1>
; and ANDed with the result of comparing <8 x i32> %__a against a splatted i32
; loaded from %__b. The result is zero-padded to <32 x i1> and bitcast to i32.
; Both check sets expect %edi moved into %k1 and used as the write mask of the
; compare; the NoVLX sequence additionally widens to zmm ({1to16}) and clears
; the upper 8 mask bits with a kshiftlw/kshiftrw $8 pair.
2129 define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
2130 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b:
2131 ; VLX: # %bb.0: # %entry
2132 ; VLX-NEXT: kmovd %edi, %k1
2133 ; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
2134 ; VLX-NEXT: kmovd %k0, %eax
2135 ; VLX-NEXT: vzeroupper
2138 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b:
2139 ; NoVLX: # %bb.0: # %entry
2140 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2141 ; NoVLX-NEXT: kmovw %edi, %k1
2142 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
2143 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2144 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2145 ; NoVLX-NEXT: kmovw %k0, %eax
2146 ; NoVLX-NEXT: vzeroupper
2149 %0 = bitcast <4 x i64> %__a to <8 x i32>
2150 %load = load i32, i32* %__b
2151 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2152 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2153 %2 = icmp eq <8 x i32> %0, %1
2154 %3 = bitcast i8 %__u to <8 x i1>
2155 %4 = and <8 x i1> %3, %2
2156 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2157 %6 = bitcast <32 x i1> %5 to i32
; Equality compare of two register operands viewed as <8 x i32>. The <8 x i1>
; result is widened to <64 x i1> (every shuffle index above 7 selects a lane of
; the zeroinitializer operand) and bitcast to i64.
; The VLX checks expect a ymm vpcmpeqd and a kmovq to %rax; the NoVLX checks
; expect a zmm compare, a kshiftlw/kshiftrw $8 pair that zeroes the upper 8
; mask bits, and a kmovw to %eax.
2162 define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
2163 ; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask:
2164 ; VLX: # %bb.0: # %entry
2165 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
2166 ; VLX-NEXT: kmovq %k0, %rax
2167 ; VLX-NEXT: vzeroupper
2170 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask:
2171 ; NoVLX: # %bb.0: # %entry
2172 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
2173 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2174 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2175 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2176 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2177 ; NoVLX-NEXT: kmovw %k0, %eax
2178 ; NoVLX-NEXT: vzeroupper
2181 %0 = bitcast <4 x i64> %__a to <8 x i32>
2182 %1 = bitcast <4 x i64> %__b to <8 x i32>
2183 %2 = icmp eq <8 x i32> %0, %1
2184 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2185 %4 = bitcast <64 x i1> %3 to i64
; Equality compare of <8 x i32> %__a against a full <4 x i64> vector loaded
; from %__b (viewed as <8 x i32>). The <8 x i1> result is zero-padded to
; <64 x i1> and bitcast to i64.
; The VLX checks expect the load folded into the ymm compare as (%rdi); the
; NoVLX checks expect a separate vmovdqa into %ymm1, a zmm compare, and a
; kshiftlw/kshiftrw $8 pair that zeroes the upper 8 mask bits.
2189 define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
2190 ; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem:
2191 ; VLX: # %bb.0: # %entry
2192 ; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
2193 ; VLX-NEXT: kmovq %k0, %rax
2194 ; VLX-NEXT: vzeroupper
2197 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem:
2198 ; NoVLX: # %bb.0: # %entry
2199 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2200 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
2201 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2202 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2203 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2204 ; NoVLX-NEXT: kmovw %k0, %eax
2205 ; NoVLX-NEXT: vzeroupper
2208 %0 = bitcast <4 x i64> %__a to <8 x i32>
2209 %load = load <4 x i64>, <4 x i64>* %__b
2210 %1 = bitcast <4 x i64> %load to <8 x i32>
2211 %2 = icmp eq <8 x i32> %0, %1
2212 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2213 %4 = bitcast <64 x i1> %3 to i64
; Masked register-register compare: the <8 x i32> equality result is ANDed
; with the i8 %__u bitcast to <8 x i1>, zero-padded to <64 x i1>, and bitcast
; to i64.
; Both check sets expect %edi moved into %k1 and used as the write mask of the
; compare. The NoVLX sequence widens the operands to zmm and clears the upper
; 8 mask bits with a kshiftlw/kshiftrw $8 pair before kmovw.
2217 define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
2218 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask:
2219 ; VLX: # %bb.0: # %entry
2220 ; VLX-NEXT: kmovd %edi, %k1
2221 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
2222 ; VLX-NEXT: kmovq %k0, %rax
2223 ; VLX-NEXT: vzeroupper
2226 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask:
2227 ; NoVLX: # %bb.0: # %entry
2228 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
2229 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2230 ; NoVLX-NEXT: kmovw %edi, %k1
2231 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2232 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2233 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2234 ; NoVLX-NEXT: kmovw %k0, %eax
2235 ; NoVLX-NEXT: vzeroupper
2238 %0 = bitcast <4 x i64> %__a to <8 x i32>
2239 %1 = bitcast <4 x i64> %__b to <8 x i32>
2240 %2 = icmp eq <8 x i32> %0, %1
2241 %3 = bitcast i8 %__u to <8 x i1>
2242 %4 = and <8 x i1> %2, %3
2243 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2244 %6 = bitcast <64 x i1> %5 to i64
; Masked compare with a memory operand: <8 x i32> %__a is compared for
; equality against a <4 x i64> vector loaded from %__b, ANDed with the i8
; %__u bitcast to <8 x i1>, zero-padded to <64 x i1>, and bitcast to i64.
; The VLX checks expect the load folded as (%rsi) with {%k1} masking; the
; NoVLX checks expect a separate vmovdqa, a masked zmm compare, and a
; kshiftlw/kshiftrw $8 pair that zeroes the upper 8 mask bits.
2248 define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
2249 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem:
2250 ; VLX: # %bb.0: # %entry
2251 ; VLX-NEXT: kmovd %edi, %k1
2252 ; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
2253 ; VLX-NEXT: kmovq %k0, %rax
2254 ; VLX-NEXT: vzeroupper
2257 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem:
2258 ; NoVLX: # %bb.0: # %entry
2259 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2260 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
2261 ; NoVLX-NEXT: kmovw %edi, %k1
2262 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2263 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2264 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2265 ; NoVLX-NEXT: kmovw %k0, %eax
2266 ; NoVLX-NEXT: vzeroupper
2269 %0 = bitcast <4 x i64> %__a to <8 x i32>
2270 %load = load <4 x i64>, <4 x i64>* %__b
2271 %1 = bitcast <4 x i64> %load to <8 x i32>
2272 %2 = icmp eq <8 x i32> %0, %1
2273 %3 = bitcast i8 %__u to <8 x i1>
2274 %4 = and <8 x i1> %2, %3
2275 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2276 %6 = bitcast <64 x i1> %5 to i64
; Equality compare of <8 x i32> %__a against one i32 loaded from %__b and
; splatted to all 8 lanes. The <8 x i1> result is zero-padded to <64 x i1> and
; bitcast to i64.
; The VLX checks expect the load folded as a {1to8} broadcast on ymm with a
; kmovq to %rax; the NoVLX checks expect a zmm compare with {1to16} and a
; kshiftlw/kshiftrw $8 pair that zeroes the upper 8 mask bits.
2281 define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
2282 ; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b:
2283 ; VLX: # %bb.0: # %entry
2284 ; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
2285 ; VLX-NEXT: kmovq %k0, %rax
2286 ; VLX-NEXT: vzeroupper
2289 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b:
2290 ; NoVLX: # %bb.0: # %entry
2291 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2292 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2293 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2294 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2295 ; NoVLX-NEXT: kmovw %k0, %eax
2296 ; NoVLX-NEXT: vzeroupper
2299 %0 = bitcast <4 x i64> %__a to <8 x i32>
2300 %load = load i32, i32* %__b
2301 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2302 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2303 %2 = icmp eq <8 x i32> %0, %1
2304 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2305 %4 = bitcast <64 x i1> %3 to i64
; Masked broadcast compare: the i8 %__u is bitcast to <8 x i1> and ANDed with
; the result of comparing <8 x i32> %__a against a splatted i32 loaded from
; %__b. The result is zero-padded to <64 x i1> and bitcast to i64.
; Both check sets expect %edi moved into %k1 and used as the compare's write
; mask; the NoVLX sequence uses a zmm {1to16} broadcast and clears the upper
; 8 mask bits with a kshiftlw/kshiftrw $8 pair.
2309 define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
2310 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b:
2311 ; VLX: # %bb.0: # %entry
2312 ; VLX-NEXT: kmovd %edi, %k1
2313 ; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
2314 ; VLX-NEXT: kmovq %k0, %rax
2315 ; VLX-NEXT: vzeroupper
2318 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b:
2319 ; NoVLX: # %bb.0: # %entry
2320 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2321 ; NoVLX-NEXT: kmovw %edi, %k1
2322 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
2323 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2324 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2325 ; NoVLX-NEXT: kmovw %k0, %eax
2326 ; NoVLX-NEXT: vzeroupper
2329 %0 = bitcast <4 x i64> %__a to <8 x i32>
2330 %load = load i32, i32* %__b
2331 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2332 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2333 %2 = icmp eq <8 x i32> %0, %1
2334 %3 = bitcast i8 %__u to <8 x i1>
2335 %4 = and <8 x i1> %3, %2
2336 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2337 %6 = bitcast <64 x i1> %5 to i64
; Equality compare of two register operands viewed as <16 x i32>. The
; <16 x i1> result is widened to <32 x i1> (shuffle indices 16-31 select lanes
; of the zeroinitializer operand) and bitcast to i32.
; Both check sets expect a single zmm vpcmpeqd into %k0; they differ only in
; the mask move (kmovd with VLX features, kmovw without).
2342 define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
2343 ; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask:
2344 ; VLX: # %bb.0: # %entry
2345 ; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2346 ; VLX-NEXT: kmovd %k0, %eax
2347 ; VLX-NEXT: vzeroupper
2350 ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask:
2351 ; NoVLX: # %bb.0: # %entry
2352 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2353 ; NoVLX-NEXT: kmovw %k0, %eax
2354 ; NoVLX-NEXT: vzeroupper
2357 %0 = bitcast <8 x i64> %__a to <16 x i32>
2358 %1 = bitcast <8 x i64> %__b to <16 x i32>
2359 %2 = icmp eq <16 x i32> %0, %1
2360 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2361 %4 = bitcast <32 x i1> %3 to i32
; Equality compare of <16 x i32> %__a against a full <8 x i64> vector loaded
; from %__b (viewed as <16 x i32>). The <16 x i1> result is zero-padded to
; <32 x i1> and bitcast to i32.
; Both check sets expect the load folded into the zmm compare as (%rdi); they
; differ only in the mask move (kmovd versus kmovw).
2365 define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
2366 ; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem:
2367 ; VLX: # %bb.0: # %entry
2368 ; VLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
2369 ; VLX-NEXT: kmovd %k0, %eax
2370 ; VLX-NEXT: vzeroupper
2373 ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem:
2374 ; NoVLX: # %bb.0: # %entry
2375 ; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
2376 ; NoVLX-NEXT: kmovw %k0, %eax
2377 ; NoVLX-NEXT: vzeroupper
2380 %0 = bitcast <8 x i64> %__a to <16 x i32>
2381 %load = load <8 x i64>, <8 x i64>* %__b
2382 %1 = bitcast <8 x i64> %load to <16 x i32>
2383 %2 = icmp eq <16 x i32> %0, %1
2384 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2385 %4 = bitcast <32 x i1> %3 to i32
; Masked register-register compare: the <16 x i32> equality result is ANDed
; with the i16 %__u bitcast to <16 x i1>, zero-padded to <32 x i1>, and
; bitcast to i32.
; The two check sets apply the mask differently: with VLX features %edi is
; moved into %k1 and used as the compare's write mask, whereas the NoVLX
; sequence performs an unmasked compare and applies the mask afterwards with
; a scalar andl %edi, %eax.
2389 define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
2390 ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
2391 ; VLX: # %bb.0: # %entry
2392 ; VLX-NEXT: kmovd %edi, %k1
2393 ; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2394 ; VLX-NEXT: kmovd %k0, %eax
2395 ; VLX-NEXT: vzeroupper
2398 ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
2399 ; NoVLX: # %bb.0: # %entry
2400 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2401 ; NoVLX-NEXT: kmovw %k0, %eax
2402 ; NoVLX-NEXT: andl %edi, %eax
2403 ; NoVLX-NEXT: vzeroupper
2406 %0 = bitcast <8 x i64> %__a to <16 x i32>
2407 %1 = bitcast <8 x i64> %__b to <16 x i32>
2408 %2 = icmp eq <16 x i32> %0, %1
2409 %3 = bitcast i16 %__u to <16 x i1>
2410 %4 = and <16 x i1> %2, %3
2411 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2412 %6 = bitcast <32 x i1> %5 to i32
; Masked compare with a memory operand: <16 x i32> %__a is compared for
; equality against an <8 x i64> vector loaded from %__b, ANDed with the i16
; %__u bitcast to <16 x i1>, zero-padded to <32 x i1>, and bitcast to i32.
; With VLX features the checks expect the load folded as (%rsi) under a {%k1}
; write mask; the NoVLX checks expect an unmasked folded compare followed by
; a scalar andl %edi, %eax.
2416 define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
2417 ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
2418 ; VLX: # %bb.0: # %entry
2419 ; VLX-NEXT: kmovd %edi, %k1
2420 ; VLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
2421 ; VLX-NEXT: kmovd %k0, %eax
2422 ; VLX-NEXT: vzeroupper
2425 ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
2426 ; NoVLX: # %bb.0: # %entry
2427 ; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0
2428 ; NoVLX-NEXT: kmovw %k0, %eax
2429 ; NoVLX-NEXT: andl %edi, %eax
2430 ; NoVLX-NEXT: vzeroupper
2433 %0 = bitcast <8 x i64> %__a to <16 x i32>
2434 %load = load <8 x i64>, <8 x i64>* %__b
2435 %1 = bitcast <8 x i64> %load to <16 x i32>
2436 %2 = icmp eq <16 x i32> %0, %1
2437 %3 = bitcast i16 %__u to <16 x i1>
2438 %4 = and <16 x i1> %2, %3
2439 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2440 %6 = bitcast <32 x i1> %5 to i32
; Equality compare of <16 x i32> %__a against one i32 loaded from %__b and
; splatted to all 16 lanes. The <16 x i1> result is zero-padded to <32 x i1>
; and bitcast to i32.
; Both check sets expect the load folded as a {1to16} broadcast operand of the
; zmm compare; they differ only in the mask move (kmovd versus kmovw).
2445 define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
2446 ; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b:
2447 ; VLX: # %bb.0: # %entry
2448 ; VLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2449 ; VLX-NEXT: kmovd %k0, %eax
2450 ; VLX-NEXT: vzeroupper
2453 ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b:
2454 ; NoVLX: # %bb.0: # %entry
2455 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2456 ; NoVLX-NEXT: kmovw %k0, %eax
2457 ; NoVLX-NEXT: vzeroupper
2460 %0 = bitcast <8 x i64> %__a to <16 x i32>
2461 %load = load i32, i32* %__b
2462 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
2463 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2464 %2 = icmp eq <16 x i32> %0, %1
2465 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2466 %4 = bitcast <32 x i1> %3 to i32
; Masked broadcast compare: the i16 %__u is bitcast to <16 x i1> and ANDed
; with the result of comparing <16 x i32> %__a against a splatted i32 loaded
; from %__b. The result is zero-padded to <32 x i1> and bitcast to i32.
; With VLX features the checks expect a {1to16} broadcast compare under a
; {%k1} write mask; the NoVLX checks expect the unmasked broadcast compare
; followed by a scalar andl %edi, %eax.
2470 define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
2471 ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
2472 ; VLX: # %bb.0: # %entry
2473 ; VLX-NEXT: kmovd %edi, %k1
2474 ; VLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
2475 ; VLX-NEXT: kmovd %k0, %eax
2476 ; VLX-NEXT: vzeroupper
2479 ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
2480 ; NoVLX: # %bb.0: # %entry
2481 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0
2482 ; NoVLX-NEXT: kmovw %k0, %eax
2483 ; NoVLX-NEXT: andl %edi, %eax
2484 ; NoVLX-NEXT: vzeroupper
2487 %0 = bitcast <8 x i64> %__a to <16 x i32>
2488 %load = load i32, i32* %__b
2489 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
2490 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2491 %2 = icmp eq <16 x i32> %0, %1
2492 %3 = bitcast i16 %__u to <16 x i1>
2493 %4 = and <16 x i1> %3, %2
2494 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2495 %6 = bitcast <32 x i1> %5 to i32
; Equality compare of two register operands viewed as <16 x i32>. The
; <16 x i1> result is widened to <64 x i1> (every shuffle index above 15
; selects a lane of the zeroinitializer operand) and bitcast to i64.
; Both check sets expect a single zmm vpcmpeqd into %k0; with VLX features the
; mask is moved with kmovq to %rax, without them with kmovw to %eax.
2500 define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
2501 ; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask:
2502 ; VLX: # %bb.0: # %entry
2503 ; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2504 ; VLX-NEXT: kmovq %k0, %rax
2505 ; VLX-NEXT: vzeroupper
2508 ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask:
2509 ; NoVLX: # %bb.0: # %entry
2510 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2511 ; NoVLX-NEXT: kmovw %k0, %eax
2512 ; NoVLX-NEXT: vzeroupper
2515 %0 = bitcast <8 x i64> %__a to <16 x i32>
2516 %1 = bitcast <8 x i64> %__b to <16 x i32>
2517 %2 = icmp eq <16 x i32> %0, %1
2518 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2519 %4 = bitcast <64 x i1> %3 to i64
; Equality compare of <16 x i32> %__a against a full <8 x i64> vector loaded
; from %__b (viewed as <16 x i32>). The <16 x i1> result is zero-padded to
; <64 x i1> and bitcast to i64.
; Both check sets expect the load folded into the zmm compare as (%rdi); they
; differ only in the mask move (kmovq to %rax versus kmovw to %eax).
2523 define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
2524 ; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem:
2525 ; VLX: # %bb.0: # %entry
2526 ; VLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
2527 ; VLX-NEXT: kmovq %k0, %rax
2528 ; VLX-NEXT: vzeroupper
2531 ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem:
2532 ; NoVLX: # %bb.0: # %entry
2533 ; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
2534 ; NoVLX-NEXT: kmovw %k0, %eax
2535 ; NoVLX-NEXT: vzeroupper
2538 %0 = bitcast <8 x i64> %__a to <16 x i32>
2539 %load = load <8 x i64>, <8 x i64>* %__b
2540 %1 = bitcast <8 x i64> %load to <16 x i32>
2541 %2 = icmp eq <16 x i32> %0, %1
2542 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2543 %4 = bitcast <64 x i1> %3 to i64
; Masked register-register compare: the <16 x i32> equality result is ANDed
; with the i16 %__u bitcast to <16 x i1>, zero-padded to <64 x i1>, and
; bitcast to i64.
; With VLX features the checks expect %edi moved into %k1 and used as the
; compare's write mask, then kmovq to %rax; the NoVLX checks expect an
; unmasked compare, kmovw to %eax, and a scalar andl %edi, %eax.
2547 define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
2548 ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
2549 ; VLX: # %bb.0: # %entry
2550 ; VLX-NEXT: kmovd %edi, %k1
2551 ; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2552 ; VLX-NEXT: kmovq %k0, %rax
2553 ; VLX-NEXT: vzeroupper
2556 ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
2557 ; NoVLX: # %bb.0: # %entry
2558 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2559 ; NoVLX-NEXT: kmovw %k0, %eax
2560 ; NoVLX-NEXT: andl %edi, %eax
2561 ; NoVLX-NEXT: vzeroupper
2564 %0 = bitcast <8 x i64> %__a to <16 x i32>
2565 %1 = bitcast <8 x i64> %__b to <16 x i32>
2566 %2 = icmp eq <16 x i32> %0, %1
2567 %3 = bitcast i16 %__u to <16 x i1>
2568 %4 = and <16 x i1> %2, %3
2569 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2570 %6 = bitcast <64 x i1> %5 to i64
; Masked compare with a memory operand: <16 x i32> %__a is compared for
; equality against an <8 x i64> vector loaded from %__b, ANDed with the i16
; %__u bitcast to <16 x i1>, zero-padded to <64 x i1>, and bitcast to i64.
; With VLX features the checks expect the load folded as (%rsi) under a {%k1}
; write mask and a kmovq to %rax; the NoVLX checks expect an unmasked folded
; compare, kmovw to %eax, and a scalar andl %edi, %eax.
2574 define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
2575 ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
2576 ; VLX: # %bb.0: # %entry
2577 ; VLX-NEXT: kmovd %edi, %k1
2578 ; VLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
2579 ; VLX-NEXT: kmovq %k0, %rax
2580 ; VLX-NEXT: vzeroupper
2583 ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
2584 ; NoVLX: # %bb.0: # %entry
2585 ; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0
2586 ; NoVLX-NEXT: kmovw %k0, %eax
2587 ; NoVLX-NEXT: andl %edi, %eax
2588 ; NoVLX-NEXT: vzeroupper
2591 %0 = bitcast <8 x i64> %__a to <16 x i32>
2592 %load = load <8 x i64>, <8 x i64>* %__b
2593 %1 = bitcast <8 x i64> %load to <16 x i32>
2594 %2 = icmp eq <16 x i32> %0, %1
2595 %3 = bitcast i16 %__u to <16 x i1>
2596 %4 = and <16 x i1> %2, %3
2597 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2598 %6 = bitcast <64 x i1> %5 to i64
; Equality compare of <16 x i32> %__a against one i32 loaded from %__b and
; splatted to all 16 lanes. The <16 x i1> result is zero-padded to <64 x i1>
; and bitcast to i64.
; Both check sets expect the load folded as a {1to16} broadcast operand of the
; zmm compare; they differ only in the mask move (kmovq to %rax versus kmovw
; to %eax).
2603 define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
2604 ; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b:
2605 ; VLX: # %bb.0: # %entry
2606 ; VLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2607 ; VLX-NEXT: kmovq %k0, %rax
2608 ; VLX-NEXT: vzeroupper
2611 ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b:
2612 ; NoVLX: # %bb.0: # %entry
2613 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2614 ; NoVLX-NEXT: kmovw %k0, %eax
2615 ; NoVLX-NEXT: vzeroupper
2618 %0 = bitcast <8 x i64> %__a to <16 x i32>
2619 %load = load i32, i32* %__b
2620 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
2621 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2622 %2 = icmp eq <16 x i32> %0, %1
2623 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2624 %4 = bitcast <64 x i1> %3 to i64
; Masked broadcast compare: the i16 %__u is bitcast to <16 x i1> and ANDed
; with the result of comparing <16 x i32> %__a against a splatted i32 loaded
; from %__b. The result is zero-padded to <64 x i1> and bitcast to i64.
; With VLX features the checks expect a {1to16} broadcast compare under a
; {%k1} write mask and a kmovq to %rax; the NoVLX checks expect the unmasked
; broadcast compare, kmovw to %eax, and a scalar andl %edi, %eax.
2628 define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
2629 ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
2630 ; VLX: # %bb.0: # %entry
2631 ; VLX-NEXT: kmovd %edi, %k1
2632 ; VLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
2633 ; VLX-NEXT: kmovq %k0, %rax
2634 ; VLX-NEXT: vzeroupper
2637 ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
2638 ; NoVLX: # %bb.0: # %entry
2639 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0
2640 ; NoVLX-NEXT: kmovw %k0, %eax
2641 ; NoVLX-NEXT: andl %edi, %eax
2642 ; NoVLX-NEXT: vzeroupper
2645 %0 = bitcast <8 x i64> %__a to <16 x i32>
2646 %load = load i32, i32* %__b
2647 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
2648 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2649 %2 = icmp eq <16 x i32> %0, %1
2650 %3 = bitcast i16 %__u to <16 x i1>
2651 %4 = and <16 x i1> %3, %2
2652 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2653 %6 = bitcast <64 x i1> %5 to i64
; Equality compare of two <2 x i64> register operands (the bitcasts here are
; same-type casts). The <2 x i1> result is widened to <4 x i1> (shuffle
; indices 2-3 select lanes of the zeroinitializer operand) and bitcast to i4.
; The VLX checks expect an xmm vpcmpeqq and a kmovb; the NoVLX checks expect
; a zmm compare plus a kshiftlw/kshiftrw $14 pair, which zeroes the upper 14
; bits of the 16-bit mask.
2658 define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2659 ; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask:
2660 ; VLX: # %bb.0: # %entry
2661 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
2662 ; VLX-NEXT: kmovb %k0, %eax
2665 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask:
2666 ; NoVLX: # %bb.0: # %entry
2667 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
2668 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2669 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2670 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2671 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2672 ; NoVLX-NEXT: kmovw %k0, %eax
2673 ; NoVLX-NEXT: vzeroupper
2676 %0 = bitcast <2 x i64> %__a to <2 x i64>
2677 %1 = bitcast <2 x i64> %__b to <2 x i64>
2678 %2 = icmp eq <2 x i64> %0, %1
2679 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2680 %4 = bitcast <4 x i1> %3 to i4
; Equality compare of <2 x i64> %__a against a full <2 x i64> vector loaded
; from %__b. The <2 x i1> result is zero-padded to <4 x i1> and bitcast to i4.
; The VLX checks expect the load folded into the xmm compare as (%rdi); the
; NoVLX checks expect a separate vmovdqa into %xmm1, a zmm compare, and a
; kshiftlw/kshiftrw $14 pair that zeroes the upper 14 mask bits.
2684 define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
2685 ; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem:
2686 ; VLX: # %bb.0: # %entry
2687 ; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
2688 ; VLX-NEXT: kmovb %k0, %eax
2691 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem:
2692 ; NoVLX: # %bb.0: # %entry
2693 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2694 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
2695 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2696 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2697 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2698 ; NoVLX-NEXT: kmovw %k0, %eax
2699 ; NoVLX-NEXT: vzeroupper
2702 %0 = bitcast <2 x i64> %__a to <2 x i64>
2703 %load = load <2 x i64>, <2 x i64>* %__b
2704 %1 = bitcast <2 x i64> %load to <2 x i64>
2705 %2 = icmp eq <2 x i64> %0, %1
2706 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2707 %4 = bitcast <4 x i1> %3 to i4
; Masked register-register compare of two <2 x i64> operands. The i8 %__u is
; bitcast to <8 x i1>, its lanes 0 and 1 are extracted with a shufflevector,
; and the resulting <2 x i1> is ANDed with the equality result. That value is
; zero-padded to <4 x i1> and bitcast to i4.
; Both check sets expect %edi moved into %k1 and used as the compare's write
; mask; the NoVLX sequence widens to zmm and clears the upper 14 mask bits
; with a kshiftlw/kshiftrw $14 pair.
2711 define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2712 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask:
2713 ; VLX: # %bb.0: # %entry
2714 ; VLX-NEXT: kmovd %edi, %k1
2715 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
2716 ; VLX-NEXT: kmovb %k0, %eax
2719 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask:
2720 ; NoVLX: # %bb.0: # %entry
2721 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
2722 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2723 ; NoVLX-NEXT: kmovw %edi, %k1
2724 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2725 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2726 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2727 ; NoVLX-NEXT: kmovw %k0, %eax
2728 ; NoVLX-NEXT: vzeroupper
2731 %0 = bitcast <2 x i64> %__a to <2 x i64>
2732 %1 = bitcast <2 x i64> %__b to <2 x i64>
2733 %2 = icmp eq <2 x i64> %0, %1
2734 %3 = bitcast i8 %__u to <8 x i1>
2735 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2736 %4 = and <2 x i1> %2, %extract.i
2737 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2738 %6 = bitcast <4 x i1> %5 to i4
; Masked compare with a memory operand: <2 x i64> %__a is compared for
; equality against a <2 x i64> vector loaded from %__b and ANDed with lanes 0
; and 1 of the i8 %__u (bitcast to <8 x i1>, then extracted by shufflevector).
; The result is zero-padded to <4 x i1> and bitcast to i4.
; The VLX checks expect the load folded as (%rsi) under a {%k1} write mask;
; the NoVLX checks expect a separate vmovdqa, a masked zmm compare, and a
; kshiftlw/kshiftrw $14 pair that zeroes the upper 14 mask bits.
2742 define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
2743 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem:
2744 ; VLX: # %bb.0: # %entry
2745 ; VLX-NEXT: kmovd %edi, %k1
2746 ; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
2747 ; VLX-NEXT: kmovb %k0, %eax
2750 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem:
2751 ; NoVLX: # %bb.0: # %entry
2752 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2753 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
2754 ; NoVLX-NEXT: kmovw %edi, %k1
2755 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2756 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2757 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2758 ; NoVLX-NEXT: kmovw %k0, %eax
2759 ; NoVLX-NEXT: vzeroupper
2762 %0 = bitcast <2 x i64> %__a to <2 x i64>
2763 %load = load <2 x i64>, <2 x i64>* %__b
2764 %1 = bitcast <2 x i64> %load to <2 x i64>
2765 %2 = icmp eq <2 x i64> %0, %1
2766 %3 = bitcast i8 %__u to <8 x i1>
2767 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2768 %4 = and <2 x i1> %2, %extract.i
2769 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2770 %6 = bitcast <4 x i1> %5 to i4
; Equality compare of <2 x i64> %__a against one i64 loaded from %__b and
; splatted to both lanes. The <2 x i1> result is zero-padded to <4 x i1> and
; bitcast to i4.
; The VLX checks expect the load folded as a {1to2} broadcast on xmm; the
; NoVLX checks expect a zmm compare with {1to8} plus a kshiftlw/kshiftrw $14
; pair that zeroes the upper 14 mask bits.
2775 define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
2776 ; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b:
2777 ; VLX: # %bb.0: # %entry
2778 ; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
2779 ; VLX-NEXT: kmovb %k0, %eax
2782 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b:
2783 ; NoVLX: # %bb.0: # %entry
2784 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2785 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
2786 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2787 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2788 ; NoVLX-NEXT: kmovw %k0, %eax
2789 ; NoVLX-NEXT: vzeroupper
2792 %0 = bitcast <2 x i64> %__a to <2 x i64>
2793 %load = load i64, i64* %__b
2794 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
2795 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
2796 %2 = icmp eq <2 x i64> %0, %1
2797 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2798 %4 = bitcast <4 x i1> %3 to i4
; Masked broadcast compare: <2 x i64> %__a is compared for equality against
; an i64 loaded from %__b and splatted to both lanes, then ANDed with lanes 0
; and 1 of the i8 %__u (bitcast to <8 x i1>, then extracted by shufflevector).
; The result is zero-padded to <4 x i1> and bitcast to i4.
; Both check sets expect %edi moved into %k1 and used as the compare's write
; mask; the NoVLX sequence uses a zmm {1to8} broadcast and clears the upper
; 14 mask bits with a kshiftlw/kshiftrw $14 pair.
2802 define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
2803 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b:
2804 ; VLX: # %bb.0: # %entry
2805 ; VLX-NEXT: kmovd %edi, %k1
2806 ; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
2807 ; VLX-NEXT: kmovb %k0, %eax
2810 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b:
2811 ; NoVLX: # %bb.0: # %entry
2812 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2813 ; NoVLX-NEXT: kmovw %edi, %k1
2814 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
2815 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2816 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2817 ; NoVLX-NEXT: kmovw %k0, %eax
2818 ; NoVLX-NEXT: vzeroupper
2821 %0 = bitcast <2 x i64> %__a to <2 x i64>
2822 %load = load i64, i64* %__b
2823 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
2824 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
2825 %2 = icmp eq <2 x i64> %0, %1
2826 %3 = bitcast i8 %__u to <8 x i1>
2827 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2828 %4 = and <2 x i1> %extract.i, %2
2829 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2830 %6 = bitcast <4 x i1> %5 to i4
; Lane-wise equality compare of two <2 x i64> register arguments. The <2 x i1>
; result is widened to <8 x i1>: only shuffle indices 0 and 1 refer to the
; compare result, every other index (2 or 3) selects from zeroinitializer, so
; lanes 2..7 are zero. The widened mask is bitcast to i8.
; Expected codegen: a single xmm vpcmpeqq into %k0 with +avx512vl; with only
; +avx512f the operands are treated as zmm and kshiftlw/kshiftrw $14 clears
; all but the low two bits of the 16-bit mask.
2835 define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2836 ; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask:
2837 ; VLX: # %bb.0: # %entry
2838 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
2839 ; VLX-NEXT: kmovd %k0, %eax
2840 ; VLX-NEXT: # kill: def $al killed $al killed $eax
2843 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask:
2844 ; NoVLX: # %bb.0: # %entry
2845 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
2846 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2847 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2848 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2849 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2850 ; NoVLX-NEXT: kmovw %k0, %eax
2851 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
2852 ; NoVLX-NEXT: vzeroupper
2855 %0 = bitcast <2 x i64> %__a to <2 x i64>
2856 %1 = bitcast <2 x i64> %__b to <2 x i64>
2857 %2 = icmp eq <2 x i64> %0, %1
2858 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
2859 %4 = bitcast <8 x i1> %3 to i8
; Equality compare of %__a against a <2 x i64> vector loaded from %__b. The
; <2 x i1> result is widened to <8 x i1> with lanes 2..7 taken from
; zeroinitializer (shuffle indices 2 and 3), then bitcast to i8.
; Expected codegen: with +avx512vl the load is folded into the xmm vpcmpeqq;
; with only +avx512f the vector is loaded by a separate vmovdqa, compared as
; zmm, and kshiftlw/kshiftrw $14 keeps only the low two mask bits.
2863 define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
2864 ; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem:
2865 ; VLX: # %bb.0: # %entry
2866 ; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
2867 ; VLX-NEXT: kmovd %k0, %eax
2868 ; VLX-NEXT: # kill: def $al killed $al killed $eax
2871 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem:
2872 ; NoVLX: # %bb.0: # %entry
2873 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2874 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
2875 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2876 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2877 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2878 ; NoVLX-NEXT: kmovw %k0, %eax
2879 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
2880 ; NoVLX-NEXT: vzeroupper
2883 %0 = bitcast <2 x i64> %__a to <2 x i64>
2884 %load = load <2 x i64>, <2 x i64>* %__b
2885 %1 = bitcast <2 x i64> %load to <2 x i64>
2886 %2 = icmp eq <2 x i64> %0, %1
2887 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
2888 %4 = bitcast <8 x i1> %3 to i8
; Masked register-register compare: %__a and %__b are compared for equality
; per 64-bit lane, and the result is ANDed with elements 0 and 1 of %__u
; (bitcast to <8 x i1>, then extracted by shufflevector). The <2 x i1> value
; is widened to <8 x i1> with lanes 2..7 taken from zeroinitializer and
; bitcast to i8.
; Expected codegen: %edi is moved into %k1 and applied as the compare's write
; mask; without +avx512vl the compare runs on zmm and kshiftlw/kshiftrw $14
; keeps only the low two mask bits.
2892 define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2893 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask:
2894 ; VLX: # %bb.0: # %entry
2895 ; VLX-NEXT: kmovd %edi, %k1
2896 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
2897 ; VLX-NEXT: kmovd %k0, %eax
2898 ; VLX-NEXT: # kill: def $al killed $al killed $eax
2901 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask:
2902 ; NoVLX: # %bb.0: # %entry
2903 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
2904 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2905 ; NoVLX-NEXT: kmovw %edi, %k1
2906 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2907 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2908 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2909 ; NoVLX-NEXT: kmovw %k0, %eax
2910 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
2911 ; NoVLX-NEXT: vzeroupper
2914 %0 = bitcast <2 x i64> %__a to <2 x i64>
2915 %1 = bitcast <2 x i64> %__b to <2 x i64>
2916 %2 = icmp eq <2 x i64> %0, %1
2917 %3 = bitcast i8 %__u to <8 x i1>
2918 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2919 %4 = and <2 x i1> %2, %extract.i
2920 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
2921 %6 = bitcast <8 x i1> %5 to i8
; Masked compare with a memory operand: %__a is compared for equality with a
; <2 x i64> vector loaded from %__b, and the result is ANDed with elements 0
; and 1 of %__u (bitcast to <8 x i1>). The <2 x i1> value is widened to
; <8 x i1> with lanes 2..7 taken from zeroinitializer and bitcast to i8.
; Expected codegen: %edi goes to %k1 as the write mask. With +avx512vl the
; load is folded into vpcmpeqq; with only +avx512f a separate vmovdqa load
; feeds a zmm compare and kshiftlw/kshiftrw $14 keeps the low two mask bits.
2925 define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
2926 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem:
2927 ; VLX: # %bb.0: # %entry
2928 ; VLX-NEXT: kmovd %edi, %k1
2929 ; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
2930 ; VLX-NEXT: kmovd %k0, %eax
2931 ; VLX-NEXT: # kill: def $al killed $al killed $eax
2934 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem:
2935 ; NoVLX: # %bb.0: # %entry
2936 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2937 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
2938 ; NoVLX-NEXT: kmovw %edi, %k1
2939 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2940 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2941 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2942 ; NoVLX-NEXT: kmovw %k0, %eax
2943 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
2944 ; NoVLX-NEXT: vzeroupper
2947 %0 = bitcast <2 x i64> %__a to <2 x i64>
2948 %load = load <2 x i64>, <2 x i64>* %__b
2949 %1 = bitcast <2 x i64> %load to <2 x i64>
2950 %2 = icmp eq <2 x i64> %0, %1
2951 %3 = bitcast i8 %__u to <8 x i1>
2952 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2953 %4 = and <2 x i1> %2, %extract.i
2954 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
2955 %6 = bitcast <8 x i1> %5 to i8
; Equality compare of %__a against a scalar i64 loaded from %__b and splatted
; to both lanes (insertelement + shufflevector <0, 0>). The <2 x i1> result is
; widened to <8 x i1> with lanes 2..7 taken from zeroinitializer, then bitcast
; to i8.
; Expected codegen: a {1to2} broadcast compare with +avx512vl; with only
; +avx512f, a {1to8} zmm compare followed by kshiftlw/kshiftrw $14, which
; keeps just the low two mask bits.
2960 define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
2961 ; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b:
2962 ; VLX: # %bb.0: # %entry
2963 ; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
2964 ; VLX-NEXT: kmovd %k0, %eax
2965 ; VLX-NEXT: # kill: def $al killed $al killed $eax
2968 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b:
2969 ; NoVLX: # %bb.0: # %entry
2970 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2971 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
2972 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2973 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2974 ; NoVLX-NEXT: kmovw %k0, %eax
2975 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
2976 ; NoVLX-NEXT: vzeroupper
2979 %0 = bitcast <2 x i64> %__a to <2 x i64>
2980 %load = load i64, i64* %__b
2981 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
2982 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
2983 %2 = icmp eq <2 x i64> %0, %1
2984 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
2985 %4 = bitcast <8 x i1> %3 to i8
; Masked broadcast compare: %__a is compared for equality with a scalar i64
; loaded from %__b and splatted to both lanes; elements 0 and 1 of %__u
; (bitcast to <8 x i1>) are ANDed with the result (mask is the first 'and'
; operand here). The <2 x i1> value is widened to <8 x i1> with lanes 2..7
; taken from zeroinitializer and bitcast to i8.
; Expected codegen: %edi goes to %k1 as the write mask of a {1to2} broadcast
; compare with +avx512vl; with only +avx512f the compare is a {1to8} zmm form
; followed by kshiftlw/kshiftrw $14 to keep the low two mask bits.
2989 define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
2990 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b:
2991 ; VLX: # %bb.0: # %entry
2992 ; VLX-NEXT: kmovd %edi, %k1
2993 ; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
2994 ; VLX-NEXT: kmovd %k0, %eax
2995 ; VLX-NEXT: # kill: def $al killed $al killed $eax
2998 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b:
2999 ; NoVLX: # %bb.0: # %entry
3000 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3001 ; NoVLX-NEXT: kmovw %edi, %k1
3002 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
3003 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3004 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3005 ; NoVLX-NEXT: kmovw %k0, %eax
3006 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3007 ; NoVLX-NEXT: vzeroupper
3010 %0 = bitcast <2 x i64> %__a to <2 x i64>
3011 %load = load i64, i64* %__b
3012 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3013 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3014 %2 = icmp eq <2 x i64> %0, %1
3015 %3 = bitcast i8 %__u to <8 x i1>
3016 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3017 %4 = and <2 x i1> %extract.i, %2
3018 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3019 %6 = bitcast <8 x i1> %5 to i8
; Lane-wise equality compare of two <2 x i64> register arguments. The <2 x i1>
; result is widened to <16 x i1>: only shuffle indices 0 and 1 refer to the
; compare result, all other indices (2 or 3) select from zeroinitializer, so
; lanes 2..15 are zero. The widened mask is bitcast to i16.
; Expected codegen: a single xmm vpcmpeqq into %k0 with +avx512vl; with only
; +avx512f the operands are treated as zmm and kshiftlw/kshiftrw $14 clears
; all but the low two bits of the 16-bit mask.
3024 define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3025 ; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask:
3026 ; VLX: # %bb.0: # %entry
3027 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
3028 ; VLX-NEXT: kmovd %k0, %eax
3029 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3032 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask:
3033 ; NoVLX: # %bb.0: # %entry
3034 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3035 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3036 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3037 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3038 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3039 ; NoVLX-NEXT: kmovw %k0, %eax
3040 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3041 ; NoVLX-NEXT: vzeroupper
3044 %0 = bitcast <2 x i64> %__a to <2 x i64>
3045 %1 = bitcast <2 x i64> %__b to <2 x i64>
3046 %2 = icmp eq <2 x i64> %0, %1
3047 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3048 %4 = bitcast <16 x i1> %3 to i16
; Equality compare of %__a against a <2 x i64> vector loaded from %__b. The
; <2 x i1> result is widened to <16 x i1> with lanes 2..15 taken from
; zeroinitializer (shuffle indices 2 and 3), then bitcast to i16.
; Expected codegen: with +avx512vl the load is folded into the xmm vpcmpeqq;
; with only +avx512f the vector is loaded by a separate vmovdqa, compared as
; zmm, and kshiftlw/kshiftrw $14 keeps only the low two mask bits.
3052 define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
3053 ; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem:
3054 ; VLX: # %bb.0: # %entry
3055 ; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
3056 ; VLX-NEXT: kmovd %k0, %eax
3057 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3060 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem:
3061 ; NoVLX: # %bb.0: # %entry
3062 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3063 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
3064 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3065 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3066 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3067 ; NoVLX-NEXT: kmovw %k0, %eax
3068 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3069 ; NoVLX-NEXT: vzeroupper
3072 %0 = bitcast <2 x i64> %__a to <2 x i64>
3073 %load = load <2 x i64>, <2 x i64>* %__b
3074 %1 = bitcast <2 x i64> %load to <2 x i64>
3075 %2 = icmp eq <2 x i64> %0, %1
3076 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3077 %4 = bitcast <16 x i1> %3 to i16
; Masked register-register compare: %__a and %__b are compared for equality
; per 64-bit lane, and the result is ANDed with elements 0 and 1 of %__u
; (bitcast to <8 x i1>, then extracted by shufflevector). The <2 x i1> value
; is widened to <16 x i1> with lanes 2..15 taken from zeroinitializer and
; bitcast to i16.
; Expected codegen: %edi is moved into %k1 and applied as the compare's write
; mask; without +avx512vl the compare runs on zmm and kshiftlw/kshiftrw $14
; keeps only the low two mask bits.
3081 define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3082 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask:
3083 ; VLX: # %bb.0: # %entry
3084 ; VLX-NEXT: kmovd %edi, %k1
3085 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
3086 ; VLX-NEXT: kmovd %k0, %eax
3087 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3090 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask:
3091 ; NoVLX: # %bb.0: # %entry
3092 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3093 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3094 ; NoVLX-NEXT: kmovw %edi, %k1
3095 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3096 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3097 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3098 ; NoVLX-NEXT: kmovw %k0, %eax
3099 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3100 ; NoVLX-NEXT: vzeroupper
3103 %0 = bitcast <2 x i64> %__a to <2 x i64>
3104 %1 = bitcast <2 x i64> %__b to <2 x i64>
3105 %2 = icmp eq <2 x i64> %0, %1
3106 %3 = bitcast i8 %__u to <8 x i1>
3107 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3108 %4 = and <2 x i1> %2, %extract.i
3109 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3110 %6 = bitcast <16 x i1> %5 to i16
; Masked compare with a memory operand: %__a is compared for equality with a
; <2 x i64> vector loaded from %__b, and the result is ANDed with elements 0
; and 1 of %__u (bitcast to <8 x i1>). The <2 x i1> value is widened to
; <16 x i1> with lanes 2..15 taken from zeroinitializer and bitcast to i16.
; Expected codegen: %edi goes to %k1 as the write mask. With +avx512vl the
; load is folded into vpcmpeqq; with only +avx512f a separate vmovdqa load
; feeds a zmm compare and kshiftlw/kshiftrw $14 keeps the low two mask bits.
3114 define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
3115 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem:
3116 ; VLX: # %bb.0: # %entry
3117 ; VLX-NEXT: kmovd %edi, %k1
3118 ; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
3119 ; VLX-NEXT: kmovd %k0, %eax
3120 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3123 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem:
3124 ; NoVLX: # %bb.0: # %entry
3125 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3126 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
3127 ; NoVLX-NEXT: kmovw %edi, %k1
3128 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3129 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3130 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3131 ; NoVLX-NEXT: kmovw %k0, %eax
3132 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3133 ; NoVLX-NEXT: vzeroupper
3136 %0 = bitcast <2 x i64> %__a to <2 x i64>
3137 %load = load <2 x i64>, <2 x i64>* %__b
3138 %1 = bitcast <2 x i64> %load to <2 x i64>
3139 %2 = icmp eq <2 x i64> %0, %1
3140 %3 = bitcast i8 %__u to <8 x i1>
3141 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3142 %4 = and <2 x i1> %2, %extract.i
3143 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3144 %6 = bitcast <16 x i1> %5 to i16
; Equality compare of %__a against a scalar i64 loaded from %__b and splatted
; to both lanes (insertelement + shufflevector <0, 0>). The <2 x i1> result is
; widened to <16 x i1> with lanes 2..15 taken from zeroinitializer, then
; bitcast to i16.
; Expected codegen: a {1to2} broadcast compare with +avx512vl; with only
; +avx512f, a {1to8} zmm compare followed by kshiftlw/kshiftrw $14, which
; keeps just the low two mask bits.
3149 define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
3150 ; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b:
3151 ; VLX: # %bb.0: # %entry
3152 ; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
3153 ; VLX-NEXT: kmovd %k0, %eax
3154 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3157 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b:
3158 ; NoVLX: # %bb.0: # %entry
3159 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3160 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
3161 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3162 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3163 ; NoVLX-NEXT: kmovw %k0, %eax
3164 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3165 ; NoVLX-NEXT: vzeroupper
3168 %0 = bitcast <2 x i64> %__a to <2 x i64>
3169 %load = load i64, i64* %__b
3170 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3171 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3172 %2 = icmp eq <2 x i64> %0, %1
3173 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3174 %4 = bitcast <16 x i1> %3 to i16
; Masked broadcast compare: %__a is compared for equality with a scalar i64
; loaded from %__b and splatted to both lanes; elements 0 and 1 of %__u
; (bitcast to <8 x i1>) are ANDed with the result (mask is the first 'and'
; operand here). The <2 x i1> value is widened to <16 x i1> with lanes 2..15
; taken from zeroinitializer and bitcast to i16.
; Expected codegen: %edi goes to %k1 as the write mask of a {1to2} broadcast
; compare with +avx512vl; with only +avx512f the compare is a {1to8} zmm form
; followed by kshiftlw/kshiftrw $14 to keep the low two mask bits.
3178 define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
3179 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b:
3180 ; VLX: # %bb.0: # %entry
3181 ; VLX-NEXT: kmovd %edi, %k1
3182 ; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
3183 ; VLX-NEXT: kmovd %k0, %eax
3184 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3187 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b:
3188 ; NoVLX: # %bb.0: # %entry
3189 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3190 ; NoVLX-NEXT: kmovw %edi, %k1
3191 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
3192 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3193 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3194 ; NoVLX-NEXT: kmovw %k0, %eax
3195 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3196 ; NoVLX-NEXT: vzeroupper
3199 %0 = bitcast <2 x i64> %__a to <2 x i64>
3200 %load = load i64, i64* %__b
3201 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3202 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3203 %2 = icmp eq <2 x i64> %0, %1
3204 %3 = bitcast i8 %__u to <8 x i1>
3205 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3206 %4 = and <2 x i1> %extract.i, %2
3207 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3208 %6 = bitcast <16 x i1> %5 to i16
; Lane-wise equality compare of two <2 x i64> register arguments. The <2 x i1>
; result is widened to <32 x i1>: only shuffle indices 0 and 1 refer to the
; compare result, all other indices (2 or 3) select from zeroinitializer, so
; lanes 2..31 are zero. The widened mask is bitcast to i32.
; Expected codegen: a single xmm vpcmpeqq into %k0 with +avx512vl; with only
; +avx512f the operands are treated as zmm and kshiftlw/kshiftrw $14 clears
; all but the low two bits of the 16-bit mask before it is moved to %eax.
3213 define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3214 ; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
3215 ; VLX: # %bb.0: # %entry
3216 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
3217 ; VLX-NEXT: kmovd %k0, %eax
3220 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
3221 ; NoVLX: # %bb.0: # %entry
3222 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3223 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3224 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3225 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3226 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3227 ; NoVLX-NEXT: kmovw %k0, %eax
3228 ; NoVLX-NEXT: vzeroupper
3231 %0 = bitcast <2 x i64> %__a to <2 x i64>
3232 %1 = bitcast <2 x i64> %__b to <2 x i64>
3233 %2 = icmp eq <2 x i64> %0, %1
3234 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3235 %4 = bitcast <32 x i1> %3 to i32
; Equality compare of %__a against a <2 x i64> vector loaded from %__b. The
; <2 x i1> result is widened to <32 x i1> with lanes 2..31 taken from
; zeroinitializer (shuffle indices 2 and 3), then bitcast to i32.
; Expected codegen: with +avx512vl the load is folded into the xmm vpcmpeqq;
; with only +avx512f the vector is loaded by a separate vmovdqa, compared as
; zmm, and kshiftlw/kshiftrw $14 keeps only the low two mask bits.
3239 define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
3240 ; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
3241 ; VLX: # %bb.0: # %entry
3242 ; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
3243 ; VLX-NEXT: kmovd %k0, %eax
3246 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
3247 ; NoVLX: # %bb.0: # %entry
3248 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3249 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
3250 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3251 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3252 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3253 ; NoVLX-NEXT: kmovw %k0, %eax
3254 ; NoVLX-NEXT: vzeroupper
3257 %0 = bitcast <2 x i64> %__a to <2 x i64>
3258 %load = load <2 x i64>, <2 x i64>* %__b
3259 %1 = bitcast <2 x i64> %load to <2 x i64>
3260 %2 = icmp eq <2 x i64> %0, %1
3261 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3262 %4 = bitcast <32 x i1> %3 to i32
; Masked register-register compare: %__a and %__b are compared for equality
; per 64-bit lane, and the result is ANDed with elements 0 and 1 of %__u
; (bitcast to <8 x i1>, then extracted by shufflevector). The <2 x i1> value
; is widened to <32 x i1> with lanes 2..31 taken from zeroinitializer and
; bitcast to i32.
; Expected codegen: %edi is moved into %k1 and applied as the compare's write
; mask; without +avx512vl the compare runs on zmm and kshiftlw/kshiftrw $14
; keeps only the low two mask bits.
3266 define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3267 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
3268 ; VLX: # %bb.0: # %entry
3269 ; VLX-NEXT: kmovd %edi, %k1
3270 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
3271 ; VLX-NEXT: kmovd %k0, %eax
3274 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
3275 ; NoVLX: # %bb.0: # %entry
3276 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3277 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3278 ; NoVLX-NEXT: kmovw %edi, %k1
3279 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3280 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3281 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3282 ; NoVLX-NEXT: kmovw %k0, %eax
3283 ; NoVLX-NEXT: vzeroupper
3286 %0 = bitcast <2 x i64> %__a to <2 x i64>
3287 %1 = bitcast <2 x i64> %__b to <2 x i64>
3288 %2 = icmp eq <2 x i64> %0, %1
3289 %3 = bitcast i8 %__u to <8 x i1>
3290 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3291 %4 = and <2 x i1> %2, %extract.i
3292 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3293 %6 = bitcast <32 x i1> %5 to i32
; Masked compare with a memory operand: %__a is compared for equality with a
; <2 x i64> vector loaded from %__b, and the result is ANDed with elements 0
; and 1 of %__u (bitcast to <8 x i1>). The <2 x i1> value is widened to
; <32 x i1> with lanes 2..31 taken from zeroinitializer and bitcast to i32.
; Expected codegen: %edi goes to %k1 as the write mask. With +avx512vl the
; load is folded into vpcmpeqq; with only +avx512f a separate vmovdqa load
; feeds a zmm compare and kshiftlw/kshiftrw $14 keeps the low two mask bits.
3297 define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
3298 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
3299 ; VLX: # %bb.0: # %entry
3300 ; VLX-NEXT: kmovd %edi, %k1
3301 ; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
3302 ; VLX-NEXT: kmovd %k0, %eax
3305 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
3306 ; NoVLX: # %bb.0: # %entry
3307 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3308 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
3309 ; NoVLX-NEXT: kmovw %edi, %k1
3310 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3311 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3312 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3313 ; NoVLX-NEXT: kmovw %k0, %eax
3314 ; NoVLX-NEXT: vzeroupper
3317 %0 = bitcast <2 x i64> %__a to <2 x i64>
3318 %load = load <2 x i64>, <2 x i64>* %__b
3319 %1 = bitcast <2 x i64> %load to <2 x i64>
3320 %2 = icmp eq <2 x i64> %0, %1
3321 %3 = bitcast i8 %__u to <8 x i1>
3322 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3323 %4 = and <2 x i1> %2, %extract.i
3324 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3325 %6 = bitcast <32 x i1> %5 to i32
; Equality compare of %__a against a scalar i64 loaded from %__b and splatted
; to both lanes (insertelement + shufflevector <0, 0>). The <2 x i1> result is
; widened to <32 x i1> with lanes 2..31 taken from zeroinitializer, then
; bitcast to i32.
; Expected codegen: a {1to2} broadcast compare with +avx512vl; with only
; +avx512f, a {1to8} zmm compare followed by kshiftlw/kshiftrw $14, which
; keeps just the low two mask bits.
3330 define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
3331 ; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
3332 ; VLX: # %bb.0: # %entry
3333 ; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
3334 ; VLX-NEXT: kmovd %k0, %eax
3337 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
3338 ; NoVLX: # %bb.0: # %entry
3339 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3340 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
3341 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3342 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3343 ; NoVLX-NEXT: kmovw %k0, %eax
3344 ; NoVLX-NEXT: vzeroupper
3347 %0 = bitcast <2 x i64> %__a to <2 x i64>
3348 %load = load i64, i64* %__b
3349 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3350 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3351 %2 = icmp eq <2 x i64> %0, %1
3352 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3353 %4 = bitcast <32 x i1> %3 to i32
; Masked broadcast compare: %__a is compared for equality with a scalar i64
; loaded from %__b and splatted to both lanes; elements 0 and 1 of %__u
; (bitcast to <8 x i1>) are ANDed with the result (mask is the first 'and'
; operand here). The <2 x i1> value is widened to <32 x i1> with lanes 2..31
; taken from zeroinitializer and bitcast to i32.
; Expected codegen: %edi goes to %k1 as the write mask of a {1to2} broadcast
; compare with +avx512vl; with only +avx512f the compare is a {1to8} zmm form
; followed by kshiftlw/kshiftrw $14 to keep the low two mask bits.
3357 define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
3358 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
3359 ; VLX: # %bb.0: # %entry
3360 ; VLX-NEXT: kmovd %edi, %k1
3361 ; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
3362 ; VLX-NEXT: kmovd %k0, %eax
3365 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
3366 ; NoVLX: # %bb.0: # %entry
3367 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3368 ; NoVLX-NEXT: kmovw %edi, %k1
3369 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
3370 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3371 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3372 ; NoVLX-NEXT: kmovw %k0, %eax
3373 ; NoVLX-NEXT: vzeroupper
3376 %0 = bitcast <2 x i64> %__a to <2 x i64>
3377 %load = load i64, i64* %__b
3378 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3379 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3380 %2 = icmp eq <2 x i64> %0, %1
3381 %3 = bitcast i8 %__u to <8 x i1>
3382 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3383 %4 = and <2 x i1> %extract.i, %2
3384 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3385 %6 = bitcast <32 x i1> %5 to i32
3390 define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3391 ; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
3392 ; VLX: # %bb.0: # %entry
3393 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
3394 ; VLX-NEXT: kmovq %k0, %rax
3397 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
3398 ; NoVLX: # %bb.0: # %entry
3399 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3400 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3401 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3402 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3403 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3404 ; NoVLX-NEXT: kmovw %k0, %eax
3405 ; NoVLX-NEXT: vzeroupper
3408 %0 = bitcast <2 x i64> %__a to <2 x i64>
3409 %1 = bitcast <2 x i64> %__b to <2 x i64>
3410 %2 = icmp eq <2 x i64> %0, %1
3411 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3412 %4 = bitcast <64 x i1> %3 to i64
3416 define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
3417 ; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
3418 ; VLX: # %bb.0: # %entry
3419 ; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
3420 ; VLX-NEXT: kmovq %k0, %rax
3423 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
3424 ; NoVLX: # %bb.0: # %entry
3425 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3426 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
3427 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3428 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3429 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3430 ; NoVLX-NEXT: kmovw %k0, %eax
3431 ; NoVLX-NEXT: vzeroupper
3434 %0 = bitcast <2 x i64> %__a to <2 x i64>
3435 %load = load <2 x i64>, <2 x i64>* %__b
3436 %1 = bitcast <2 x i64> %load to <2 x i64>
3437 %2 = icmp eq <2 x i64> %0, %1
3438 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3439 %4 = bitcast <64 x i1> %3 to i64
3443 define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3444 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
3445 ; VLX: # %bb.0: # %entry
3446 ; VLX-NEXT: kmovd %edi, %k1
3447 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
3448 ; VLX-NEXT: kmovq %k0, %rax
3451 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
3452 ; NoVLX: # %bb.0: # %entry
3453 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3454 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3455 ; NoVLX-NEXT: kmovw %edi, %k1
3456 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3457 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3458 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3459 ; NoVLX-NEXT: kmovw %k0, %eax
3460 ; NoVLX-NEXT: vzeroupper
3463 %0 = bitcast <2 x i64> %__a to <2 x i64>
3464 %1 = bitcast <2 x i64> %__b to <2 x i64>
3465 %2 = icmp eq <2 x i64> %0, %1
3466 %3 = bitcast i8 %__u to <8 x i1>
3467 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3468 %4 = and <2 x i1> %2, %extract.i
3469 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3470 %6 = bitcast <64 x i1> %5 to i64
3474 define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
3475 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
3476 ; VLX: # %bb.0: # %entry
3477 ; VLX-NEXT: kmovd %edi, %k1
3478 ; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
3479 ; VLX-NEXT: kmovq %k0, %rax
3482 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
3483 ; NoVLX: # %bb.0: # %entry
3484 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3485 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
3486 ; NoVLX-NEXT: kmovw %edi, %k1
3487 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3488 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3489 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3490 ; NoVLX-NEXT: kmovw %k0, %eax
3491 ; NoVLX-NEXT: vzeroupper
3494 %0 = bitcast <2 x i64> %__a to <2 x i64>
3495 %load = load <2 x i64>, <2 x i64>* %__b
3496 %1 = bitcast <2 x i64> %load to <2 x i64>
3497 %2 = icmp eq <2 x i64> %0, %1
3498 %3 = bitcast i8 %__u to <8 x i1>
3499 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3500 %4 = and <2 x i1> %2, %extract.i
3501 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3502 %6 = bitcast <64 x i1> %5 to i64
3507 define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
3508 ; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
3509 ; VLX: # %bb.0: # %entry
3510 ; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
3511 ; VLX-NEXT: kmovq %k0, %rax
3514 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
3515 ; NoVLX: # %bb.0: # %entry
3516 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3517 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
3518 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3519 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3520 ; NoVLX-NEXT: kmovw %k0, %eax
3521 ; NoVLX-NEXT: vzeroupper
3524 %0 = bitcast <2 x i64> %__a to <2 x i64>
3525 %load = load i64, i64* %__b
3526 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3527 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3528 %2 = icmp eq <2 x i64> %0, %1
3529 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3530 %4 = bitcast <64 x i1> %3 to i64
3534 define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
3535 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
3536 ; VLX: # %bb.0: # %entry
3537 ; VLX-NEXT: kmovd %edi, %k1
3538 ; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
3539 ; VLX-NEXT: kmovq %k0, %rax
3542 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
3543 ; NoVLX: # %bb.0: # %entry
3544 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3545 ; NoVLX-NEXT: kmovw %edi, %k1
3546 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
3547 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3548 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3549 ; NoVLX-NEXT: kmovw %k0, %eax
3550 ; NoVLX-NEXT: vzeroupper
3553 %0 = bitcast <2 x i64> %__a to <2 x i64>
3554 %load = load i64, i64* %__b
3555 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3556 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3557 %2 = icmp eq <2 x i64> %0, %1
3558 %3 = bitcast i8 %__u to <8 x i1>
3559 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3560 %4 = and <2 x i1> %extract.i, %2
3561 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3562 %6 = bitcast <64 x i1> %5 to i64
; Unmasked register-register form: compares %__a and %__b lane-wise for
; equality as <4 x i64>, widens the <4 x i1> result to <8 x i1> with zero
; lanes and bitcasts that to i8.
; Expected codegen: the VLX run uses a single ymm vpcmpeqq into %k0; the NoVLX
; run (+avx512f only) compares in zmm and clears mask bits 4-15 with a
; kshiftlw/kshiftrw $12 pair.
3567 define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3568 ; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
3569 ; VLX: # %bb.0: # %entry
3570 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
3571 ; VLX-NEXT: kmovd %k0, %eax
3572 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3573 ; VLX-NEXT: vzeroupper
3576 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
3577 ; NoVLX: # %bb.0: # %entry
3578 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
3579 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3580 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3581 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3582 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3583 ; NoVLX-NEXT: kmovw %k0, %eax
3584 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3585 ; NoVLX-NEXT: vzeroupper
3588 %0 = bitcast <4 x i64> %__a to <4 x i64>
3589 %1 = bitcast <4 x i64> %__b to <4 x i64>
3590 %2 = icmp eq <4 x i64> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-7 are zero.
3591 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3592 %4 = bitcast <8 x i1> %3 to i8
; Unmasked register-memory form: loads a <4 x i64> from %__b, compares it for
; equality with %__a, widens the <4 x i1> result to <8 x i1> with zero lanes
; and bitcasts that to i8.
; Expected codegen: the VLX run folds the load into a ymm vpcmpeqq; the NoVLX
; run (+avx512f only) loads with vmovdqa, compares in zmm and clears mask bits
; 4-15 with a kshiftlw/kshiftrw $12 pair.
3596 define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
3597 ; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
3598 ; VLX: # %bb.0: # %entry
3599 ; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
3600 ; VLX-NEXT: kmovd %k0, %eax
3601 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3602 ; VLX-NEXT: vzeroupper
3605 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
3606 ; NoVLX: # %bb.0: # %entry
3607 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3608 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
3609 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3610 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3611 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3612 ; NoVLX-NEXT: kmovw %k0, %eax
3613 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3614 ; NoVLX-NEXT: vzeroupper
3617 %0 = bitcast <4 x i64> %__a to <4 x i64>
3618 %load = load <4 x i64>, <4 x i64>* %__b
3619 %1 = bitcast <4 x i64> %load to <4 x i64>
3620 %2 = icmp eq <4 x i64> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-7 are zero.
3621 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3622 %4 = bitcast <8 x i1> %3 to i8
; Masked register-register form: compares %__a and %__b lane-wise for equality
; as <4 x i64>, ANDs the result with the low four bits of %__u, widens to
; <8 x i1> with zero lanes and bitcasts that to i8.
; Expected codegen: the VLX run moves %__u into %k1 and uses it as the write
; mask of a ymm vpcmpeqq; the NoVLX run (+avx512f only) does the same in zmm
; and clears mask bits 4-15 with a kshiftlw/kshiftrw $12 pair.
3626 define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3627 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
3628 ; VLX: # %bb.0: # %entry
3629 ; VLX-NEXT: kmovd %edi, %k1
3630 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
3631 ; VLX-NEXT: kmovd %k0, %eax
3632 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3633 ; VLX-NEXT: vzeroupper
3636 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
3637 ; NoVLX: # %bb.0: # %entry
3638 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
3639 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3640 ; NoVLX-NEXT: kmovw %edi, %k1
3641 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3642 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3643 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3644 ; NoVLX-NEXT: kmovw %k0, %eax
3645 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3646 ; NoVLX-NEXT: vzeroupper
3649 %0 = bitcast <4 x i64> %__a to <4 x i64>
3650 %1 = bitcast <4 x i64> %__b to <4 x i64>
3651 %2 = icmp eq <4 x i64> %0, %1
; Reinterpret %__u as eight i1 lanes and keep lanes 0-3 as the predicate.
3652 %3 = bitcast i8 %__u to <8 x i1>
3653 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3654 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-7 are zero.
3655 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3656 %6 = bitcast <8 x i1> %5 to i8
; Masked register-memory form: loads a <4 x i64> from %__b, compares it for
; equality with %__a, ANDs the result with the low four bits of %__u, widens
; to <8 x i1> with zero lanes and bitcasts that to i8.
; Expected codegen: the VLX run folds the load into a ymm vpcmpeqq
; write-masked by %k1; the NoVLX run (+avx512f only) loads with vmovdqa,
; compares in zmm under %k1 and clears mask bits 4-15 with a
; kshiftlw/kshiftrw $12 pair.
3660 define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
3661 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
3662 ; VLX: # %bb.0: # %entry
3663 ; VLX-NEXT: kmovd %edi, %k1
3664 ; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
3665 ; VLX-NEXT: kmovd %k0, %eax
3666 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3667 ; VLX-NEXT: vzeroupper
3670 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
3671 ; NoVLX: # %bb.0: # %entry
3672 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3673 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
3674 ; NoVLX-NEXT: kmovw %edi, %k1
3675 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3676 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3677 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3678 ; NoVLX-NEXT: kmovw %k0, %eax
3679 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3680 ; NoVLX-NEXT: vzeroupper
3683 %0 = bitcast <4 x i64> %__a to <4 x i64>
3684 %load = load <4 x i64>, <4 x i64>* %__b
3685 %1 = bitcast <4 x i64> %load to <4 x i64>
3686 %2 = icmp eq <4 x i64> %0, %1
; Reinterpret %__u as eight i1 lanes and keep lanes 0-3 as the predicate.
3687 %3 = bitcast i8 %__u to <8 x i1>
3688 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3689 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-7 are zero.
3690 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3691 %6 = bitcast <8 x i1> %5 to i8
; Unmasked broadcast-from-memory form: loads one i64 from %__b, splats it to
; <4 x i64>, compares it for equality with %__a, widens the <4 x i1> result to
; <8 x i1> with zero lanes and bitcasts that to i8.
; Expected codegen: the VLX run folds the splat into a ymm vpcmpeqq with a
; {1to4} broadcast operand; the NoVLX run (+avx512f only) compares in zmm with
; {1to8} and clears mask bits 4-15 with a kshiftlw/kshiftrw $12 pair.
3696 define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
3697 ; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
3698 ; VLX: # %bb.0: # %entry
3699 ; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
3700 ; VLX-NEXT: kmovd %k0, %eax
3701 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3702 ; VLX-NEXT: vzeroupper
3705 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
3706 ; NoVLX: # %bb.0: # %entry
3707 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3708 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
3709 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3710 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3711 ; NoVLX-NEXT: kmovw %k0, %eax
3712 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3713 ; NoVLX-NEXT: vzeroupper
3716 %0 = bitcast <4 x i64> %__a to <4 x i64>
3717 %load = load i64, i64* %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero index mask.
3718 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
3719 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
3720 %2 = icmp eq <4 x i64> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-7 are zero.
3721 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3722 %4 = bitcast <8 x i1> %3 to i8
; Masked broadcast-from-memory form: loads one i64 from %__b, splats it to
; <4 x i64>, compares it for equality with %__a, ANDs the result with the low
; four bits of %__u, widens to <8 x i1> with zero lanes and bitcasts to i8.
; Expected codegen: the VLX run uses a ymm vpcmpeqq with a {1to4} operand,
; write-masked by %k1; the NoVLX run (+avx512f only) does the same in zmm with
; {1to8} and clears mask bits 4-15 with a kshiftlw/kshiftrw $12 pair.
3726 define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
3727 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
3728 ; VLX: # %bb.0: # %entry
3729 ; VLX-NEXT: kmovd %edi, %k1
3730 ; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
3731 ; VLX-NEXT: kmovd %k0, %eax
3732 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3733 ; VLX-NEXT: vzeroupper
3736 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
3737 ; NoVLX: # %bb.0: # %entry
3738 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3739 ; NoVLX-NEXT: kmovw %edi, %k1
3740 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
3741 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3742 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3743 ; NoVLX-NEXT: kmovw %k0, %eax
3744 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3745 ; NoVLX-NEXT: vzeroupper
3748 %0 = bitcast <4 x i64> %__a to <4 x i64>
3749 %load = load i64, i64* %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero index mask.
3750 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
3751 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
3752 %2 = icmp eq <4 x i64> %0, %1
; Reinterpret %__u as eight i1 lanes and keep lanes 0-3 as the predicate.
3753 %3 = bitcast i8 %__u to <8 x i1>
3754 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3755 %4 = and <4 x i1> %extract.i, %2
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-7 are zero.
3756 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3757 %6 = bitcast <8 x i1> %5 to i8
; Unmasked register-register form: compares %__a and %__b lane-wise for
; equality as <4 x i64>, widens the <4 x i1> result to <16 x i1> with zero
; lanes and bitcasts that to i16.
; Expected codegen: the VLX run uses a single ymm vpcmpeqq into %k0; the NoVLX
; run (+avx512f only) compares in zmm and clears mask bits 4-15 with a
; kshiftlw/kshiftrw $12 pair.
3762 define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3763 ; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
3764 ; VLX: # %bb.0: # %entry
3765 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
3766 ; VLX-NEXT: kmovd %k0, %eax
3767 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3768 ; VLX-NEXT: vzeroupper
3771 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
3772 ; NoVLX: # %bb.0: # %entry
3773 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
3774 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3775 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3776 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3777 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3778 ; NoVLX-NEXT: kmovw %k0, %eax
3779 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3780 ; NoVLX-NEXT: vzeroupper
3783 %0 = bitcast <4 x i64> %__a to <4 x i64>
3784 %1 = bitcast <4 x i64> %__b to <4 x i64>
3785 %2 = icmp eq <4 x i64> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-15 are zero.
3786 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3787 %4 = bitcast <16 x i1> %3 to i16
; Unmasked register-memory form: loads a <4 x i64> from %__b, compares it for
; equality with %__a, widens the <4 x i1> result to <16 x i1> with zero lanes
; and bitcasts that to i16.
; Expected codegen: the VLX run folds the load into a ymm vpcmpeqq; the NoVLX
; run (+avx512f only) loads with vmovdqa, compares in zmm and clears mask bits
; 4-15 with a kshiftlw/kshiftrw $12 pair.
3791 define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
3792 ; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem:
3793 ; VLX: # %bb.0: # %entry
3794 ; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
3795 ; VLX-NEXT: kmovd %k0, %eax
3796 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3797 ; VLX-NEXT: vzeroupper
3800 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem:
3801 ; NoVLX: # %bb.0: # %entry
3802 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3803 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
3804 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3805 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3806 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3807 ; NoVLX-NEXT: kmovw %k0, %eax
3808 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3809 ; NoVLX-NEXT: vzeroupper
3812 %0 = bitcast <4 x i64> %__a to <4 x i64>
3813 %load = load <4 x i64>, <4 x i64>* %__b
3814 %1 = bitcast <4 x i64> %load to <4 x i64>
3815 %2 = icmp eq <4 x i64> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-15 are zero.
3816 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3817 %4 = bitcast <16 x i1> %3 to i16
; Masked register-register form: compares %__a and %__b lane-wise for equality
; as <4 x i64>, ANDs the result with the low four bits of %__u, widens to
; <16 x i1> with zero lanes and bitcasts that to i16.
; Expected codegen: the VLX run moves %__u into %k1 and uses it as the write
; mask of a ymm vpcmpeqq; the NoVLX run (+avx512f only) does the same in zmm
; and clears mask bits 4-15 with a kshiftlw/kshiftrw $12 pair.
3821 define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3822 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
3823 ; VLX: # %bb.0: # %entry
3824 ; VLX-NEXT: kmovd %edi, %k1
3825 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
3826 ; VLX-NEXT: kmovd %k0, %eax
3827 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3828 ; VLX-NEXT: vzeroupper
3831 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
3832 ; NoVLX: # %bb.0: # %entry
3833 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
3834 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3835 ; NoVLX-NEXT: kmovw %edi, %k1
3836 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3837 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3838 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3839 ; NoVLX-NEXT: kmovw %k0, %eax
3840 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3841 ; NoVLX-NEXT: vzeroupper
3844 %0 = bitcast <4 x i64> %__a to <4 x i64>
3845 %1 = bitcast <4 x i64> %__b to <4 x i64>
3846 %2 = icmp eq <4 x i64> %0, %1
; Reinterpret %__u as eight i1 lanes and keep lanes 0-3 as the predicate.
3847 %3 = bitcast i8 %__u to <8 x i1>
3848 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3849 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-15 are zero.
3850 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3851 %6 = bitcast <16 x i1> %5 to i16
; Masked register-memory form: loads a <4 x i64> from %__b, compares it for
; equality with %__a, ANDs the result with the low four bits of %__u, widens
; to <16 x i1> with zero lanes and bitcasts that to i16.
; Expected codegen: the VLX run folds the load into a ymm vpcmpeqq
; write-masked by %k1; the NoVLX run (+avx512f only) loads with vmovdqa,
; compares in zmm under %k1 and clears mask bits 4-15 with a
; kshiftlw/kshiftrw $12 pair.
3855 define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
3856 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
3857 ; VLX: # %bb.0: # %entry
3858 ; VLX-NEXT: kmovd %edi, %k1
3859 ; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
3860 ; VLX-NEXT: kmovd %k0, %eax
3861 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3862 ; VLX-NEXT: vzeroupper
3865 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
3866 ; NoVLX: # %bb.0: # %entry
3867 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3868 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
3869 ; NoVLX-NEXT: kmovw %edi, %k1
3870 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3871 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3872 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3873 ; NoVLX-NEXT: kmovw %k0, %eax
3874 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3875 ; NoVLX-NEXT: vzeroupper
3878 %0 = bitcast <4 x i64> %__a to <4 x i64>
3879 %load = load <4 x i64>, <4 x i64>* %__b
3880 %1 = bitcast <4 x i64> %load to <4 x i64>
3881 %2 = icmp eq <4 x i64> %0, %1
; Reinterpret %__u as eight i1 lanes and keep lanes 0-3 as the predicate.
3882 %3 = bitcast i8 %__u to <8 x i1>
3883 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3884 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-15 are zero.
3885 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3886 %6 = bitcast <16 x i1> %5 to i16
; Unmasked broadcast-from-memory form: loads one i64 from %__b, splats it to
; <4 x i64>, compares it for equality with %__a, widens the <4 x i1> result to
; <16 x i1> with zero lanes and bitcasts that to i16.
; Expected codegen: the VLX run folds the splat into a ymm vpcmpeqq with a
; {1to4} broadcast operand; the NoVLX run (+avx512f only) compares in zmm with
; {1to8} and clears mask bits 4-15 with a kshiftlw/kshiftrw $12 pair.
3891 define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
3892 ; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b:
3893 ; VLX: # %bb.0: # %entry
3894 ; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
3895 ; VLX-NEXT: kmovd %k0, %eax
3896 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3897 ; VLX-NEXT: vzeroupper
3900 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b:
3901 ; NoVLX: # %bb.0: # %entry
3902 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3903 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
3904 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3905 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3906 ; NoVLX-NEXT: kmovw %k0, %eax
3907 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3908 ; NoVLX-NEXT: vzeroupper
3911 %0 = bitcast <4 x i64> %__a to <4 x i64>
3912 %load = load i64, i64* %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero index mask.
3913 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
3914 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
3915 %2 = icmp eq <4 x i64> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-15 are zero.
3916 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3917 %4 = bitcast <16 x i1> %3 to i16
; Masked broadcast-from-memory form: loads one i64 from %__b, splats it to
; <4 x i64>, compares it for equality with %__a, ANDs the result with the low
; four bits of %__u, widens to <16 x i1> with zero lanes and bitcasts to i16.
; Expected codegen: the VLX run uses a ymm vpcmpeqq with a {1to4} operand,
; write-masked by %k1; the NoVLX run (+avx512f only) does the same in zmm with
; {1to8} and clears mask bits 4-15 with a kshiftlw/kshiftrw $12 pair.
3921 define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
3922 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b:
3923 ; VLX: # %bb.0: # %entry
3924 ; VLX-NEXT: kmovd %edi, %k1
3925 ; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
3926 ; VLX-NEXT: kmovd %k0, %eax
3927 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3928 ; VLX-NEXT: vzeroupper
3931 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b:
3932 ; NoVLX: # %bb.0: # %entry
3933 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3934 ; NoVLX-NEXT: kmovw %edi, %k1
3935 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
3936 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3937 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3938 ; NoVLX-NEXT: kmovw %k0, %eax
3939 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3940 ; NoVLX-NEXT: vzeroupper
3943 %0 = bitcast <4 x i64> %__a to <4 x i64>
3944 %load = load i64, i64* %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero index mask.
3945 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
3946 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
3947 %2 = icmp eq <4 x i64> %0, %1
; Reinterpret %__u as eight i1 lanes and keep lanes 0-3 as the predicate.
3948 %3 = bitcast i8 %__u to <8 x i1>
3949 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3950 %4 = and <4 x i1> %extract.i, %2
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-15 are zero.
3951 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3952 %6 = bitcast <16 x i1> %5 to i16
; Unmasked register-register form: compares %__a and %__b lane-wise for
; equality as <4 x i64>, widens the <4 x i1> result to <32 x i1> with zero
; lanes and bitcasts that to i32.
; Expected codegen: the VLX run uses a single ymm vpcmpeqq into %k0; the NoVLX
; run (+avx512f only) compares in zmm and clears mask bits 4-15 with a
; kshiftlw/kshiftrw $12 pair.
3957 define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3958 ; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask:
3959 ; VLX: # %bb.0: # %entry
3960 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
3961 ; VLX-NEXT: kmovd %k0, %eax
3962 ; VLX-NEXT: vzeroupper
3965 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask:
3966 ; NoVLX: # %bb.0: # %entry
3967 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
3968 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3969 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3970 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3971 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3972 ; NoVLX-NEXT: kmovw %k0, %eax
3973 ; NoVLX-NEXT: vzeroupper
3976 %0 = bitcast <4 x i64> %__a to <4 x i64>
3977 %1 = bitcast <4 x i64> %__b to <4 x i64>
3978 %2 = icmp eq <4 x i64> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-31 are zero.
3979 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3980 %4 = bitcast <32 x i1> %3 to i32
; Unmasked register-memory form: loads a <4 x i64> from %__b, compares it for
; equality with %__a, widens the <4 x i1> result to <32 x i1> with zero lanes
; and bitcasts that to i32.
; Expected codegen: the VLX run folds the load into a ymm vpcmpeqq; the NoVLX
; run (+avx512f only) loads with vmovdqa, compares in zmm and clears mask bits
; 4-15 with a kshiftlw/kshiftrw $12 pair.
3984 define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
3985 ; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem:
3986 ; VLX: # %bb.0: # %entry
3987 ; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
3988 ; VLX-NEXT: kmovd %k0, %eax
3989 ; VLX-NEXT: vzeroupper
3992 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem:
3993 ; NoVLX: # %bb.0: # %entry
3994 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3995 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
3996 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3997 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3998 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3999 ; NoVLX-NEXT: kmovw %k0, %eax
4000 ; NoVLX-NEXT: vzeroupper
4003 %0 = bitcast <4 x i64> %__a to <4 x i64>
4004 %load = load <4 x i64>, <4 x i64>* %__b
4005 %1 = bitcast <4 x i64> %load to <4 x i64>
4006 %2 = icmp eq <4 x i64> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-31 are zero.
4007 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4008 %4 = bitcast <32 x i1> %3 to i32
; Masked register-register form: compares %__a and %__b lane-wise for equality
; as <4 x i64>, ANDs the result with the low four bits of %__u, widens to
; <32 x i1> with zero lanes and bitcasts that to i32.
; Expected codegen: the VLX run moves %__u into %k1 and uses it as the write
; mask of a ymm vpcmpeqq; the NoVLX run (+avx512f only) does the same in zmm
; and clears mask bits 4-15 with a kshiftlw/kshiftrw $12 pair.
4012 define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
4013 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask:
4014 ; VLX: # %bb.0: # %entry
4015 ; VLX-NEXT: kmovd %edi, %k1
4016 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
4017 ; VLX-NEXT: kmovd %k0, %eax
4018 ; VLX-NEXT: vzeroupper
4021 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask:
4022 ; NoVLX: # %bb.0: # %entry
4023 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
4024 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4025 ; NoVLX-NEXT: kmovw %edi, %k1
4026 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4027 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4028 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4029 ; NoVLX-NEXT: kmovw %k0, %eax
4030 ; NoVLX-NEXT: vzeroupper
4033 %0 = bitcast <4 x i64> %__a to <4 x i64>
4034 %1 = bitcast <4 x i64> %__b to <4 x i64>
4035 %2 = icmp eq <4 x i64> %0, %1
; Reinterpret %__u as eight i1 lanes and keep lanes 0-3 as the predicate.
4036 %3 = bitcast i8 %__u to <8 x i1>
4037 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4038 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-31 are zero.
4039 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4040 %6 = bitcast <32 x i1> %5 to i32
; Masked register-memory form: loads a <4 x i64> from %__b, compares it for
; equality with %__a, ANDs the result with the low four bits of %__u, widens
; to <32 x i1> with zero lanes and bitcasts that to i32.
; Expected codegen: the VLX run folds the load into a ymm vpcmpeqq
; write-masked by %k1; the NoVLX run (+avx512f only) loads with vmovdqa,
; compares in zmm under %k1 and clears mask bits 4-15 with a
; kshiftlw/kshiftrw $12 pair.
4044 define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
4045 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem:
4046 ; VLX: # %bb.0: # %entry
4047 ; VLX-NEXT: kmovd %edi, %k1
4048 ; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
4049 ; VLX-NEXT: kmovd %k0, %eax
4050 ; VLX-NEXT: vzeroupper
4053 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem:
4054 ; NoVLX: # %bb.0: # %entry
4055 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4056 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
4057 ; NoVLX-NEXT: kmovw %edi, %k1
4058 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4059 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4060 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4061 ; NoVLX-NEXT: kmovw %k0, %eax
4062 ; NoVLX-NEXT: vzeroupper
4065 %0 = bitcast <4 x i64> %__a to <4 x i64>
4066 %load = load <4 x i64>, <4 x i64>* %__b
4067 %1 = bitcast <4 x i64> %load to <4 x i64>
4068 %2 = icmp eq <4 x i64> %0, %1
; Reinterpret %__u as eight i1 lanes and keep lanes 0-3 as the predicate.
4069 %3 = bitcast i8 %__u to <8 x i1>
4070 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4071 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-31 are zero.
4072 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4073 %6 = bitcast <32 x i1> %5 to i32
; Unmasked broadcast-from-memory form: loads one i64 from %__b, splats it to
; <4 x i64>, compares it for equality with %__a, widens the <4 x i1> result to
; <32 x i1> with zero lanes and bitcasts that to i32.
; Expected codegen: the VLX run folds the splat into a ymm vpcmpeqq with a
; {1to4} broadcast operand; the NoVLX run (+avx512f only) compares in zmm with
; {1to8} and clears mask bits 4-15 with a kshiftlw/kshiftrw $12 pair.
4078 define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
4079 ; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b:
4080 ; VLX: # %bb.0: # %entry
4081 ; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
4082 ; VLX-NEXT: kmovd %k0, %eax
4083 ; VLX-NEXT: vzeroupper
4086 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b:
4087 ; NoVLX: # %bb.0: # %entry
4088 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4089 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4090 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4091 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4092 ; NoVLX-NEXT: kmovw %k0, %eax
4093 ; NoVLX-NEXT: vzeroupper
4096 %0 = bitcast <4 x i64> %__a to <4 x i64>
4097 %load = load i64, i64* %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero index mask.
4098 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4099 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4100 %2 = icmp eq <4 x i64> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-31 are zero.
4101 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4102 %4 = bitcast <32 x i1> %3 to i32
; Masked broadcast-from-memory form: loads one i64 from %__b, splats it to
; <4 x i64>, compares it for equality with %__a, ANDs the result with the low
; four bits of %__u, widens to <32 x i1> with zero lanes and bitcasts to i32.
; Expected codegen: the VLX run uses a ymm vpcmpeqq with a {1to4} operand,
; write-masked by %k1; the NoVLX run (+avx512f only) does the same in zmm with
; {1to8} and clears mask bits 4-15 with a kshiftlw/kshiftrw $12 pair.
4106 define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
4107 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b:
4108 ; VLX: # %bb.0: # %entry
4109 ; VLX-NEXT: kmovd %edi, %k1
4110 ; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
4111 ; VLX-NEXT: kmovd %k0, %eax
4112 ; VLX-NEXT: vzeroupper
4115 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b:
4116 ; NoVLX: # %bb.0: # %entry
4117 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4118 ; NoVLX-NEXT: kmovw %edi, %k1
4119 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4120 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4121 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4122 ; NoVLX-NEXT: kmovw %k0, %eax
4123 ; NoVLX-NEXT: vzeroupper
4126 %0 = bitcast <4 x i64> %__a to <4 x i64>
4127 %load = load i64, i64* %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero index mask.
4128 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4129 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4130 %2 = icmp eq <4 x i64> %0, %1
; Reinterpret %__u as eight i1 lanes and keep lanes 0-3 as the predicate.
4131 %3 = bitcast i8 %__u to <8 x i1>
4132 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4133 %4 = and <4 x i1> %extract.i, %2
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-31 are zero.
4134 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4135 %6 = bitcast <32 x i1> %5 to i32
; Unmasked register-register form: compares %__a and %__b lane-wise for
; equality as <4 x i64>, widens the <4 x i1> result to <64 x i1> with zero
; lanes and bitcasts that to i64.
; Expected codegen: the VLX run uses a single ymm vpcmpeqq and reads the mask
; with kmovq; the NoVLX run (+avx512f only) compares in zmm, clears mask bits
; 4-15 with a kshiftlw/kshiftrw $12 pair and reads the mask with kmovw.
4140 define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
4141 ; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
4142 ; VLX: # %bb.0: # %entry
4143 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
4144 ; VLX-NEXT: kmovq %k0, %rax
4145 ; VLX-NEXT: vzeroupper
4148 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
4149 ; NoVLX: # %bb.0: # %entry
4150 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
4151 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4152 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4153 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4154 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4155 ; NoVLX-NEXT: kmovw %k0, %eax
4156 ; NoVLX-NEXT: vzeroupper
4159 %0 = bitcast <4 x i64> %__a to <4 x i64>
4160 %1 = bitcast <4 x i64> %__b to <4 x i64>
4161 %2 = icmp eq <4 x i64> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-63 are zero.
4162 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4163 %4 = bitcast <64 x i1> %3 to i64
; Unmasked register-memory form: loads a <4 x i64> from %__b, compares it for
; equality with %__a, widens the <4 x i1> result to <64 x i1> with zero lanes
; and bitcasts that to i64.
; Expected codegen: the VLX run folds the load into a ymm vpcmpeqq and reads
; the mask with kmovq; the NoVLX run (+avx512f only) loads with vmovdqa,
; compares in zmm and clears mask bits 4-15 with a kshiftlw/kshiftrw $12 pair.
4167 define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
4168 ; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
4169 ; VLX: # %bb.0: # %entry
4170 ; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
4171 ; VLX-NEXT: kmovq %k0, %rax
4172 ; VLX-NEXT: vzeroupper
4175 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
4176 ; NoVLX: # %bb.0: # %entry
4177 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4178 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
4179 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4180 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4181 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4182 ; NoVLX-NEXT: kmovw %k0, %eax
4183 ; NoVLX-NEXT: vzeroupper
4186 %0 = bitcast <4 x i64> %__a to <4 x i64>
4187 %load = load <4 x i64>, <4 x i64>* %__b
4188 %1 = bitcast <4 x i64> %load to <4 x i64>
4189 %2 = icmp eq <4 x i64> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-63 are zero.
4190 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4191 %4 = bitcast <64 x i1> %3 to i64
; Masked register-register form: compares %__a and %__b lane-wise for equality
; as <4 x i64>, ANDs the result with the low four bits of %__u, widens to
; <64 x i1> with zero lanes and bitcasts that to i64.
; Expected codegen: the VLX run moves %__u into %k1, uses it as the write mask
; of a ymm vpcmpeqq and reads the mask with kmovq; the NoVLX run (+avx512f
; only) does the compare in zmm and clears mask bits 4-15 with a
; kshiftlw/kshiftrw $12 pair.
4195 define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
4196 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
4197 ; VLX: # %bb.0: # %entry
4198 ; VLX-NEXT: kmovd %edi, %k1
4199 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
4200 ; VLX-NEXT: kmovq %k0, %rax
4201 ; VLX-NEXT: vzeroupper
4204 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
4205 ; NoVLX: # %bb.0: # %entry
4206 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
4207 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4208 ; NoVLX-NEXT: kmovw %edi, %k1
4209 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4210 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4211 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4212 ; NoVLX-NEXT: kmovw %k0, %eax
4213 ; NoVLX-NEXT: vzeroupper
4216 %0 = bitcast <4 x i64> %__a to <4 x i64>
4217 %1 = bitcast <4 x i64> %__b to <4 x i64>
4218 %2 = icmp eq <4 x i64> %0, %1
; Reinterpret %__u as eight i1 lanes and keep lanes 0-3 as the predicate.
4219 %3 = bitcast i8 %__u to <8 x i1>
4220 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4221 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-63 are zero.
4222 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4223 %6 = bitcast <64 x i1> %5 to i64
; Masked register-memory form: loads a <4 x i64> from %__b, compares it for
; equality with %__a, ANDs the result with the low four bits of %__u, widens
; to <64 x i1> with zero lanes and bitcasts that to i64.
; Expected codegen: the VLX run folds the load into a ymm vpcmpeqq
; write-masked by %k1 and reads the mask with kmovq; the NoVLX run (+avx512f
; only) loads with vmovdqa, compares in zmm under %k1 and clears mask bits
; 4-15 with a kshiftlw/kshiftrw $12 pair.
4227 define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
4228 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
4229 ; VLX: # %bb.0: # %entry
4230 ; VLX-NEXT: kmovd %edi, %k1
4231 ; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
4232 ; VLX-NEXT: kmovq %k0, %rax
4233 ; VLX-NEXT: vzeroupper
4236 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
4237 ; NoVLX: # %bb.0: # %entry
4238 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4239 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
4240 ; NoVLX-NEXT: kmovw %edi, %k1
4241 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4242 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4243 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4244 ; NoVLX-NEXT: kmovw %k0, %eax
4245 ; NoVLX-NEXT: vzeroupper
4248 %0 = bitcast <4 x i64> %__a to <4 x i64>
4249 %load = load <4 x i64>, <4 x i64>* %__b
4250 %1 = bitcast <4 x i64> %load to <4 x i64>
4251 %2 = icmp eq <4 x i64> %0, %1
; Reinterpret %__u as eight i1 lanes and keep lanes 0-3 as the predicate.
4252 %3 = bitcast i8 %__u to <8 x i1>
4253 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4254 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-63 are zero.
4255 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4256 %6 = bitcast <64 x i1> %5 to i64
; Unmasked broadcast-from-memory form: loads one i64 from %__b, splats it to
; <4 x i64>, compares it for equality with %__a, widens the <4 x i1> result to
; <64 x i1> with zero lanes and bitcasts that to i64.
; Expected codegen: the VLX run folds the splat into a ymm vpcmpeqq with a
; {1to4} broadcast operand and reads the mask with kmovq; the NoVLX run
; (+avx512f only) compares in zmm with {1to8} and clears mask bits 4-15 with a
; kshiftlw/kshiftrw $12 pair.
4261 define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
4262 ; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
4263 ; VLX: # %bb.0: # %entry
4264 ; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
4265 ; VLX-NEXT: kmovq %k0, %rax
4266 ; VLX-NEXT: vzeroupper
4269 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
4270 ; NoVLX: # %bb.0: # %entry
4271 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4272 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4273 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4274 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4275 ; NoVLX-NEXT: kmovw %k0, %eax
4276 ; NoVLX-NEXT: vzeroupper
4279 %0 = bitcast <4 x i64> %__a to <4 x i64>
4280 %load = load i64, i64* %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero index mask.
4281 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4282 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4283 %2 = icmp eq <4 x i64> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-63 are zero.
4284 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4285 %4 = bitcast <64 x i1> %3 to i64
; Masked <4 x i64> equality compare against a scalar i64 loaded from %__b and
; splatted to every lane. Lanes 0-3 of the <8 x i1> view of %__u are ANDed with
; the compare result (mask is the first AND operand here); the 4-lane mask is
; widened to 64 lanes with zeroinitializer in the upper lanes and bitcast to i64.
4289 define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
4290 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
4291 ; VLX: # %bb.0: # %entry
4292 ; VLX-NEXT: kmovd %edi, %k1
4293 ; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
4294 ; VLX-NEXT: kmovq %k0, %rax
4295 ; VLX-NEXT: vzeroupper
4298 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
4299 ; NoVLX: # %bb.0: # %entry
4300 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4301 ; NoVLX-NEXT: kmovw %edi, %k1
4302 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4303 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4304 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4305 ; NoVLX-NEXT: kmovw %k0, %eax
4306 ; NoVLX-NEXT: vzeroupper
4309 %0 = bitcast <4 x i64> %__a to <4 x i64>
4310 %load = load i64, i64* %__b
4311 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4312 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4313 %2 = icmp eq <4 x i64> %0, %1
4314 %3 = bitcast i8 %__u to <8 x i1>
4315 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4316 %4 = and <4 x i1> %extract.i, %2
4317 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4318 %6 = bitcast <64 x i1> %5 to i64
; Unmasked <8 x i64> equality compare of two register operands. The 8-lane
; result is widened to 16 lanes (shuffle indices 8-15 select the
; zeroinitializer operand) and bitcast to i16.
4323 define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4324 ; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
4325 ; VLX: # %bb.0: # %entry
4326 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4327 ; VLX-NEXT: kmovd %k0, %eax
4328 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
4329 ; VLX-NEXT: vzeroupper
4332 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
4333 ; NoVLX: # %bb.0: # %entry
4334 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4335 ; NoVLX-NEXT: kmovw %k0, %eax
4336 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
4337 ; NoVLX-NEXT: vzeroupper
4340 %0 = bitcast <8 x i64> %__a to <8 x i64>
4341 %1 = bitcast <8 x i64> %__b to <8 x i64>
4342 %2 = icmp eq <8 x i64> %0, %1
4343 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4344 %4 = bitcast <16 x i1> %3 to i16
; Unmasked <8 x i64> equality compare whose second operand is loaded from %__b.
; The 8-lane result is widened to 16 lanes with zeroinitializer in lanes 8-15
; and bitcast to i16. Both runs expect the load folded into vpcmpeqq.
4348 define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
4349 ; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
4350 ; VLX: # %bb.0: # %entry
4351 ; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
4352 ; VLX-NEXT: kmovd %k0, %eax
4353 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
4354 ; VLX-NEXT: vzeroupper
4357 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
4358 ; NoVLX: # %bb.0: # %entry
4359 ; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
4360 ; NoVLX-NEXT: kmovw %k0, %eax
4361 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
4362 ; NoVLX-NEXT: vzeroupper
4365 %0 = bitcast <8 x i64> %__a to <8 x i64>
4366 %load = load <8 x i64>, <8 x i64>* %__b
4367 %1 = bitcast <8 x i64> %load to <8 x i64>
4368 %2 = icmp eq <8 x i64> %0, %1
4369 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4370 %4 = bitcast <16 x i1> %3 to i16
; Masked <8 x i64> equality compare of two register operands. %__u is bitcast
; to <8 x i1> and ANDed with the compare result; the 8-lane mask is widened to
; 16 lanes with zeroinitializer in lanes 8-15 and bitcast to i16.
4374 define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4375 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
4376 ; VLX: # %bb.0: # %entry
4377 ; VLX-NEXT: kmovd %edi, %k1
4378 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4379 ; VLX-NEXT: kmovd %k0, %eax
4380 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
4381 ; VLX-NEXT: vzeroupper
4384 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
4385 ; NoVLX: # %bb.0: # %entry
4386 ; NoVLX-NEXT: kmovw %edi, %k1
4387 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4388 ; NoVLX-NEXT: kmovw %k0, %eax
4389 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
4390 ; NoVLX-NEXT: vzeroupper
4393 %0 = bitcast <8 x i64> %__a to <8 x i64>
4394 %1 = bitcast <8 x i64> %__b to <8 x i64>
4395 %2 = icmp eq <8 x i64> %0, %1
4396 %3 = bitcast i8 %__u to <8 x i1>
4397 %4 = and <8 x i1> %2, %3
4398 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4399 %6 = bitcast <16 x i1> %5 to i16
; Masked <8 x i64> equality compare whose second operand is loaded from %__b.
; %__u is bitcast to <8 x i1> and ANDed with the compare result; the 8-lane
; mask is widened to 16 lanes with zeroinitializer in lanes 8-15 and bitcast
; to i16.
4403 define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
4404 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
4405 ; VLX: # %bb.0: # %entry
4406 ; VLX-NEXT: kmovd %edi, %k1
4407 ; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4408 ; VLX-NEXT: kmovd %k0, %eax
4409 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
4410 ; VLX-NEXT: vzeroupper
4413 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
4414 ; NoVLX: # %bb.0: # %entry
4415 ; NoVLX-NEXT: kmovw %edi, %k1
4416 ; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4417 ; NoVLX-NEXT: kmovw %k0, %eax
4418 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
4419 ; NoVLX-NEXT: vzeroupper
4422 %0 = bitcast <8 x i64> %__a to <8 x i64>
4423 %load = load <8 x i64>, <8 x i64>* %__b
4424 %1 = bitcast <8 x i64> %load to <8 x i64>
4425 %2 = icmp eq <8 x i64> %0, %1
4426 %3 = bitcast i8 %__u to <8 x i1>
4427 %4 = and <8 x i1> %2, %3
4428 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4429 %6 = bitcast <16 x i1> %5 to i16
; Unmasked <8 x i64> equality compare against a scalar i64 loaded from %__b and
; splatted to every lane (insertelement followed by an all-zero-index shuffle).
; The 8-lane result is widened to 16 lanes with zeroinitializer in lanes 8-15
; and bitcast to i16. Both runs expect a {1to8} broadcast memory operand.
4434 define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
4435 ; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
4436 ; VLX: # %bb.0: # %entry
4437 ; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4438 ; VLX-NEXT: kmovd %k0, %eax
4439 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
4440 ; VLX-NEXT: vzeroupper
4443 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
4444 ; NoVLX: # %bb.0: # %entry
4445 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4446 ; NoVLX-NEXT: kmovw %k0, %eax
4447 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
4448 ; NoVLX-NEXT: vzeroupper
4451 %0 = bitcast <8 x i64> %__a to <8 x i64>
4452 %load = load i64, i64* %__b
4453 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4454 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4455 %2 = icmp eq <8 x i64> %0, %1
4456 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4457 %4 = bitcast <16 x i1> %3 to i16
; Masked <8 x i64> equality compare against a scalar i64 loaded from %__b and
; splatted to every lane. %__u is bitcast to <8 x i1> and ANDed with the compare
; result (mask is the first AND operand here); the 8-lane mask is widened to 16
; lanes with zeroinitializer in lanes 8-15 and bitcast to i16.
4461 define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
4462 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
4463 ; VLX: # %bb.0: # %entry
4464 ; VLX-NEXT: kmovd %edi, %k1
4465 ; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4466 ; VLX-NEXT: kmovd %k0, %eax
4467 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
4468 ; VLX-NEXT: vzeroupper
4471 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
4472 ; NoVLX: # %bb.0: # %entry
4473 ; NoVLX-NEXT: kmovw %edi, %k1
4474 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4475 ; NoVLX-NEXT: kmovw %k0, %eax
4476 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
4477 ; NoVLX-NEXT: vzeroupper
4480 %0 = bitcast <8 x i64> %__a to <8 x i64>
4481 %load = load i64, i64* %__b
4482 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4483 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4484 %2 = icmp eq <8 x i64> %0, %1
4485 %3 = bitcast i8 %__u to <8 x i1>
4486 %4 = and <8 x i1> %3, %2
4487 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4488 %6 = bitcast <16 x i1> %5 to i16
; Unmasked <8 x i64> equality compare of two register operands. The 8-lane
; result is widened to 32 lanes (every shuffle index >= 8 selects the
; zeroinitializer operand) and bitcast to i32.
4493 define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4494 ; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
4495 ; VLX: # %bb.0: # %entry
4496 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4497 ; VLX-NEXT: kmovd %k0, %eax
4498 ; VLX-NEXT: vzeroupper
4501 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
4502 ; NoVLX: # %bb.0: # %entry
4503 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4504 ; NoVLX-NEXT: kmovw %k0, %eax
4505 ; NoVLX-NEXT: vzeroupper
4508 %0 = bitcast <8 x i64> %__a to <8 x i64>
4509 %1 = bitcast <8 x i64> %__b to <8 x i64>
4510 %2 = icmp eq <8 x i64> %0, %1
4511 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4512 %4 = bitcast <32 x i1> %3 to i32
; Unmasked <8 x i64> equality compare whose second operand is loaded from %__b.
; The 8-lane result is widened to 32 lanes with zeroinitializer supplying every
; lane above 7, then bitcast to i32.
4516 define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
4517 ; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
4518 ; VLX: # %bb.0: # %entry
4519 ; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
4520 ; VLX-NEXT: kmovd %k0, %eax
4521 ; VLX-NEXT: vzeroupper
4524 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
4525 ; NoVLX: # %bb.0: # %entry
4526 ; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
4527 ; NoVLX-NEXT: kmovw %k0, %eax
4528 ; NoVLX-NEXT: vzeroupper
4531 %0 = bitcast <8 x i64> %__a to <8 x i64>
4532 %load = load <8 x i64>, <8 x i64>* %__b
4533 %1 = bitcast <8 x i64> %load to <8 x i64>
4534 %2 = icmp eq <8 x i64> %0, %1
4535 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4536 %4 = bitcast <32 x i1> %3 to i32
; Masked <8 x i64> equality compare of two register operands. %__u is bitcast
; to <8 x i1> and ANDed with the compare result; the 8-lane mask is widened to
; 32 lanes with zeroinitializer supplying every lane above 7, then bitcast to
; i32.
4540 define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4541 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
4542 ; VLX: # %bb.0: # %entry
4543 ; VLX-NEXT: kmovd %edi, %k1
4544 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4545 ; VLX-NEXT: kmovd %k0, %eax
4546 ; VLX-NEXT: vzeroupper
4549 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
4550 ; NoVLX: # %bb.0: # %entry
4551 ; NoVLX-NEXT: kmovw %edi, %k1
4552 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4553 ; NoVLX-NEXT: kmovw %k0, %eax
4554 ; NoVLX-NEXT: vzeroupper
4557 %0 = bitcast <8 x i64> %__a to <8 x i64>
4558 %1 = bitcast <8 x i64> %__b to <8 x i64>
4559 %2 = icmp eq <8 x i64> %0, %1
4560 %3 = bitcast i8 %__u to <8 x i1>
4561 %4 = and <8 x i1> %2, %3
4562 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4563 %6 = bitcast <32 x i1> %5 to i32
; Masked <8 x i64> equality compare whose second operand is loaded from %__b.
; %__u is bitcast to <8 x i1> and ANDed with the compare result; the 8-lane
; mask is widened to 32 lanes with zeroinitializer supplying every lane above
; 7, then bitcast to i32.
4567 define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
4568 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
4569 ; VLX: # %bb.0: # %entry
4570 ; VLX-NEXT: kmovd %edi, %k1
4571 ; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4572 ; VLX-NEXT: kmovd %k0, %eax
4573 ; VLX-NEXT: vzeroupper
4576 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
4577 ; NoVLX: # %bb.0: # %entry
4578 ; NoVLX-NEXT: kmovw %edi, %k1
4579 ; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4580 ; NoVLX-NEXT: kmovw %k0, %eax
4581 ; NoVLX-NEXT: vzeroupper
4584 %0 = bitcast <8 x i64> %__a to <8 x i64>
4585 %load = load <8 x i64>, <8 x i64>* %__b
4586 %1 = bitcast <8 x i64> %load to <8 x i64>
4587 %2 = icmp eq <8 x i64> %0, %1
4588 %3 = bitcast i8 %__u to <8 x i1>
4589 %4 = and <8 x i1> %2, %3
4590 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4591 %6 = bitcast <32 x i1> %5 to i32
; Unmasked <8 x i64> equality compare against a scalar i64 loaded from %__b and
; splatted to every lane. The 8-lane result is widened to 32 lanes with
; zeroinitializer supplying every lane above 7, then bitcast to i32. Both runs
; expect a {1to8} broadcast memory operand.
4596 define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
4597 ; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
4598 ; VLX: # %bb.0: # %entry
4599 ; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4600 ; VLX-NEXT: kmovd %k0, %eax
4601 ; VLX-NEXT: vzeroupper
4604 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
4605 ; NoVLX: # %bb.0: # %entry
4606 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4607 ; NoVLX-NEXT: kmovw %k0, %eax
4608 ; NoVLX-NEXT: vzeroupper
4611 %0 = bitcast <8 x i64> %__a to <8 x i64>
4612 %load = load i64, i64* %__b
4613 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4614 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4615 %2 = icmp eq <8 x i64> %0, %1
4616 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4617 %4 = bitcast <32 x i1> %3 to i32
; Masked <8 x i64> equality compare against a scalar i64 loaded from %__b and
; splatted to every lane. %__u is bitcast to <8 x i1> and ANDed with the compare
; result (mask is the first AND operand here); the 8-lane mask is widened to 32
; lanes with zeroinitializer supplying every lane above 7, then bitcast to i32.
4621 define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
4622 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
4623 ; VLX: # %bb.0: # %entry
4624 ; VLX-NEXT: kmovd %edi, %k1
4625 ; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4626 ; VLX-NEXT: kmovd %k0, %eax
4627 ; VLX-NEXT: vzeroupper
4630 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
4631 ; NoVLX: # %bb.0: # %entry
4632 ; NoVLX-NEXT: kmovw %edi, %k1
4633 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4634 ; NoVLX-NEXT: kmovw %k0, %eax
4635 ; NoVLX-NEXT: vzeroupper
4638 %0 = bitcast <8 x i64> %__a to <8 x i64>
4639 %load = load i64, i64* %__b
4640 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4641 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4642 %2 = icmp eq <8 x i64> %0, %1
4643 %3 = bitcast i8 %__u to <8 x i1>
4644 %4 = and <8 x i1> %3, %2
4645 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4646 %6 = bitcast <32 x i1> %5 to i32
; Unmasked <8 x i64> equality compare of two register operands. The 8-lane
; result is widened to 64 lanes (every shuffle index >= 8 selects the
; zeroinitializer operand) and bitcast to i64. The VLX run expects a kmovq to
; %rax; the NoVLX run expects a kmovw to %eax.
4651 define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4652 ; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
4653 ; VLX: # %bb.0: # %entry
4654 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4655 ; VLX-NEXT: kmovq %k0, %rax
4656 ; VLX-NEXT: vzeroupper
4659 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
4660 ; NoVLX: # %bb.0: # %entry
4661 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4662 ; NoVLX-NEXT: kmovw %k0, %eax
4663 ; NoVLX-NEXT: vzeroupper
4666 %0 = bitcast <8 x i64> %__a to <8 x i64>
4667 %1 = bitcast <8 x i64> %__b to <8 x i64>
4668 %2 = icmp eq <8 x i64> %0, %1
4669 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4670 %4 = bitcast <64 x i1> %3 to i64
; Unmasked <8 x i64> equality compare whose second operand is loaded from %__b.
; The 8-lane result is widened to 64 lanes with zeroinitializer supplying every
; lane above 7, then bitcast to i64.
4674 define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
4675 ; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
4676 ; VLX: # %bb.0: # %entry
4677 ; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
4678 ; VLX-NEXT: kmovq %k0, %rax
4679 ; VLX-NEXT: vzeroupper
4682 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
4683 ; NoVLX: # %bb.0: # %entry
4684 ; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
4685 ; NoVLX-NEXT: kmovw %k0, %eax
4686 ; NoVLX-NEXT: vzeroupper
4689 %0 = bitcast <8 x i64> %__a to <8 x i64>
4690 %load = load <8 x i64>, <8 x i64>* %__b
4691 %1 = bitcast <8 x i64> %load to <8 x i64>
4692 %2 = icmp eq <8 x i64> %0, %1
4693 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4694 %4 = bitcast <64 x i1> %3 to i64
; Masked <8 x i64> equality compare of two register operands. %__u is bitcast
; to <8 x i1> and ANDed with the compare result; the 8-lane mask is widened to
; 64 lanes with zeroinitializer supplying every lane above 7, then bitcast to
; i64.
4698 define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4699 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
4700 ; VLX: # %bb.0: # %entry
4701 ; VLX-NEXT: kmovd %edi, %k1
4702 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4703 ; VLX-NEXT: kmovq %k0, %rax
4704 ; VLX-NEXT: vzeroupper
4707 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
4708 ; NoVLX: # %bb.0: # %entry
4709 ; NoVLX-NEXT: kmovw %edi, %k1
4710 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4711 ; NoVLX-NEXT: kmovw %k0, %eax
4712 ; NoVLX-NEXT: vzeroupper
4715 %0 = bitcast <8 x i64> %__a to <8 x i64>
4716 %1 = bitcast <8 x i64> %__b to <8 x i64>
4717 %2 = icmp eq <8 x i64> %0, %1
4718 %3 = bitcast i8 %__u to <8 x i1>
4719 %4 = and <8 x i1> %2, %3
4720 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4721 %6 = bitcast <64 x i1> %5 to i64
; Masked <8 x i64> equality compare whose second operand is loaded from %__b.
; %__u is bitcast to <8 x i1> and ANDed with the compare result; the 8-lane
; mask is widened to 64 lanes with zeroinitializer supplying every lane above
; 7, then bitcast to i64.
4725 define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
4726 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
4727 ; VLX: # %bb.0: # %entry
4728 ; VLX-NEXT: kmovd %edi, %k1
4729 ; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4730 ; VLX-NEXT: kmovq %k0, %rax
4731 ; VLX-NEXT: vzeroupper
4734 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
4735 ; NoVLX: # %bb.0: # %entry
4736 ; NoVLX-NEXT: kmovw %edi, %k1
4737 ; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4738 ; NoVLX-NEXT: kmovw %k0, %eax
4739 ; NoVLX-NEXT: vzeroupper
4742 %0 = bitcast <8 x i64> %__a to <8 x i64>
4743 %load = load <8 x i64>, <8 x i64>* %__b
4744 %1 = bitcast <8 x i64> %load to <8 x i64>
4745 %2 = icmp eq <8 x i64> %0, %1
4746 %3 = bitcast i8 %__u to <8 x i1>
4747 %4 = and <8 x i1> %2, %3
4748 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4749 %6 = bitcast <64 x i1> %5 to i64
; Unmasked <8 x i64> equality compare against a scalar i64 loaded from %__b and
; splatted to every lane. The 8-lane result is widened to 64 lanes with
; zeroinitializer supplying every lane above 7, then bitcast to i64. Both runs
; expect a {1to8} broadcast memory operand.
4754 define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
4755 ; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
4756 ; VLX: # %bb.0: # %entry
4757 ; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4758 ; VLX-NEXT: kmovq %k0, %rax
4759 ; VLX-NEXT: vzeroupper
4762 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
4763 ; NoVLX: # %bb.0: # %entry
4764 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4765 ; NoVLX-NEXT: kmovw %k0, %eax
4766 ; NoVLX-NEXT: vzeroupper
4769 %0 = bitcast <8 x i64> %__a to <8 x i64>
4770 %load = load i64, i64* %__b
4771 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4772 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4773 %2 = icmp eq <8 x i64> %0, %1
4774 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4775 %4 = bitcast <64 x i1> %3 to i64
; Masked <8 x i64> equality compare against a scalar i64 loaded from %__b and
; splatted to every lane. %__u is bitcast to <8 x i1> and ANDed with the compare
; result (mask is the first AND operand here); the 8-lane mask is widened to 64
; lanes with zeroinitializer supplying every lane above 7, then bitcast to i64.
4779 define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
4780 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
4781 ; VLX: # %bb.0: # %entry
4782 ; VLX-NEXT: kmovd %edi, %k1
4783 ; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4784 ; VLX-NEXT: kmovq %k0, %rax
4785 ; VLX-NEXT: vzeroupper
4788 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
4789 ; NoVLX: # %bb.0: # %entry
4790 ; NoVLX-NEXT: kmovw %edi, %k1
4791 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4792 ; NoVLX-NEXT: kmovw %k0, %eax
4793 ; NoVLX-NEXT: vzeroupper
4796 %0 = bitcast <8 x i64> %__a to <8 x i64>
4797 %load = load i64, i64* %__b
4798 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4799 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4800 %2 = icmp eq <8 x i64> %0, %1
4801 %3 = bitcast i8 %__u to <8 x i1>
4802 %4 = and <8 x i1> %3, %2
4803 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4804 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than compare of two <2 x i64> arguments reinterpreted
; as <16 x i8>. The 16-lane result is widened to 32 lanes (shuffle indices
; 16-31 select the zeroinitializer operand) and bitcast to i32.
; The NoVLX run expects the compare in an xmm register followed by
; vpmovsxbd + vptestmd to form the mask.
4809 define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
4810 ; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
4811 ; VLX: # %bb.0: # %entry
4812 ; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
4813 ; VLX-NEXT: kmovd %k0, %eax
4816 ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
4817 ; NoVLX: # %bb.0: # %entry
4818 ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
4819 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
4820 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
4821 ; NoVLX-NEXT: kmovw %k0, %eax
4822 ; NoVLX-NEXT: vzeroupper
4825 %0 = bitcast <2 x i64> %__a to <16 x i8>
4826 %1 = bitcast <2 x i64> %__b to <16 x i8>
4827 %2 = icmp sgt <16 x i8> %0, %1
4828 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4829 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed greater-than compare on <16 x i8> views of %__a and of a
; <2 x i64> loaded from %__b. The 16-lane result is widened to 32 lanes with
; zeroinitializer in lanes 16-31 and bitcast to i32.
4833 define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
4834 ; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
4835 ; VLX: # %bb.0: # %entry
4836 ; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
4837 ; VLX-NEXT: kmovd %k0, %eax
4840 ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
4841 ; NoVLX: # %bb.0: # %entry
4842 ; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
4843 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
4844 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
4845 ; NoVLX-NEXT: kmovw %k0, %eax
4846 ; NoVLX-NEXT: vzeroupper
4849 %0 = bitcast <2 x i64> %__a to <16 x i8>
4850 %load = load <2 x i64>, <2 x i64>* %__b
4851 %1 = bitcast <2 x i64> %load to <16 x i8>
4852 %2 = icmp sgt <16 x i8> %0, %1
4853 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4854 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare on <16 x i8> views of two <2 x i64>
; arguments. %__u is bitcast to <16 x i1> and ANDed with the compare result;
; the 16-lane mask is widened to 32 lanes with zeroinitializer in lanes 16-31
; and bitcast to i32. The NoVLX run expects the mask to be applied with a
; scalar andl on the extracted bits rather than a {%k1} write-mask.
4858 define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
4859 ; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
4860 ; VLX: # %bb.0: # %entry
4861 ; VLX-NEXT: kmovd %edi, %k1
4862 ; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
4863 ; VLX-NEXT: kmovd %k0, %eax
4866 ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
4867 ; NoVLX: # %bb.0: # %entry
4868 ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
4869 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
4870 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
4871 ; NoVLX-NEXT: kmovw %k0, %eax
4872 ; NoVLX-NEXT: andl %edi, %eax
4873 ; NoVLX-NEXT: vzeroupper
4876 %0 = bitcast <2 x i64> %__a to <16 x i8>
4877 %1 = bitcast <2 x i64> %__b to <16 x i8>
4878 %2 = icmp sgt <16 x i8> %0, %1
4879 %3 = bitcast i16 %__u to <16 x i1>
4880 %4 = and <16 x i1> %2, %3
4881 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4882 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-than compare on <16 x i8> views of %__a and of a
; <2 x i64> loaded from %__b. %__u is bitcast to <16 x i1> and ANDed with the
; compare result; the 16-lane mask is widened to 32 lanes with zeroinitializer
; in lanes 16-31 and bitcast to i32.
4886 define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
4887 ; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
4888 ; VLX: # %bb.0: # %entry
4889 ; VLX-NEXT: kmovd %edi, %k1
4890 ; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
4891 ; VLX-NEXT: kmovd %k0, %eax
4894 ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
4895 ; NoVLX: # %bb.0: # %entry
4896 ; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
4897 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
4898 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
4899 ; NoVLX-NEXT: kmovw %k0, %eax
4900 ; NoVLX-NEXT: andl %edi, %eax
4901 ; NoVLX-NEXT: vzeroupper
4904 %0 = bitcast <2 x i64> %__a to <16 x i8>
4905 %load = load <2 x i64>, <2 x i64>* %__b
4906 %1 = bitcast <2 x i64> %load to <16 x i8>
4907 %2 = icmp sgt <16 x i8> %0, %1
4908 %3 = bitcast i16 %__u to <16 x i1>
4909 %4 = and <16 x i1> %2, %3
4910 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4911 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed greater-than compare on <16 x i8> views of two <2 x i64>
; arguments. The 16-lane result is widened to 64 lanes (every shuffle index
; >= 16 selects the zeroinitializer operand) and bitcast to i64.
4916 define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
4917 ; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
4918 ; VLX: # %bb.0: # %entry
4919 ; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
4920 ; VLX-NEXT: kmovq %k0, %rax
4923 ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
4924 ; NoVLX: # %bb.0: # %entry
4925 ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
4926 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
4927 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
4928 ; NoVLX-NEXT: kmovw %k0, %eax
4929 ; NoVLX-NEXT: vzeroupper
4932 %0 = bitcast <2 x i64> %__a to <16 x i8>
4933 %1 = bitcast <2 x i64> %__b to <16 x i8>
4934 %2 = icmp sgt <16 x i8> %0, %1
4935 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4936 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed greater-than compare on <16 x i8> views of %__a and of a
; <2 x i64> loaded from %__b. The 16-lane result is widened to 64 lanes with
; zeroinitializer supplying every lane above 15, then bitcast to i64.
4940 define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
4941 ; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
4942 ; VLX: # %bb.0: # %entry
4943 ; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
4944 ; VLX-NEXT: kmovq %k0, %rax
4947 ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
4948 ; NoVLX: # %bb.0: # %entry
4949 ; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
4950 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
4951 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
4952 ; NoVLX-NEXT: kmovw %k0, %eax
4953 ; NoVLX-NEXT: vzeroupper
4956 %0 = bitcast <2 x i64> %__a to <16 x i8>
4957 %load = load <2 x i64>, <2 x i64>* %__b
4958 %1 = bitcast <2 x i64> %load to <16 x i8>
4959 %2 = icmp sgt <16 x i8> %0, %1
4960 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4961 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of 16 x i8: the compare result is ANDed
; with %__u (bitcast to <16 x i1>), widened to 64 lanes and bitcast to i64.
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so all
; widened lanes are zero.
; The VLX run folds the mask into the compare as {%k1}; the NoVLX run applies
; it afterwards with andl on the GPR copy of the compare mask.
4965 define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
4966 ; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
4967 ; VLX: # %bb.0: # %entry
4968 ; VLX-NEXT: kmovd %edi, %k1
4969 ; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
4970 ; VLX-NEXT: kmovq %k0, %rax
4973 ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
4974 ; NoVLX: # %bb.0: # %entry
4975 ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
4976 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
4977 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
4978 ; NoVLX-NEXT: kmovw %k0, %eax
4979 ; NoVLX-NEXT: andl %edi, %eax
4980 ; NoVLX-NEXT: vzeroupper
4983 %0 = bitcast <2 x i64> %__a to <16 x i8>
4984 %1 = bitcast <2 x i64> %__b to <16 x i8>
4985 %2 = icmp sgt <16 x i8> %0, %1
4986 %3 = bitcast i16 %__u to <16 x i1>
4987 %4 = and <16 x i1> %2, %3
4988 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4989 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-than compare of 16 x i8 with the second operand loaded
; from %__b: the compare result is ANDed with %__u (bitcast to <16 x i1>),
; widened to 64 lanes and bitcast to i64.
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so all
; widened lanes are zero.
; The VLX run expects a masked memory-operand vpcmpgtb; the NoVLX run applies
; the mask with andl after moving the compare mask to a GPR.
4993 define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
4994 ; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
4995 ; VLX: # %bb.0: # %entry
4996 ; VLX-NEXT: kmovd %edi, %k1
4997 ; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
4998 ; VLX-NEXT: kmovq %k0, %rax
5001 ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
5002 ; NoVLX: # %bb.0: # %entry
5003 ; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
5004 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
5005 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5006 ; NoVLX-NEXT: kmovw %k0, %eax
5007 ; NoVLX-NEXT: andl %edi, %eax
5008 ; NoVLX-NEXT: vzeroupper
5011 %0 = bitcast <2 x i64> %__a to <16 x i8>
5012 %load = load <2 x i64>, <2 x i64>* %__b
5013 %1 = bitcast <2 x i64> %load to <16 x i8>
5014 %2 = icmp sgt <16 x i8> %0, %1
5015 %3 = bitcast i16 %__u to <16 x i1>
5016 %4 = and <16 x i1> %2, %3
5017 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5018 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of 32 x i8, with the 32-lane i1 result widened
; to 64 lanes and bitcast to i64.
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so the
; upper 32 lanes are zero.
; The VLX run expects a single ymm vpcmpgtb writing %k0. The NoVLX run expects
; the ymm result to be handled as two 16-byte halves (vextracti128 for the
; upper half), each converted via vpmovsxbd + vptestmd, then merged with
; shll $16 / orl.
5023 define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5024 ; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
5025 ; VLX: # %bb.0: # %entry
5026 ; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0
5027 ; VLX-NEXT: kmovq %k0, %rax
5028 ; VLX-NEXT: vzeroupper
5031 ; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
5032 ; NoVLX: # %bb.0: # %entry
5033 ; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
5034 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
5035 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
5036 ; NoVLX-NEXT: kmovw %k0, %ecx
5037 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
5038 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
5039 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5040 ; NoVLX-NEXT: kmovw %k0, %eax
5041 ; NoVLX-NEXT: shll $16, %eax
5042 ; NoVLX-NEXT: orl %ecx, %eax
5043 ; NoVLX-NEXT: vzeroupper
5046 %0 = bitcast <4 x i64> %__a to <32 x i8>
5047 %1 = bitcast <4 x i64> %__b to <32 x i8>
5048 %2 = icmp sgt <32 x i8> %0, %1
5049 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5050 %4 = bitcast <64 x i1> %3 to i64
; Signed greater-than compare of 32 x i8 (second operand loaded from %__b),
; with the 32-lane i1 result widened to 64 lanes and bitcast to i64.
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so the
; upper 32 lanes are zero.
; The VLX run expects a single memory-operand ymm vpcmpgtb writing %k0. The
; NoVLX run expects the ymm result to be handled as two 16-byte halves, each
; converted via vpmovsxbd + vptestmd, then merged with shll $16 / orl.
5054 define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
5055 ; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem:
5056 ; VLX: # %bb.0: # %entry
5057 ; VLX-NEXT: vpcmpgtb (%rdi), %ymm0, %k0
5058 ; VLX-NEXT: kmovq %k0, %rax
5059 ; VLX-NEXT: vzeroupper
5062 ; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem:
5063 ; NoVLX: # %bb.0: # %entry
5064 ; NoVLX-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0
5065 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
5066 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
5067 ; NoVLX-NEXT: kmovw %k0, %ecx
5068 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
5069 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
5070 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5071 ; NoVLX-NEXT: kmovw %k0, %eax
5072 ; NoVLX-NEXT: shll $16, %eax
5073 ; NoVLX-NEXT: orl %ecx, %eax
5074 ; NoVLX-NEXT: vzeroupper
5077 %0 = bitcast <4 x i64> %__a to <32 x i8>
5078 %load = load <4 x i64>, <4 x i64>* %__b
5079 %1 = bitcast <4 x i64> %load to <32 x i8>
5080 %2 = icmp sgt <32 x i8> %0, %1
5081 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5082 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of 32 x i8: the compare result is ANDed
; with %__u (bitcast to <32 x i1>), widened to 64 lanes and bitcast to i64.
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so the
; upper 32 lanes are zero.
; The VLX run folds the mask into the compare as {%k1}. The NoVLX run builds
; each 16-bit half of the compare mask separately, ANDs the low half with
; %edi and the high half with %edi shifted right by 16, then recombines the
; halves with movzwl / shll $16 / orl.
5086 define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5087 ; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
5088 ; VLX: # %bb.0: # %entry
5089 ; VLX-NEXT: kmovd %edi, %k1
5090 ; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1}
5091 ; VLX-NEXT: kmovq %k0, %rax
5092 ; VLX-NEXT: vzeroupper
5095 ; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
5096 ; NoVLX: # %bb.0: # %entry
5097 ; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
5098 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
5099 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
5100 ; NoVLX-NEXT: kmovw %k0, %eax
5101 ; NoVLX-NEXT: andl %edi, %eax
5102 ; NoVLX-NEXT: shrl $16, %edi
5103 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
5104 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
5105 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5106 ; NoVLX-NEXT: kmovw %k0, %ecx
5107 ; NoVLX-NEXT: andl %edi, %ecx
5108 ; NoVLX-NEXT: shll $16, %ecx
5109 ; NoVLX-NEXT: movzwl %ax, %eax
5110 ; NoVLX-NEXT: orl %ecx, %eax
5111 ; NoVLX-NEXT: vzeroupper
5114 %0 = bitcast <4 x i64> %__a to <32 x i8>
5115 %1 = bitcast <4 x i64> %__b to <32 x i8>
5116 %2 = icmp sgt <32 x i8> %0, %1
5117 %3 = bitcast i32 %__u to <32 x i1>
5118 %4 = and <32 x i1> %2, %3
5119 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5120 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-than compare of 32 x i8 with the second operand loaded
; from %__b: the compare result is ANDed with %__u (bitcast to <32 x i1>),
; widened to 64 lanes and bitcast to i64.
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so the
; upper 32 lanes are zero.
; The VLX run expects a masked memory-operand ymm vpcmpgtb. The NoVLX run
; builds each 16-bit half of the compare mask separately, ANDs the low half
; with %edi and the high half with %edi shifted right by 16, then recombines
; the halves with movzwl / shll $16 / orl.
5124 define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
5125 ; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
5126 ; VLX: # %bb.0: # %entry
5127 ; VLX-NEXT: kmovd %edi, %k1
5128 ; VLX-NEXT: vpcmpgtb (%rsi), %ymm0, %k0 {%k1}
5129 ; VLX-NEXT: kmovq %k0, %rax
5130 ; VLX-NEXT: vzeroupper
5133 ; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
5134 ; NoVLX: # %bb.0: # %entry
5135 ; NoVLX-NEXT: vpcmpgtb (%rsi), %ymm0, %ymm0
5136 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
5137 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
5138 ; NoVLX-NEXT: kmovw %k0, %eax
5139 ; NoVLX-NEXT: andl %edi, %eax
5140 ; NoVLX-NEXT: shrl $16, %edi
5141 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
5142 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
5143 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5144 ; NoVLX-NEXT: kmovw %k0, %ecx
5145 ; NoVLX-NEXT: andl %edi, %ecx
5146 ; NoVLX-NEXT: shll $16, %ecx
5147 ; NoVLX-NEXT: movzwl %ax, %eax
5148 ; NoVLX-NEXT: orl %ecx, %eax
5149 ; NoVLX-NEXT: vzeroupper
5152 %0 = bitcast <4 x i64> %__a to <32 x i8>
5153 %load = load <4 x i64>, <4 x i64>* %__b
5154 %1 = bitcast <4 x i64> %load to <32 x i8>
5155 %2 = icmp sgt <32 x i8> %0, %1
5156 %3 = bitcast i32 %__u to <32 x i1>
5157 %4 = and <32 x i1> %2, %3
5158 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5159 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of 8 x i16, with the 8-lane i1 result widened
; to 16 lanes and bitcast to i16.
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so the
; upper 8 lanes are zero.
; The VLX run expects a single vpcmpgtw writing %k0; the NoVLX run expects
; vpcmpgtw + vpmovsxwq + vptestmq to build the mask.
5164 define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5165 ; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask:
5166 ; VLX: # %bb.0: # %entry
5167 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
5168 ; VLX-NEXT: kmovd %k0, %eax
5169 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
5172 ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask:
5173 ; NoVLX: # %bb.0: # %entry
5174 ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
5175 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5176 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
5177 ; NoVLX-NEXT: kmovw %k0, %eax
5178 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
5179 ; NoVLX-NEXT: vzeroupper
5182 %0 = bitcast <2 x i64> %__a to <8 x i16>
5183 %1 = bitcast <2 x i64> %__b to <8 x i16>
5184 %2 = icmp sgt <8 x i16> %0, %1
5185 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5186 %4 = bitcast <16 x i1> %3 to i16
; Signed greater-than compare of 8 x i16 (second operand loaded from %__b),
; with the 8-lane i1 result widened to 16 lanes and bitcast to i16.
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so the
; upper 8 lanes are zero.
; The VLX run expects a single memory-operand vpcmpgtw writing %k0; the NoVLX
; run expects vpcmpgtw + vpmovsxwq + vptestmq to build the mask.
5190 define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5191 ; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem:
5192 ; VLX: # %bb.0: # %entry
5193 ; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
5194 ; VLX-NEXT: kmovd %k0, %eax
5195 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
5198 ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem:
5199 ; NoVLX: # %bb.0: # %entry
5200 ; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
5201 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5202 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
5203 ; NoVLX-NEXT: kmovw %k0, %eax
5204 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
5205 ; NoVLX-NEXT: vzeroupper
5208 %0 = bitcast <2 x i64> %__a to <8 x i16>
5209 %load = load <2 x i64>, <2 x i64>* %__b
5210 %1 = bitcast <2 x i64> %load to <8 x i16>
5211 %2 = icmp sgt <8 x i16> %0, %1
5212 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5213 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than compare of 8 x i16: the compare result is ANDed
; with %__u (bitcast to <8 x i1>), widened to 16 lanes and bitcast to i16.
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so the
; upper 8 lanes are zero.
; The VLX run folds the mask into vpcmpgtw as {%k1}; the NoVLX run instead
; applies {%k1} on the vptestmq that converts the widened compare result.
5217 define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5218 ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask:
5219 ; VLX: # %bb.0: # %entry
5220 ; VLX-NEXT: kmovd %edi, %k1
5221 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
5222 ; VLX-NEXT: kmovd %k0, %eax
5223 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
5226 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask:
5227 ; NoVLX: # %bb.0: # %entry
5228 ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
5229 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5230 ; NoVLX-NEXT: kmovw %edi, %k1
5231 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
5232 ; NoVLX-NEXT: kmovw %k0, %eax
5233 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
5234 ; NoVLX-NEXT: vzeroupper
5237 %0 = bitcast <2 x i64> %__a to <8 x i16>
5238 %1 = bitcast <2 x i64> %__b to <8 x i16>
5239 %2 = icmp sgt <8 x i16> %0, %1
5240 %3 = bitcast i8 %__u to <8 x i1>
5241 %4 = and <8 x i1> %2, %3
5242 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5243 %6 = bitcast <16 x i1> %5 to i16
; Masked signed greater-than compare of 8 x i16 with the second operand loaded
; from %__b: the compare result is ANDed with %__u (bitcast to <8 x i1>),
; widened to 16 lanes and bitcast to i16.
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so the
; upper 8 lanes are zero.
; The VLX run expects a masked memory-operand vpcmpgtw; the NoVLX run instead
; applies {%k1} on the vptestmq that converts the widened compare result.
5247 define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5248 ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem:
5249 ; VLX: # %bb.0: # %entry
5250 ; VLX-NEXT: kmovd %edi, %k1
5251 ; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
5252 ; VLX-NEXT: kmovd %k0, %eax
5253 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
5256 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem:
5257 ; NoVLX: # %bb.0: # %entry
5258 ; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
5259 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5260 ; NoVLX-NEXT: kmovw %edi, %k1
5261 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
5262 ; NoVLX-NEXT: kmovw %k0, %eax
5263 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
5264 ; NoVLX-NEXT: vzeroupper
5267 %0 = bitcast <2 x i64> %__a to <8 x i16>
5268 %load = load <2 x i64>, <2 x i64>* %__b
5269 %1 = bitcast <2 x i64> %load to <8 x i16>
5270 %2 = icmp sgt <8 x i16> %0, %1
5271 %3 = bitcast i8 %__u to <8 x i1>
5272 %4 = and <8 x i1> %2, %3
5273 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5274 %6 = bitcast <16 x i1> %5 to i16
; Signed greater-than compare of 8 x i16, with the 8-lane i1 result widened
; to 32 lanes and bitcast to i32.
; Shuffle indices 8-15 (repeated to fill the upper lanes) select lanes of the
; zeroinitializer operand, so all widened lanes are zero.
; The VLX run expects a single vpcmpgtw writing %k0; the NoVLX run expects
; vpcmpgtw + vpmovsxwq + vptestmq to build the mask.
5279 define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5280 ; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask:
5281 ; VLX: # %bb.0: # %entry
5282 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
5283 ; VLX-NEXT: kmovd %k0, %eax
5286 ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask:
5287 ; NoVLX: # %bb.0: # %entry
5288 ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
5289 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5290 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
5291 ; NoVLX-NEXT: kmovw %k0, %eax
5292 ; NoVLX-NEXT: vzeroupper
5295 %0 = bitcast <2 x i64> %__a to <8 x i16>
5296 %1 = bitcast <2 x i64> %__b to <8 x i16>
5297 %2 = icmp sgt <8 x i16> %0, %1
5298 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5299 %4 = bitcast <32 x i1> %3 to i32
; Signed greater-than compare of 8 x i16 (second operand loaded from %__b),
; with the 8-lane i1 result widened to 32 lanes and bitcast to i32.
; Shuffle indices 8-15 (repeated to fill the upper lanes) select lanes of the
; zeroinitializer operand, so all widened lanes are zero.
; The VLX run expects a single memory-operand vpcmpgtw writing %k0; the NoVLX
; run expects vpcmpgtw + vpmovsxwq + vptestmq to build the mask.
5303 define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5304 ; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem:
5305 ; VLX: # %bb.0: # %entry
5306 ; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
5307 ; VLX-NEXT: kmovd %k0, %eax
5310 ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem:
5311 ; NoVLX: # %bb.0: # %entry
5312 ; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
5313 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5314 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
5315 ; NoVLX-NEXT: kmovw %k0, %eax
5316 ; NoVLX-NEXT: vzeroupper
5319 %0 = bitcast <2 x i64> %__a to <8 x i16>
5320 %load = load <2 x i64>, <2 x i64>* %__b
5321 %1 = bitcast <2 x i64> %load to <8 x i16>
5322 %2 = icmp sgt <8 x i16> %0, %1
5323 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5324 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of 8 x i16: the compare result is ANDed
; with %__u (bitcast to <8 x i1>), widened to 32 lanes and bitcast to i32.
; Shuffle indices 8-15 (repeated to fill the upper lanes) select lanes of the
; zeroinitializer operand, so all widened lanes are zero.
; The VLX run folds the mask into vpcmpgtw as {%k1}; the NoVLX run instead
; applies {%k1} on the vptestmq that converts the widened compare result.
5328 define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5329 ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask:
5330 ; VLX: # %bb.0: # %entry
5331 ; VLX-NEXT: kmovd %edi, %k1
5332 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
5333 ; VLX-NEXT: kmovd %k0, %eax
5336 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask:
5337 ; NoVLX: # %bb.0: # %entry
5338 ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
5339 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5340 ; NoVLX-NEXT: kmovw %edi, %k1
5341 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
5342 ; NoVLX-NEXT: kmovw %k0, %eax
5343 ; NoVLX-NEXT: vzeroupper
5346 %0 = bitcast <2 x i64> %__a to <8 x i16>
5347 %1 = bitcast <2 x i64> %__b to <8 x i16>
5348 %2 = icmp sgt <8 x i16> %0, %1
5349 %3 = bitcast i8 %__u to <8 x i1>
5350 %4 = and <8 x i1> %2, %3
5351 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5352 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-than compare of 8 x i16 with the second operand loaded
; from %__b: the compare result is ANDed with %__u (bitcast to <8 x i1>),
; widened to 32 lanes and bitcast to i32.
; Shuffle indices 8-15 (repeated to fill the upper lanes) select lanes of the
; zeroinitializer operand, so all widened lanes are zero.
; The VLX run expects a masked memory-operand vpcmpgtw; the NoVLX run instead
; applies {%k1} on the vptestmq that converts the widened compare result.
5356 define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5357 ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem:
5358 ; VLX: # %bb.0: # %entry
5359 ; VLX-NEXT: kmovd %edi, %k1
5360 ; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
5361 ; VLX-NEXT: kmovd %k0, %eax
5364 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem:
5365 ; NoVLX: # %bb.0: # %entry
5366 ; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
5367 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5368 ; NoVLX-NEXT: kmovw %edi, %k1
5369 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
5370 ; NoVLX-NEXT: kmovw %k0, %eax
5371 ; NoVLX-NEXT: vzeroupper
5374 %0 = bitcast <2 x i64> %__a to <8 x i16>
5375 %load = load <2 x i64>, <2 x i64>* %__b
5376 %1 = bitcast <2 x i64> %load to <8 x i16>
5377 %2 = icmp sgt <8 x i16> %0, %1
5378 %3 = bitcast i8 %__u to <8 x i1>
5379 %4 = and <8 x i1> %2, %3
5380 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5381 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-than compare of 8 x i16, with the 8-lane i1 result widened
; to 64 lanes and bitcast to i64.
; Shuffle indices 8-15 (repeated to fill the upper lanes) select lanes of the
; zeroinitializer operand, so all widened lanes are zero.
; The VLX run expects a single vpcmpgtw writing %k0; the NoVLX run expects
; vpcmpgtw + vpmovsxwq + vptestmq to build the mask.
5386 define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5387 ; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask:
5388 ; VLX: # %bb.0: # %entry
5389 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
5390 ; VLX-NEXT: kmovq %k0, %rax
5393 ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask:
5394 ; NoVLX: # %bb.0: # %entry
5395 ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
5396 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5397 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
5398 ; NoVLX-NEXT: kmovw %k0, %eax
5399 ; NoVLX-NEXT: vzeroupper
5402 %0 = bitcast <2 x i64> %__a to <8 x i16>
5403 %1 = bitcast <2 x i64> %__b to <8 x i16>
5404 %2 = icmp sgt <8 x i16> %0, %1
5405 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5406 %4 = bitcast <64 x i1> %3 to i64
; Signed greater-than compare of 8 x i16 (second operand loaded from %__b),
; with the 8-lane i1 result widened to 64 lanes and bitcast to i64.
; Shuffle indices 8-15 (repeated to fill the upper lanes) select lanes of the
; zeroinitializer operand, so all widened lanes are zero.
; The VLX run expects a single memory-operand vpcmpgtw writing %k0; the NoVLX
; run expects vpcmpgtw + vpmovsxwq + vptestmq to build the mask.
5410 define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5411 ; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem:
5412 ; VLX: # %bb.0: # %entry
5413 ; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
5414 ; VLX-NEXT: kmovq %k0, %rax
5417 ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem:
5418 ; NoVLX: # %bb.0: # %entry
5419 ; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
5420 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5421 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
5422 ; NoVLX-NEXT: kmovw %k0, %eax
5423 ; NoVLX-NEXT: vzeroupper
5426 %0 = bitcast <2 x i64> %__a to <8 x i16>
5427 %load = load <2 x i64>, <2 x i64>* %__b
5428 %1 = bitcast <2 x i64> %load to <8 x i16>
5429 %2 = icmp sgt <8 x i16> %0, %1
5430 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5431 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of 8 x i16: the compare result is ANDed
; with %__u (bitcast to <8 x i1>), widened to 64 lanes and bitcast to i64.
; Shuffle indices 8-15 (repeated to fill the upper lanes) select lanes of the
; zeroinitializer operand, so all widened lanes are zero.
; The VLX run folds the mask into vpcmpgtw as {%k1}; the NoVLX run instead
; applies {%k1} on the vptestmq that converts the widened compare result.
5435 define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5436 ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask:
5437 ; VLX: # %bb.0: # %entry
5438 ; VLX-NEXT: kmovd %edi, %k1
5439 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
5440 ; VLX-NEXT: kmovq %k0, %rax
5443 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask:
5444 ; NoVLX: # %bb.0: # %entry
5445 ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
5446 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5447 ; NoVLX-NEXT: kmovw %edi, %k1
5448 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
5449 ; NoVLX-NEXT: kmovw %k0, %eax
5450 ; NoVLX-NEXT: vzeroupper
5453 %0 = bitcast <2 x i64> %__a to <8 x i16>
5454 %1 = bitcast <2 x i64> %__b to <8 x i16>
5455 %2 = icmp sgt <8 x i16> %0, %1
5456 %3 = bitcast i8 %__u to <8 x i1>
5457 %4 = and <8 x i1> %2, %3
5458 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5459 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-than compare of 8 x i16 with the second operand loaded
; from %__b: the compare result is ANDed with %__u (bitcast to <8 x i1>),
; widened to 64 lanes and bitcast to i64.
; Shuffle indices 8-15 (repeated to fill the upper lanes) select lanes of the
; zeroinitializer operand, so all widened lanes are zero.
; The VLX run expects a masked memory-operand vpcmpgtw; the NoVLX run instead
; applies {%k1} on the vptestmq that converts the widened compare result.
5463 define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5464 ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem:
5465 ; VLX: # %bb.0: # %entry
5466 ; VLX-NEXT: kmovd %edi, %k1
5467 ; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
5468 ; VLX-NEXT: kmovq %k0, %rax
5471 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem:
5472 ; NoVLX: # %bb.0: # %entry
5473 ; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
5474 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5475 ; NoVLX-NEXT: kmovw %edi, %k1
5476 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
5477 ; NoVLX-NEXT: kmovw %k0, %eax
5478 ; NoVLX-NEXT: vzeroupper
5481 %0 = bitcast <2 x i64> %__a to <8 x i16>
5482 %load = load <2 x i64>, <2 x i64>* %__b
5483 %1 = bitcast <2 x i64> %load to <8 x i16>
5484 %2 = icmp sgt <8 x i16> %0, %1
5485 %3 = bitcast i8 %__u to <8 x i1>
5486 %4 = and <8 x i1> %2, %3
5487 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5488 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of 16 x i16, with the 16-lane i1 result widened
; to 32 lanes and bitcast to i32.
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper 16 lanes are zero.
; The VLX run expects a single ymm vpcmpgtw writing %k0; the NoVLX run expects
; vpcmpgtw + vpmovsxwd + vptestmd to build the mask.
5493 define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5494 ; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask:
5495 ; VLX: # %bb.0: # %entry
5496 ; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
5497 ; VLX-NEXT: kmovd %k0, %eax
5498 ; VLX-NEXT: vzeroupper
5501 ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask:
5502 ; NoVLX: # %bb.0: # %entry
5503 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
5504 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5505 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5506 ; NoVLX-NEXT: kmovw %k0, %eax
5507 ; NoVLX-NEXT: vzeroupper
5510 %0 = bitcast <4 x i64> %__a to <16 x i16>
5511 %1 = bitcast <4 x i64> %__b to <16 x i16>
5512 %2 = icmp sgt <16 x i16> %0, %1
5513 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5514 %4 = bitcast <32 x i1> %3 to i32
; Signed greater-than compare of 16 x i16 (second operand loaded from %__b),
; with the 16-lane i1 result widened to 32 lanes and bitcast to i32.
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper 16 lanes are zero.
; The VLX run expects a single memory-operand ymm vpcmpgtw writing %k0; the
; NoVLX run expects vpcmpgtw + vpmovsxwd + vptestmd to build the mask.
5518 define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
5519 ; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem:
5520 ; VLX: # %bb.0: # %entry
5521 ; VLX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0
5522 ; VLX-NEXT: kmovd %k0, %eax
5523 ; VLX-NEXT: vzeroupper
5526 ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem:
5527 ; NoVLX: # %bb.0: # %entry
5528 ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
5529 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5530 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5531 ; NoVLX-NEXT: kmovw %k0, %eax
5532 ; NoVLX-NEXT: vzeroupper
5535 %0 = bitcast <4 x i64> %__a to <16 x i16>
5536 %load = load <4 x i64>, <4 x i64>* %__b
5537 %1 = bitcast <4 x i64> %load to <16 x i16>
5538 %2 = icmp sgt <16 x i16> %0, %1
5539 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5540 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of 16 x i16: the compare result is ANDed
; with %__u (bitcast to <16 x i1>), widened to 32 lanes and bitcast to i32.
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper 16 lanes are zero.
; The VLX run folds the mask into vpcmpgtw as {%k1}; the NoVLX run applies it
; afterwards with andl on the GPR copy of the compare mask.
5544 define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5545 ; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask:
5546 ; VLX: # %bb.0: # %entry
5547 ; VLX-NEXT: kmovd %edi, %k1
5548 ; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1}
5549 ; VLX-NEXT: kmovd %k0, %eax
5550 ; VLX-NEXT: vzeroupper
5553 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask:
5554 ; NoVLX: # %bb.0: # %entry
5555 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
5556 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5557 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5558 ; NoVLX-NEXT: kmovw %k0, %eax
5559 ; NoVLX-NEXT: andl %edi, %eax
5560 ; NoVLX-NEXT: vzeroupper
5563 %0 = bitcast <4 x i64> %__a to <16 x i16>
5564 %1 = bitcast <4 x i64> %__b to <16 x i16>
5565 %2 = icmp sgt <16 x i16> %0, %1
5566 %3 = bitcast i16 %__u to <16 x i1>
5567 %4 = and <16 x i1> %2, %3
5568 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5569 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-than compare of 16 x i16 with the second operand
; loaded from %__b: the compare result is ANDed with %__u (bitcast to
; <16 x i1>), widened to 32 lanes and bitcast to i32.
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper 16 lanes are zero.
; The VLX run expects a masked memory-operand ymm vpcmpgtw; the NoVLX run
; applies the mask with andl after moving the compare mask to a GPR.
5573 define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
5574 ; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem:
5575 ; VLX: # %bb.0: # %entry
5576 ; VLX-NEXT: kmovd %edi, %k1
5577 ; VLX-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1}
5578 ; VLX-NEXT: kmovd %k0, %eax
5579 ; VLX-NEXT: vzeroupper
5582 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem:
5583 ; NoVLX: # %bb.0: # %entry
5584 ; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
5585 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5586 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5587 ; NoVLX-NEXT: kmovw %k0, %eax
5588 ; NoVLX-NEXT: andl %edi, %eax
5589 ; NoVLX-NEXT: vzeroupper
5592 %0 = bitcast <4 x i64> %__a to <16 x i16>
5593 %load = load <4 x i64>, <4 x i64>* %__b
5594 %1 = bitcast <4 x i64> %load to <16 x i16>
5595 %2 = icmp sgt <16 x i16> %0, %1
5596 %3 = bitcast i16 %__u to <16 x i1>
5597 %4 = and <16 x i1> %2, %3
5598 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5599 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-than compare of 16 x i16, with the 16-lane i1 result widened
; to 64 lanes and bitcast to i64.
; Shuffle indices 16-31 (repeated to fill the upper lanes) select lanes of the
; zeroinitializer operand, so all widened lanes are zero.
; The VLX run expects a single ymm vpcmpgtw writing %k0; the NoVLX run expects
; vpcmpgtw + vpmovsxwd + vptestmd to build the mask.
5604 define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5605 ; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask:
5606 ; VLX: # %bb.0: # %entry
5607 ; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
5608 ; VLX-NEXT: kmovq %k0, %rax
5609 ; VLX-NEXT: vzeroupper
5612 ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask:
5613 ; NoVLX: # %bb.0: # %entry
5614 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
5615 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5616 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5617 ; NoVLX-NEXT: kmovw %k0, %eax
5618 ; NoVLX-NEXT: vzeroupper
5621 %0 = bitcast <4 x i64> %__a to <16 x i16>
5622 %1 = bitcast <4 x i64> %__b to <16 x i16>
5623 %2 = icmp sgt <16 x i16> %0, %1
5624 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5625 %4 = bitcast <64 x i1> %3 to i64
; Signed greater-than compare of 16 x i16 (second operand loaded from %__b),
; with the 16-lane i1 result widened to 64 lanes and bitcast to i64.
; Shuffle indices 16-31 (repeated to fill the upper lanes) select lanes of the
; zeroinitializer operand, so all widened lanes are zero.
; The VLX run expects a single memory-operand ymm vpcmpgtw writing %k0; the
; NoVLX run expects vpcmpgtw + vpmovsxwd + vptestmd to build the mask.
5629 define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
5630 ; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem:
5631 ; VLX: # %bb.0: # %entry
5632 ; VLX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0
5633 ; VLX-NEXT: kmovq %k0, %rax
5634 ; VLX-NEXT: vzeroupper
5637 ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem:
5638 ; NoVLX: # %bb.0: # %entry
5639 ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
5640 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5641 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5642 ; NoVLX-NEXT: kmovw %k0, %eax
5643 ; NoVLX-NEXT: vzeroupper
5646 %0 = bitcast <4 x i64> %__a to <16 x i16>
5647 %load = load <4 x i64>, <4 x i64>* %__b
5648 %1 = bitcast <4 x i64> %load to <16 x i16>
5649 %2 = icmp sgt <16 x i16> %0, %1
5650 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5651 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than on 16 x i16 lanes of two register operands.
; The compare result is ANDed with the 16-bit mask %__u, zero-padded to
; <64 x i1> and bitcast to i64.
5655 define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5656 ; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask:
5657 ; VLX: # %bb.0: # %entry
5658 ; VLX-NEXT: kmovd %edi, %k1
5659 ; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1}
5660 ; VLX-NEXT: kmovq %k0, %rax
5661 ; VLX-NEXT: vzeroupper
5664 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask:
5665 ; NoVLX: # %bb.0: # %entry
5666 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
5667 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5668 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5669 ; NoVLX-NEXT: kmovw %k0, %eax
5670 ; NoVLX-NEXT: andl %edi, %eax
5671 ; NoVLX-NEXT: vzeroupper
5674 %0 = bitcast <4 x i64> %__a to <16 x i16>
5675 %1 = bitcast <4 x i64> %__b to <16 x i16>
5676 %2 = icmp sgt <16 x i16> %0, %1
; Apply the caller-supplied write mask, one bit per lane.
5677 %3 = bitcast i16 %__u to <16 x i1>
5678 %4 = and <16 x i1> %2, %3
; Shuffle indices 16 and above select lanes of the zeroinitializer operand,
; so result lanes 16-63 are always zero.
5679 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5680 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-than on 16 x i16 lanes with the right-hand operand
; loaded from %__b. The compare result is ANDed with the 16-bit mask %__u,
; zero-padded to <64 x i1> and bitcast to i64.
5684 define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
5685 ; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem:
5686 ; VLX: # %bb.0: # %entry
5687 ; VLX-NEXT: kmovd %edi, %k1
5688 ; VLX-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1}
5689 ; VLX-NEXT: kmovq %k0, %rax
5690 ; VLX-NEXT: vzeroupper
5693 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem:
5694 ; NoVLX: # %bb.0: # %entry
5695 ; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
5696 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5697 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5698 ; NoVLX-NEXT: kmovw %k0, %eax
5699 ; NoVLX-NEXT: andl %edi, %eax
5700 ; NoVLX-NEXT: vzeroupper
5703 %0 = bitcast <4 x i64> %__a to <16 x i16>
5704 %load = load <4 x i64>, <4 x i64>* %__b
5705 %1 = bitcast <4 x i64> %load to <16 x i16>
5706 %2 = icmp sgt <16 x i16> %0, %1
; Apply the caller-supplied write mask, one bit per lane.
5707 %3 = bitcast i16 %__u to <16 x i1>
5708 %4 = and <16 x i1> %2, %3
; Shuffle indices 16 and above select lanes of the zeroinitializer operand,
; so result lanes 16-63 are always zero.
5709 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5710 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than on 32 x i16 lanes of two register operands.
; The <32 x i1> result is zero-padded to <64 x i1> and bitcast to i64.
; The NoVLX checks expect the compare to be split into two 256-bit halves
; whose 16-bit masks are merged with shll/orl.
5715 define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
5716 ; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
5717 ; VLX: # %bb.0: # %entry
5718 ; VLX-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
5719 ; VLX-NEXT: kmovq %k0, %rax
5720 ; VLX-NEXT: vzeroupper
5723 ; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
5724 ; NoVLX: # %bb.0: # %entry
5725 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
5726 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3
5727 ; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2
5728 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
5729 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5730 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5731 ; NoVLX-NEXT: kmovw %k0, %ecx
5732 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm0
5733 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5734 ; NoVLX-NEXT: kmovw %k0, %eax
5735 ; NoVLX-NEXT: shll $16, %eax
5736 ; NoVLX-NEXT: orl %ecx, %eax
5737 ; NoVLX-NEXT: vzeroupper
5740 %0 = bitcast <8 x i64> %__a to <32 x i16>
5741 %1 = bitcast <8 x i64> %__b to <32 x i16>
5742 %2 = icmp sgt <32 x i16> %0, %1
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so the
; upper 32 result lanes are always zero.
5743 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5744 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed greater-than on 32 x i16 lanes with the right-hand operand
; loaded from %__b. The <32 x i1> result is zero-padded to <64 x i1> and
; bitcast to i64. The NoVLX checks expect two 256-bit compares that read
; memory at offsets 0 and 32 from the pointer.
5748 define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
5749 ; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
5750 ; VLX: # %bb.0: # %entry
5751 ; VLX-NEXT: vpcmpgtw (%rdi), %zmm0, %k0
5752 ; VLX-NEXT: kmovq %k0, %rax
5753 ; VLX-NEXT: vzeroupper
5756 ; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
5757 ; NoVLX: # %bb.0: # %entry
5758 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1
5759 ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
5760 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5761 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5762 ; NoVLX-NEXT: kmovw %k0, %ecx
5763 ; NoVLX-NEXT: vpcmpgtw 32(%rdi), %ymm1, %ymm0
5764 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5765 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5766 ; NoVLX-NEXT: kmovw %k0, %eax
5767 ; NoVLX-NEXT: shll $16, %eax
5768 ; NoVLX-NEXT: orl %ecx, %eax
5769 ; NoVLX-NEXT: vzeroupper
5772 %0 = bitcast <8 x i64> %__a to <32 x i16>
5773 %load = load <8 x i64>, <8 x i64>* %__b
5774 %1 = bitcast <8 x i64> %load to <32 x i16>
5775 %2 = icmp sgt <32 x i16> %0, %1
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so the
; upper 32 result lanes are always zero.
5776 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5777 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than on 32 x i16 lanes of two register operands.
; The compare result is ANDed with the 32-bit mask %__u, zero-padded to
; <64 x i1> and bitcast to i64. The NoVLX checks expect each 256-bit half to
; be masked with its own 16 bits of %edi before the halves are merged.
5781 define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
5782 ; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
5783 ; VLX: # %bb.0: # %entry
5784 ; VLX-NEXT: kmovd %edi, %k1
5785 ; VLX-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
5786 ; VLX-NEXT: kmovq %k0, %rax
5787 ; VLX-NEXT: vzeroupper
5790 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
5791 ; NoVLX: # %bb.0: # %entry
5792 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm2
5793 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
5794 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
5795 ; NoVLX-NEXT: kmovw %k0, %eax
5796 ; NoVLX-NEXT: andl %edi, %eax
5797 ; NoVLX-NEXT: shrl $16, %edi
5798 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
5799 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
5800 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
5801 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5802 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5803 ; NoVLX-NEXT: kmovw %k0, %ecx
5804 ; NoVLX-NEXT: andl %edi, %ecx
5805 ; NoVLX-NEXT: shll $16, %ecx
5806 ; NoVLX-NEXT: movzwl %ax, %eax
5807 ; NoVLX-NEXT: orl %ecx, %eax
5808 ; NoVLX-NEXT: vzeroupper
5811 %0 = bitcast <8 x i64> %__a to <32 x i16>
5812 %1 = bitcast <8 x i64> %__b to <32 x i16>
5813 %2 = icmp sgt <32 x i16> %0, %1
; Apply the caller-supplied write mask, one bit per lane.
5814 %3 = bitcast i32 %__u to <32 x i1>
5815 %4 = and <32 x i1> %2, %3
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so the
; upper 32 result lanes are always zero.
5816 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5817 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-than on 32 x i16 lanes with the right-hand operand
; loaded from %__b. The compare result is ANDed with the 32-bit mask %__u,
; zero-padded to <64 x i1> and bitcast to i64. The NoVLX checks expect each
; 256-bit half to be masked with its own 16 bits of %edi before merging.
5821 define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
5822 ; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
5823 ; VLX: # %bb.0: # %entry
5824 ; VLX-NEXT: kmovd %edi, %k1
5825 ; VLX-NEXT: vpcmpgtw (%rsi), %zmm0, %k0 {%k1}
5826 ; VLX-NEXT: kmovq %k0, %rax
5827 ; VLX-NEXT: vzeroupper
5830 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
5831 ; NoVLX: # %bb.0: # %entry
5832 ; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm1
5833 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
5834 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
5835 ; NoVLX-NEXT: kmovw %k0, %eax
5836 ; NoVLX-NEXT: andl %edi, %eax
5837 ; NoVLX-NEXT: shrl $16, %edi
5838 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
5839 ; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm0, %ymm0
5840 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5841 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5842 ; NoVLX-NEXT: kmovw %k0, %ecx
5843 ; NoVLX-NEXT: andl %edi, %ecx
5844 ; NoVLX-NEXT: shll $16, %ecx
5845 ; NoVLX-NEXT: movzwl %ax, %eax
5846 ; NoVLX-NEXT: orl %ecx, %eax
5847 ; NoVLX-NEXT: vzeroupper
5850 %0 = bitcast <8 x i64> %__a to <32 x i16>
5851 %load = load <8 x i64>, <8 x i64>* %__b
5852 %1 = bitcast <8 x i64> %load to <32 x i16>
5853 %2 = icmp sgt <32 x i16> %0, %1
; Apply the caller-supplied write mask, one bit per lane.
5854 %3 = bitcast i32 %__u to <32 x i1>
5855 %4 = and <32 x i1> %2, %3
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so the
; upper 32 result lanes are always zero.
5856 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5857 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than on 4 x i32 lanes of two register operands.
; The <4 x i1> result is zero-padded to <8 x i1> and bitcast to i8.
; The NoVLX checks expect a 512-bit vpcmpgtd whose mask is shifted left then
; right by 12, which clears every bit above the low four.
5862 define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5863 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask:
5864 ; VLX: # %bb.0: # %entry
5865 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
5866 ; VLX-NEXT: kmovd %k0, %eax
5867 ; VLX-NEXT: # kill: def $al killed $al killed $eax
5870 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask:
5871 ; NoVLX: # %bb.0: # %entry
5872 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
5873 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5874 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
5875 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
5876 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
5877 ; NoVLX-NEXT: kmovw %k0, %eax
5878 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
5879 ; NoVLX-NEXT: vzeroupper
5882 %0 = bitcast <2 x i64> %__a to <4 x i32>
5883 %1 = bitcast <2 x i64> %__b to <4 x i32>
5884 %2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so the
; upper 4 result lanes are always zero.
5885 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5886 %4 = bitcast <8 x i1> %3 to i8
; Unmasked signed greater-than on 4 x i32 lanes with the right-hand operand
; loaded from %__b. The <4 x i1> result is zero-padded to <8 x i1> and
; bitcast to i8. The NoVLX checks expect the load as a separate vmovdqa
; rather than folded into the 512-bit compare.
5890 define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5891 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem:
5892 ; VLX: # %bb.0: # %entry
5893 ; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
5894 ; VLX-NEXT: kmovd %k0, %eax
5895 ; VLX-NEXT: # kill: def $al killed $al killed $eax
5898 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem:
5899 ; NoVLX: # %bb.0: # %entry
5900 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5901 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
5902 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
5903 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
5904 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
5905 ; NoVLX-NEXT: kmovw %k0, %eax
5906 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
5907 ; NoVLX-NEXT: vzeroupper
5910 %0 = bitcast <2 x i64> %__a to <4 x i32>
5911 %load = load <2 x i64>, <2 x i64>* %__b
5912 %1 = bitcast <2 x i64> %load to <4 x i32>
5913 %2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so the
; upper 4 result lanes are always zero.
5914 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5915 %4 = bitcast <8 x i1> %3 to i8
; Masked signed greater-than on 4 x i32 lanes of two register operands.
; The compare result is ANDed with the low 4 bits of %__u, zero-padded to
; <8 x i1> and bitcast to i8.
5919 define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5920 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask:
5921 ; VLX: # %bb.0: # %entry
5922 ; VLX-NEXT: kmovd %edi, %k1
5923 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
5924 ; VLX-NEXT: kmovd %k0, %eax
5925 ; VLX-NEXT: # kill: def $al killed $al killed $eax
5928 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask:
5929 ; NoVLX: # %bb.0: # %entry
5930 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
5931 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5932 ; NoVLX-NEXT: kmovw %edi, %k1
5933 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
5934 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
5935 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
5936 ; NoVLX-NEXT: kmovw %k0, %eax
5937 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
5938 ; NoVLX-NEXT: vzeroupper
5941 %0 = bitcast <2 x i64> %__a to <4 x i32>
5942 %1 = bitcast <2 x i64> %__b to <4 x i32>
5943 %2 = icmp sgt <4 x i32> %0, %1
; Only lanes 0-3 of the 8-bit mask are extracted and applied.
5944 %3 = bitcast i8 %__u to <8 x i1>
5945 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5946 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so the
; upper 4 result lanes are always zero.
5947 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5948 %6 = bitcast <8 x i1> %5 to i8
; Masked signed greater-than on 4 x i32 lanes with the right-hand operand
; loaded from %__b. The compare result is ANDed with the low 4 bits of %__u,
; zero-padded to <8 x i1> and bitcast to i8.
5952 define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5953 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem:
5954 ; VLX: # %bb.0: # %entry
5955 ; VLX-NEXT: kmovd %edi, %k1
5956 ; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
5957 ; VLX-NEXT: kmovd %k0, %eax
5958 ; VLX-NEXT: # kill: def $al killed $al killed $eax
5961 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem:
5962 ; NoVLX: # %bb.0: # %entry
5963 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5964 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
5965 ; NoVLX-NEXT: kmovw %edi, %k1
5966 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
5967 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
5968 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
5969 ; NoVLX-NEXT: kmovw %k0, %eax
5970 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
5971 ; NoVLX-NEXT: vzeroupper
5974 %0 = bitcast <2 x i64> %__a to <4 x i32>
5975 %load = load <2 x i64>, <2 x i64>* %__b
5976 %1 = bitcast <2 x i64> %load to <4 x i32>
5977 %2 = icmp sgt <4 x i32> %0, %1
; Only lanes 0-3 of the 8-bit mask are extracted and applied.
5978 %3 = bitcast i8 %__u to <8 x i1>
5979 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5980 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so the
; upper 4 result lanes are always zero.
5981 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5982 %6 = bitcast <8 x i1> %5 to i8
; Unmasked signed greater-than on 4 x i32 lanes against a scalar i32 loaded
; from %__b and splatted to all lanes. The <4 x i1> result is zero-padded to
; <8 x i1> and bitcast to i8. Both check prefixes expect the splat to be
; folded into the compare as an embedded broadcast memory operand.
5987 define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
5988 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
5989 ; VLX: # %bb.0: # %entry
5990 ; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
5991 ; VLX-NEXT: kmovd %k0, %eax
5992 ; VLX-NEXT: # kill: def $al killed $al killed $eax
5995 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
5996 ; NoVLX: # %bb.0: # %entry
5997 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5998 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
5999 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6000 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6001 ; NoVLX-NEXT: kmovw %k0, %eax
6002 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
6003 ; NoVLX-NEXT: vzeroupper
6006 %0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat the loaded scalar: insert into lane 0, then replicate lane 0.
6007 %load = load i32, i32* %__b
6008 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6009 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6010 %2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so the
; upper 4 result lanes are always zero.
6011 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6012 %4 = bitcast <8 x i1> %3 to i8
; Masked signed greater-than on 4 x i32 lanes against a scalar i32 loaded
; from %__b and splatted to all lanes. The compare result is ANDed with the
; low 4 bits of %__u, zero-padded to <8 x i1> and bitcast to i8.
6016 define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
6017 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
6018 ; VLX: # %bb.0: # %entry
6019 ; VLX-NEXT: kmovd %edi, %k1
6020 ; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
6021 ; VLX-NEXT: kmovd %k0, %eax
6022 ; VLX-NEXT: # kill: def $al killed $al killed $eax
6025 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
6026 ; NoVLX: # %bb.0: # %entry
6027 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6028 ; NoVLX-NEXT: kmovw %edi, %k1
6029 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
6030 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6031 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6032 ; NoVLX-NEXT: kmovw %k0, %eax
6033 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
6034 ; NoVLX-NEXT: vzeroupper
6037 %0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat the loaded scalar: insert into lane 0, then replicate lane 0.
6038 %load = load i32, i32* %__b
6039 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6040 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6041 %2 = icmp sgt <4 x i32> %0, %1
; Only lanes 0-3 of the 8-bit mask are extracted and applied.
6042 %3 = bitcast i8 %__u to <8 x i1>
6043 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6044 %4 = and <4 x i1> %extract.i, %2
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so the
; upper 4 result lanes are always zero.
6045 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6046 %6 = bitcast <8 x i1> %5 to i8
; Unmasked signed greater-than on 4 x i32 lanes of two register operands.
; The <4 x i1> result is zero-padded to <16 x i1> and bitcast to i16.
6051 define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6052 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask:
6053 ; VLX: # %bb.0: # %entry
6054 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
6055 ; VLX-NEXT: kmovd %k0, %eax
6056 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6059 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask:
6060 ; NoVLX: # %bb.0: # %entry
6061 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
6062 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6063 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6064 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6065 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6066 ; NoVLX-NEXT: kmovw %k0, %eax
6067 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6068 ; NoVLX-NEXT: vzeroupper
6071 %0 = bitcast <2 x i64> %__a to <4 x i32>
6072 %1 = bitcast <2 x i64> %__b to <4 x i32>
6073 %2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 4-7 (repeated) select lanes of the zeroinitializer operand,
; so the upper 12 result lanes are always zero.
6074 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6075 %4 = bitcast <16 x i1> %3 to i16
; Unmasked signed greater-than on 4 x i32 lanes with the right-hand operand
; loaded from %__b. The <4 x i1> result is zero-padded to <16 x i1> and
; bitcast to i16.
6079 define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6080 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem:
6081 ; VLX: # %bb.0: # %entry
6082 ; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
6083 ; VLX-NEXT: kmovd %k0, %eax
6084 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6087 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem:
6088 ; NoVLX: # %bb.0: # %entry
6089 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6090 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
6091 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6092 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6093 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6094 ; NoVLX-NEXT: kmovw %k0, %eax
6095 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6096 ; NoVLX-NEXT: vzeroupper
6099 %0 = bitcast <2 x i64> %__a to <4 x i32>
6100 %load = load <2 x i64>, <2 x i64>* %__b
6101 %1 = bitcast <2 x i64> %load to <4 x i32>
6102 %2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 4-7 (repeated) select lanes of the zeroinitializer operand,
; so the upper 12 result lanes are always zero.
6103 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6104 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than on 4 x i32 lanes of two register operands.
; The compare result is ANDed with the low 4 bits of %__u, zero-padded to
; <16 x i1> and bitcast to i16.
6108 define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6109 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask:
6110 ; VLX: # %bb.0: # %entry
6111 ; VLX-NEXT: kmovd %edi, %k1
6112 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
6113 ; VLX-NEXT: kmovd %k0, %eax
6114 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6117 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask:
6118 ; NoVLX: # %bb.0: # %entry
6119 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
6120 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6121 ; NoVLX-NEXT: kmovw %edi, %k1
6122 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6123 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6124 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6125 ; NoVLX-NEXT: kmovw %k0, %eax
6126 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6127 ; NoVLX-NEXT: vzeroupper
6130 %0 = bitcast <2 x i64> %__a to <4 x i32>
6131 %1 = bitcast <2 x i64> %__b to <4 x i32>
6132 %2 = icmp sgt <4 x i32> %0, %1
; Only lanes 0-3 of the 8-bit mask are extracted and applied.
6133 %3 = bitcast i8 %__u to <8 x i1>
6134 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6135 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 (repeated) select lanes of the zeroinitializer operand,
; so the upper 12 result lanes are always zero.
6136 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6137 %6 = bitcast <16 x i1> %5 to i16
; Masked signed greater-than on 4 x i32 lanes with the right-hand operand
; loaded from %__b. The compare result is ANDed with the low 4 bits of %__u,
; zero-padded to <16 x i1> and bitcast to i16.
6141 define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6142 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem:
6143 ; VLX: # %bb.0: # %entry
6144 ; VLX-NEXT: kmovd %edi, %k1
6145 ; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
6146 ; VLX-NEXT: kmovd %k0, %eax
6147 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6150 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem:
6151 ; NoVLX: # %bb.0: # %entry
6152 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6153 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
6154 ; NoVLX-NEXT: kmovw %edi, %k1
6155 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6156 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6157 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6158 ; NoVLX-NEXT: kmovw %k0, %eax
6159 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6160 ; NoVLX-NEXT: vzeroupper
6163 %0 = bitcast <2 x i64> %__a to <4 x i32>
6164 %load = load <2 x i64>, <2 x i64>* %__b
6165 %1 = bitcast <2 x i64> %load to <4 x i32>
6166 %2 = icmp sgt <4 x i32> %0, %1
; Only lanes 0-3 of the 8-bit mask are extracted and applied.
6167 %3 = bitcast i8 %__u to <8 x i1>
6168 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6169 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 (repeated) select lanes of the zeroinitializer operand,
; so the upper 12 result lanes are always zero.
6170 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6171 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed greater-than on 4 x i32 lanes against a scalar i32 loaded
; from %__b and splatted to all lanes. The <4 x i1> result is zero-padded to
; <16 x i1> and bitcast to i16.
6176 define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
6177 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
6178 ; VLX: # %bb.0: # %entry
6179 ; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
6180 ; VLX-NEXT: kmovd %k0, %eax
6181 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6184 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
6185 ; NoVLX: # %bb.0: # %entry
6186 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6187 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
6188 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6189 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6190 ; NoVLX-NEXT: kmovw %k0, %eax
6191 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6192 ; NoVLX-NEXT: vzeroupper
6195 %0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat the loaded scalar: insert into lane 0, then replicate lane 0.
6196 %load = load i32, i32* %__b
6197 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6198 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6199 %2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 4-7 (repeated) select lanes of the zeroinitializer operand,
; so the upper 12 result lanes are always zero.
6200 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6201 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than on 4 x i32 lanes against a scalar i32 loaded
; from %__b and splatted to all lanes. The compare result is ANDed with the
; low 4 bits of %__u, zero-padded to <16 x i1> and bitcast to i16.
6205 define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
6206 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
6207 ; VLX: # %bb.0: # %entry
6208 ; VLX-NEXT: kmovd %edi, %k1
6209 ; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
6210 ; VLX-NEXT: kmovd %k0, %eax
6211 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6214 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
6215 ; NoVLX: # %bb.0: # %entry
6216 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6217 ; NoVLX-NEXT: kmovw %edi, %k1
6218 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
6219 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6220 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6221 ; NoVLX-NEXT: kmovw %k0, %eax
6222 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6223 ; NoVLX-NEXT: vzeroupper
6226 %0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat the loaded scalar: insert into lane 0, then replicate lane 0.
6227 %load = load i32, i32* %__b
6228 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6229 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6230 %2 = icmp sgt <4 x i32> %0, %1
; Only lanes 0-3 of the 8-bit mask are extracted and applied.
6231 %3 = bitcast i8 %__u to <8 x i1>
6232 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6233 %4 = and <4 x i1> %extract.i, %2
; Shuffle indices 4-7 (repeated) select lanes of the zeroinitializer operand,
; so the upper 12 result lanes are always zero.
6234 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6235 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed greater-than on 4 x i32 lanes of two register operands.
; The <4 x i1> result is zero-padded to <32 x i1> and bitcast to i32.
6240 define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6241 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask:
6242 ; VLX: # %bb.0: # %entry
6243 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
6244 ; VLX-NEXT: kmovd %k0, %eax
6247 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask:
6248 ; NoVLX: # %bb.0: # %entry
6249 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
6250 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6251 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6252 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6253 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6254 ; NoVLX-NEXT: kmovw %k0, %eax
6255 ; NoVLX-NEXT: vzeroupper
6258 %0 = bitcast <2 x i64> %__a to <4 x i32>
6259 %1 = bitcast <2 x i64> %__b to <4 x i32>
6260 %2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 4-7 (repeated) select lanes of the zeroinitializer operand,
; so the upper 28 result lanes are always zero.
6261 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6262 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed greater-than on 4 x i32 lanes with the right-hand operand
; loaded from %__b. The <4 x i1> result is zero-padded to <32 x i1> and
; bitcast to i32.
6266 define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6267 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem:
6268 ; VLX: # %bb.0: # %entry
6269 ; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
6270 ; VLX-NEXT: kmovd %k0, %eax
6273 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem:
6274 ; NoVLX: # %bb.0: # %entry
6275 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6276 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
6277 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6278 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6279 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6280 ; NoVLX-NEXT: kmovw %k0, %eax
6281 ; NoVLX-NEXT: vzeroupper
6284 %0 = bitcast <2 x i64> %__a to <4 x i32>
6285 %load = load <2 x i64>, <2 x i64>* %__b
6286 %1 = bitcast <2 x i64> %load to <4 x i32>
6287 %2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 4-7 (repeated) select lanes of the zeroinitializer operand,
; so the upper 28 result lanes are always zero.
6288 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6289 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than on 4 x i32 lanes of two register operands.
; The compare result is ANDed with the low 4 bits of %__u, zero-padded to
; <32 x i1> and bitcast to i32.
6293 define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6294 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask:
6295 ; VLX: # %bb.0: # %entry
6296 ; VLX-NEXT: kmovd %edi, %k1
6297 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
6298 ; VLX-NEXT: kmovd %k0, %eax
6301 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask:
6302 ; NoVLX: # %bb.0: # %entry
6303 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
6304 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6305 ; NoVLX-NEXT: kmovw %edi, %k1
6306 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6307 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6308 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6309 ; NoVLX-NEXT: kmovw %k0, %eax
6310 ; NoVLX-NEXT: vzeroupper
6313 %0 = bitcast <2 x i64> %__a to <4 x i32>
6314 %1 = bitcast <2 x i64> %__b to <4 x i32>
6315 %2 = icmp sgt <4 x i32> %0, %1
; Only lanes 0-3 of the 8-bit mask are extracted and applied.
6316 %3 = bitcast i8 %__u to <8 x i1>
6317 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6318 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 (repeated) select lanes of the zeroinitializer operand,
; so the upper 28 result lanes are always zero.
6319 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6320 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-than on 4 x i32 lanes with the right-hand operand
; loaded from %__b. The compare result is ANDed with the low 4 bits of %__u,
; zero-padded to <32 x i1> and bitcast to i32.
6324 define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6325 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem:
6326 ; VLX: # %bb.0: # %entry
6327 ; VLX-NEXT: kmovd %edi, %k1
6328 ; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
6329 ; VLX-NEXT: kmovd %k0, %eax
6332 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem:
6333 ; NoVLX: # %bb.0: # %entry
6334 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6335 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
6336 ; NoVLX-NEXT: kmovw %edi, %k1
6337 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6338 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6339 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6340 ; NoVLX-NEXT: kmovw %k0, %eax
6341 ; NoVLX-NEXT: vzeroupper
6344 %0 = bitcast <2 x i64> %__a to <4 x i32>
6345 %load = load <2 x i64>, <2 x i64>* %__b
6346 %1 = bitcast <2 x i64> %load to <4 x i32>
6347 %2 = icmp sgt <4 x i32> %0, %1
; Only lanes 0-3 of the 8-bit mask are extracted and applied.
6348 %3 = bitcast i8 %__u to <8 x i1>
6349 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6350 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 (repeated) select lanes of the zeroinitializer operand,
; so the upper 28 result lanes are always zero.
6351 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6352 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed greater-than on 4 x i32 lanes against a scalar i32 loaded
; from %__b and splatted to all lanes. The <4 x i1> result is zero-padded to
; <32 x i1> and bitcast to i32.
6357 define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
6358 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
6359 ; VLX: # %bb.0: # %entry
6360 ; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
6361 ; VLX-NEXT: kmovd %k0, %eax
6364 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
6365 ; NoVLX: # %bb.0: # %entry
6366 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6367 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
6368 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6369 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6370 ; NoVLX-NEXT: kmovw %k0, %eax
6371 ; NoVLX-NEXT: vzeroupper
6374 %0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat the loaded scalar: insert into lane 0, then replicate lane 0.
6375 %load = load i32, i32* %__b
6376 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6377 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6378 %2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 4-7 (repeated) select lanes of the zeroinitializer operand,
; so the upper 28 result lanes are always zero.
6379 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6380 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of the <4 x i32> view of %__a against the
; i32 loaded from %__b, splatted to all four lanes. %__u is bitcast to
; <8 x i1> and its low four lanes are ANDed with the compare result (mask is
; the first `and` operand here). The <4 x i1> value is then widened to
; <32 x i1> (mask indices 4-7 select lanes of the zeroinitializer operand, so
; result lanes 4-31 are zero) and returned as an i32 bitmask.
6384 define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
6385 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
6386 ; VLX: # %bb.0: # %entry
6387 ; VLX-NEXT: kmovd %edi, %k1
6388 ; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
6389 ; VLX-NEXT: kmovd %k0, %eax
6392 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
6393 ; NoVLX: # %bb.0: # %entry
6394 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6395 ; NoVLX-NEXT: kmovw %edi, %k1
6396 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
6397 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6398 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6399 ; NoVLX-NEXT: kmovw %k0, %eax
6400 ; NoVLX-NEXT: vzeroupper
6403 %0 = bitcast <2 x i64> %__a to <4 x i32>
6404 %load = load i32, i32* %__b
6405 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6406 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6407 %2 = icmp sgt <4 x i32> %0, %1
6408 %3 = bitcast i8 %__u to <8 x i1>
6409 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6410 %4 = and <4 x i1> %extract.i, %2
6411 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6412 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed greater-than compare of the <4 x i32> views of %__a and
; %__b (both passed by value). The <4 x i1> result is widened to <64 x i1>
; (mask indices 4-7 select lanes of the zeroinitializer operand, so result
; lanes 4-63 are zero) and returned as an i64 bitmask.
6417 define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6418 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask:
6419 ; VLX: # %bb.0: # %entry
6420 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
6421 ; VLX-NEXT: kmovq %k0, %rax
6424 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask:
6425 ; NoVLX: # %bb.0: # %entry
6426 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
6427 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6428 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6429 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6430 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6431 ; NoVLX-NEXT: kmovw %k0, %eax
6432 ; NoVLX-NEXT: vzeroupper
6435 %0 = bitcast <2 x i64> %__a to <4 x i32>
6436 %1 = bitcast <2 x i64> %__b to <4 x i32>
6437 %2 = icmp sgt <4 x i32> %0, %1
6438 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6439 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed greater-than compare of the <4 x i32> view of %__a against
; the <2 x i64> vector loaded from %__b (viewed as <4 x i32>). The <4 x i1>
; result is widened to <64 x i1> (mask indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-63 are zero) and returned as an
; i64 bitmask.
6443 define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6444 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem:
6445 ; VLX: # %bb.0: # %entry
6446 ; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
6447 ; VLX-NEXT: kmovq %k0, %rax
6450 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem:
6451 ; NoVLX: # %bb.0: # %entry
6452 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6453 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
6454 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6455 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6456 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6457 ; NoVLX-NEXT: kmovw %k0, %eax
6458 ; NoVLX-NEXT: vzeroupper
6461 %0 = bitcast <2 x i64> %__a to <4 x i32>
6462 %load = load <2 x i64>, <2 x i64>* %__b
6463 %1 = bitcast <2 x i64> %load to <4 x i32>
6464 %2 = icmp sgt <4 x i32> %0, %1
6465 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6466 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of the <4 x i32> views of %__a and %__b
; (both passed by value). %__u is bitcast to <8 x i1> and its low four lanes
; are ANDed with the compare result (compare is the first `and` operand here).
; The <4 x i1> value is then widened to <64 x i1> (mask indices 4-7 select
; lanes of the zeroinitializer operand, so result lanes 4-63 are zero) and
; returned as an i64 bitmask.
6470 define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6471 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask:
6472 ; VLX: # %bb.0: # %entry
6473 ; VLX-NEXT: kmovd %edi, %k1
6474 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
6475 ; VLX-NEXT: kmovq %k0, %rax
6478 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask:
6479 ; NoVLX: # %bb.0: # %entry
6480 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
6481 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6482 ; NoVLX-NEXT: kmovw %edi, %k1
6483 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6484 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6485 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6486 ; NoVLX-NEXT: kmovw %k0, %eax
6487 ; NoVLX-NEXT: vzeroupper
6490 %0 = bitcast <2 x i64> %__a to <4 x i32>
6491 %1 = bitcast <2 x i64> %__b to <4 x i32>
6492 %2 = icmp sgt <4 x i32> %0, %1
6493 %3 = bitcast i8 %__u to <8 x i1>
6494 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6495 %4 = and <4 x i1> %2, %extract.i
6496 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6497 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-than compare of the <4 x i32> view of %__a against the
; <2 x i64> vector loaded from %__b (viewed as <4 x i32>). %__u is bitcast to
; <8 x i1> and its low four lanes are ANDed with the compare result (compare
; is the first `and` operand here). The <4 x i1> value is then widened to
; <64 x i1> (mask indices 4-7 select lanes of the zeroinitializer operand, so
; result lanes 4-63 are zero) and returned as an i64 bitmask.
6501 define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6502 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem:
6503 ; VLX: # %bb.0: # %entry
6504 ; VLX-NEXT: kmovd %edi, %k1
6505 ; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
6506 ; VLX-NEXT: kmovq %k0, %rax
6509 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem:
6510 ; NoVLX: # %bb.0: # %entry
6511 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6512 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
6513 ; NoVLX-NEXT: kmovw %edi, %k1
6514 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6515 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6516 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6517 ; NoVLX-NEXT: kmovw %k0, %eax
6518 ; NoVLX-NEXT: vzeroupper
6521 %0 = bitcast <2 x i64> %__a to <4 x i32>
6522 %load = load <2 x i64>, <2 x i64>* %__b
6523 %1 = bitcast <2 x i64> %load to <4 x i32>
6524 %2 = icmp sgt <4 x i32> %0, %1
6525 %3 = bitcast i8 %__u to <8 x i1>
6526 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6527 %4 = and <4 x i1> %2, %extract.i
6528 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6529 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than compare of the <4 x i32> view of %__a against
; the i32 loaded from %__b, splatted to all four lanes. The <4 x i1> result is
; widened to <64 x i1> (mask indices 4-7 select lanes of the zeroinitializer
; operand, so result lanes 4-63 are zero) and returned as an i64 bitmask.
6534 define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
6535 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
6536 ; VLX: # %bb.0: # %entry
6537 ; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
6538 ; VLX-NEXT: kmovq %k0, %rax
6541 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
6542 ; NoVLX: # %bb.0: # %entry
6543 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6544 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
6545 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6546 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6547 ; NoVLX-NEXT: kmovw %k0, %eax
6548 ; NoVLX-NEXT: vzeroupper
6551 %0 = bitcast <2 x i64> %__a to <4 x i32>
6552 %load = load i32, i32* %__b
6553 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6554 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6555 %2 = icmp sgt <4 x i32> %0, %1
6556 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6557 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of the <4 x i32> view of %__a against the
; i32 loaded from %__b, splatted to all four lanes. %__u is bitcast to
; <8 x i1> and its low four lanes are ANDed with the compare result (mask is
; the first `and` operand here). The <4 x i1> value is then widened to
; <64 x i1> (mask indices 4-7 select lanes of the zeroinitializer operand, so
; result lanes 4-63 are zero) and returned as an i64 bitmask.
6561 define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
6562 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
6563 ; VLX: # %bb.0: # %entry
6564 ; VLX-NEXT: kmovd %edi, %k1
6565 ; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
6566 ; VLX-NEXT: kmovq %k0, %rax
6569 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
6570 ; NoVLX: # %bb.0: # %entry
6571 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6572 ; NoVLX-NEXT: kmovw %edi, %k1
6573 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
6574 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6575 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6576 ; NoVLX-NEXT: kmovw %k0, %eax
6577 ; NoVLX-NEXT: vzeroupper
6580 %0 = bitcast <2 x i64> %__a to <4 x i32>
6581 %load = load i32, i32* %__b
6582 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6583 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6584 %2 = icmp sgt <4 x i32> %0, %1
6585 %3 = bitcast i8 %__u to <8 x i1>
6586 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6587 %4 = and <4 x i1> %extract.i, %2
6588 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6589 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than compare of the <8 x i32> views of %__a and
; %__b (both passed by value). The <8 x i1> result is widened to <16 x i1>
; (mask indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-15 are zero) and returned as an i16 bitmask.
6594 define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6595 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask:
6596 ; VLX: # %bb.0: # %entry
6597 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
6598 ; VLX-NEXT: kmovd %k0, %eax
6599 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6600 ; VLX-NEXT: vzeroupper
6603 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask:
6604 ; NoVLX: # %bb.0: # %entry
6605 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
6606 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6607 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6608 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6609 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6610 ; NoVLX-NEXT: kmovw %k0, %eax
6611 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6612 ; NoVLX-NEXT: vzeroupper
6615 %0 = bitcast <4 x i64> %__a to <8 x i32>
6616 %1 = bitcast <4 x i64> %__b to <8 x i32>
6617 %2 = icmp sgt <8 x i32> %0, %1
6618 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6619 %4 = bitcast <16 x i1> %3 to i16
; Unmasked signed greater-than compare of the <8 x i32> view of %__a against
; the <4 x i64> vector loaded from %__b (viewed as <8 x i32>). The <8 x i1>
; result is widened to <16 x i1> (mask indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-15 are zero) and returned as an
; i16 bitmask.
6623 define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
6624 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem:
6625 ; VLX: # %bb.0: # %entry
6626 ; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
6627 ; VLX-NEXT: kmovd %k0, %eax
6628 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6629 ; VLX-NEXT: vzeroupper
6632 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem:
6633 ; NoVLX: # %bb.0: # %entry
6634 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6635 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
6636 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6637 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6638 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6639 ; NoVLX-NEXT: kmovw %k0, %eax
6640 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6641 ; NoVLX-NEXT: vzeroupper
6644 %0 = bitcast <4 x i64> %__a to <8 x i32>
6645 %load = load <4 x i64>, <4 x i64>* %__b
6646 %1 = bitcast <4 x i64> %load to <8 x i32>
6647 %2 = icmp sgt <8 x i32> %0, %1
6648 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6649 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than compare of the <8 x i32> views of %__a and %__b
; (both passed by value). %__u is bitcast to <8 x i1> and ANDed with the
; compare result (compare is the first `and` operand here). The <8 x i1>
; value is then widened to <16 x i1> (mask indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-15 are zero) and returned as an
; i16 bitmask.
6653 define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6654 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask:
6655 ; VLX: # %bb.0: # %entry
6656 ; VLX-NEXT: kmovd %edi, %k1
6657 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
6658 ; VLX-NEXT: kmovd %k0, %eax
6659 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6660 ; VLX-NEXT: vzeroupper
6663 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask:
6664 ; NoVLX: # %bb.0: # %entry
6665 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
6666 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6667 ; NoVLX-NEXT: kmovw %edi, %k1
6668 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6669 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6670 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6671 ; NoVLX-NEXT: kmovw %k0, %eax
6672 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6673 ; NoVLX-NEXT: vzeroupper
6676 %0 = bitcast <4 x i64> %__a to <8 x i32>
6677 %1 = bitcast <4 x i64> %__b to <8 x i32>
6678 %2 = icmp sgt <8 x i32> %0, %1
6679 %3 = bitcast i8 %__u to <8 x i1>
6680 %4 = and <8 x i1> %2, %3
6681 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6682 %6 = bitcast <16 x i1> %5 to i16
; Masked signed greater-than compare of the <8 x i32> view of %__a against the
; <4 x i64> vector loaded from %__b (viewed as <8 x i32>). %__u is bitcast to
; <8 x i1> and ANDed with the compare result (compare is the first `and`
; operand here). The <8 x i1> value is then widened to <16 x i1> (mask
; indices 8-15 select lanes of the zeroinitializer operand, so result lanes
; 8-15 are zero) and returned as an i16 bitmask.
6686 define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
6687 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem:
6688 ; VLX: # %bb.0: # %entry
6689 ; VLX-NEXT: kmovd %edi, %k1
6690 ; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
6691 ; VLX-NEXT: kmovd %k0, %eax
6692 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6693 ; VLX-NEXT: vzeroupper
6696 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem:
6697 ; NoVLX: # %bb.0: # %entry
6698 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6699 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
6700 ; NoVLX-NEXT: kmovw %edi, %k1
6701 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6702 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6703 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6704 ; NoVLX-NEXT: kmovw %k0, %eax
6705 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6706 ; NoVLX-NEXT: vzeroupper
6709 %0 = bitcast <4 x i64> %__a to <8 x i32>
6710 %load = load <4 x i64>, <4 x i64>* %__b
6711 %1 = bitcast <4 x i64> %load to <8 x i32>
6712 %2 = icmp sgt <8 x i32> %0, %1
6713 %3 = bitcast i8 %__u to <8 x i1>
6714 %4 = and <8 x i1> %2, %3
6715 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6716 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed greater-than compare of the <8 x i32> view of %__a against
; the i32 loaded from %__b, splatted to all eight lanes. The <8 x i1> result
; is widened to <16 x i1> (mask indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-15 are zero) and returned as an
; i16 bitmask.
6721 define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
6722 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
6723 ; VLX: # %bb.0: # %entry
6724 ; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
6725 ; VLX-NEXT: kmovd %k0, %eax
6726 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6727 ; VLX-NEXT: vzeroupper
6730 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
6731 ; NoVLX: # %bb.0: # %entry
6732 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6733 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
6734 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6735 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6736 ; NoVLX-NEXT: kmovw %k0, %eax
6737 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6738 ; NoVLX-NEXT: vzeroupper
6741 %0 = bitcast <4 x i64> %__a to <8 x i32>
6742 %load = load i32, i32* %__b
6743 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
6744 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
6745 %2 = icmp sgt <8 x i32> %0, %1
6746 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6747 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than compare of the <8 x i32> view of %__a against the
; i32 loaded from %__b, splatted to all eight lanes. %__u is bitcast to
; <8 x i1> and ANDed with the compare result (mask is the first `and` operand
; here). The <8 x i1> value is then widened to <16 x i1> (mask indices 8-15
; select lanes of the zeroinitializer operand, so result lanes 8-15 are zero)
; and returned as an i16 bitmask.
6751 define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
6752 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
6753 ; VLX: # %bb.0: # %entry
6754 ; VLX-NEXT: kmovd %edi, %k1
6755 ; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
6756 ; VLX-NEXT: kmovd %k0, %eax
6757 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6758 ; VLX-NEXT: vzeroupper
6761 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
6762 ; NoVLX: # %bb.0: # %entry
6763 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6764 ; NoVLX-NEXT: kmovw %edi, %k1
6765 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
6766 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6767 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6768 ; NoVLX-NEXT: kmovw %k0, %eax
6769 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6770 ; NoVLX-NEXT: vzeroupper
6773 %0 = bitcast <4 x i64> %__a to <8 x i32>
6774 %load = load i32, i32* %__b
6775 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
6776 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
6777 %2 = icmp sgt <8 x i32> %0, %1
6778 %3 = bitcast i8 %__u to <8 x i1>
6779 %4 = and <8 x i1> %3, %2
6780 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6781 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed greater-than compare of the <8 x i32> views of %__a and
; %__b (both passed by value). The <8 x i1> result is widened to <32 x i1>
; (mask indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-31 are zero) and returned as an i32 bitmask.
6786 define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6787 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask:
6788 ; VLX: # %bb.0: # %entry
6789 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
6790 ; VLX-NEXT: kmovd %k0, %eax
6791 ; VLX-NEXT: vzeroupper
6794 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask:
6795 ; NoVLX: # %bb.0: # %entry
6796 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
6797 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6798 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6799 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6800 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6801 ; NoVLX-NEXT: kmovw %k0, %eax
6802 ; NoVLX-NEXT: vzeroupper
6805 %0 = bitcast <4 x i64> %__a to <8 x i32>
6806 %1 = bitcast <4 x i64> %__b to <8 x i32>
6807 %2 = icmp sgt <8 x i32> %0, %1
6808 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6809 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed greater-than compare of the <8 x i32> view of %__a against
; the <4 x i64> vector loaded from %__b (viewed as <8 x i32>). The <8 x i1>
; result is widened to <32 x i1> (mask indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-31 are zero) and returned as an
; i32 bitmask.
6813 define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
6814 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem:
6815 ; VLX: # %bb.0: # %entry
6816 ; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
6817 ; VLX-NEXT: kmovd %k0, %eax
6818 ; VLX-NEXT: vzeroupper
6821 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem:
6822 ; NoVLX: # %bb.0: # %entry
6823 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6824 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
6825 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6826 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6827 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6828 ; NoVLX-NEXT: kmovw %k0, %eax
6829 ; NoVLX-NEXT: vzeroupper
6832 %0 = bitcast <4 x i64> %__a to <8 x i32>
6833 %load = load <4 x i64>, <4 x i64>* %__b
6834 %1 = bitcast <4 x i64> %load to <8 x i32>
6835 %2 = icmp sgt <8 x i32> %0, %1
6836 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6837 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of the <8 x i32> views of %__a and %__b
; (both passed by value). %__u is bitcast to <8 x i1> and ANDed with the
; compare result (compare is the first `and` operand here). The <8 x i1>
; value is then widened to <32 x i1> (mask indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-31 are zero) and returned as an
; i32 bitmask.
6841 define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6842 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask:
6843 ; VLX: # %bb.0: # %entry
6844 ; VLX-NEXT: kmovd %edi, %k1
6845 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
6846 ; VLX-NEXT: kmovd %k0, %eax
6847 ; VLX-NEXT: vzeroupper
6850 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask:
6851 ; NoVLX: # %bb.0: # %entry
6852 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
6853 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6854 ; NoVLX-NEXT: kmovw %edi, %k1
6855 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6856 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6857 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6858 ; NoVLX-NEXT: kmovw %k0, %eax
6859 ; NoVLX-NEXT: vzeroupper
6862 %0 = bitcast <4 x i64> %__a to <8 x i32>
6863 %1 = bitcast <4 x i64> %__b to <8 x i32>
6864 %2 = icmp sgt <8 x i32> %0, %1
6865 %3 = bitcast i8 %__u to <8 x i1>
6866 %4 = and <8 x i1> %2, %3
6867 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6868 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-than compare of the <8 x i32> view of %__a against the
; <4 x i64> vector loaded from %__b (viewed as <8 x i32>). %__u is bitcast to
; <8 x i1> and ANDed with the compare result (compare is the first `and`
; operand here). The <8 x i1> value is then widened to <32 x i1> (mask
; indices 8-15 select lanes of the zeroinitializer operand, so result lanes
; 8-31 are zero) and returned as an i32 bitmask.
6872 define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
6873 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem:
6874 ; VLX: # %bb.0: # %entry
6875 ; VLX-NEXT: kmovd %edi, %k1
6876 ; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
6877 ; VLX-NEXT: kmovd %k0, %eax
6878 ; VLX-NEXT: vzeroupper
6881 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem:
6882 ; NoVLX: # %bb.0: # %entry
6883 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6884 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
6885 ; NoVLX-NEXT: kmovw %edi, %k1
6886 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6887 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6888 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6889 ; NoVLX-NEXT: kmovw %k0, %eax
6890 ; NoVLX-NEXT: vzeroupper
6893 %0 = bitcast <4 x i64> %__a to <8 x i32>
6894 %load = load <4 x i64>, <4 x i64>* %__b
6895 %1 = bitcast <4 x i64> %load to <8 x i32>
6896 %2 = icmp sgt <8 x i32> %0, %1
6897 %3 = bitcast i8 %__u to <8 x i1>
6898 %4 = and <8 x i1> %2, %3
6899 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6900 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed greater-than compare of the <8 x i32> view of %__a against
; the i32 loaded from %__b, splatted to all eight lanes. The <8 x i1> result
; is widened to <32 x i1> (mask indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-31 are zero) and returned as an
; i32 bitmask.
6905 define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
6906 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
6907 ; VLX: # %bb.0: # %entry
6908 ; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
6909 ; VLX-NEXT: kmovd %k0, %eax
6910 ; VLX-NEXT: vzeroupper
6913 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
6914 ; NoVLX: # %bb.0: # %entry
6915 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6916 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
6917 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6918 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6919 ; NoVLX-NEXT: kmovw %k0, %eax
6920 ; NoVLX-NEXT: vzeroupper
6923 %0 = bitcast <4 x i64> %__a to <8 x i32>
6924 %load = load i32, i32* %__b
6925 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
6926 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
6927 %2 = icmp sgt <8 x i32> %0, %1
6928 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6929 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of the <8 x i32> view of %__a against the
; i32 loaded from %__b, splatted to all eight lanes. %__u is bitcast to
; <8 x i1> and ANDed with the compare result (mask is the first `and` operand
; here). The <8 x i1> value is then widened to <32 x i1> (mask indices 8-15
; select lanes of the zeroinitializer operand, so result lanes 8-31 are zero)
; and returned as an i32 bitmask.
6933 define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
6934 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
6935 ; VLX: # %bb.0: # %entry
6936 ; VLX-NEXT: kmovd %edi, %k1
6937 ; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
6938 ; VLX-NEXT: kmovd %k0, %eax
6939 ; VLX-NEXT: vzeroupper
6942 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
6943 ; NoVLX: # %bb.0: # %entry
6944 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6945 ; NoVLX-NEXT: kmovw %edi, %k1
6946 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
6947 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6948 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6949 ; NoVLX-NEXT: kmovw %k0, %eax
6950 ; NoVLX-NEXT: vzeroupper
6953 %0 = bitcast <4 x i64> %__a to <8 x i32>
6954 %load = load i32, i32* %__b
6955 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
6956 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
6957 %2 = icmp sgt <8 x i32> %0, %1
6958 %3 = bitcast i8 %__u to <8 x i1>
6959 %4 = and <8 x i1> %3, %2
6960 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6961 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed greater-than compare of the <8 x i32> views of %__a and
; %__b (both passed by value). The <8 x i1> result is widened to <64 x i1>
; (mask indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-63 are zero) and returned as an i64 bitmask.
6966 define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6967 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask:
6968 ; VLX: # %bb.0: # %entry
6969 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
6970 ; VLX-NEXT: kmovq %k0, %rax
6971 ; VLX-NEXT: vzeroupper
6974 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask:
6975 ; NoVLX: # %bb.0: # %entry
6976 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
6977 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6978 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6979 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6980 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6981 ; NoVLX-NEXT: kmovw %k0, %eax
6982 ; NoVLX-NEXT: vzeroupper
6985 %0 = bitcast <4 x i64> %__a to <8 x i32>
6986 %1 = bitcast <4 x i64> %__b to <8 x i32>
6987 %2 = icmp sgt <8 x i32> %0, %1
6988 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6989 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed greater-than compare of the <8 x i32> view of %__a against
; the <4 x i64> vector loaded from %__b (viewed as <8 x i32>). The <8 x i1>
; result is widened to <64 x i1> (mask indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-63 are zero) and returned as an
; i64 bitmask.
6993 define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
6994 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem:
6995 ; VLX: # %bb.0: # %entry
6996 ; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
6997 ; VLX-NEXT: kmovq %k0, %rax
6998 ; VLX-NEXT: vzeroupper
7001 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem:
7002 ; NoVLX: # %bb.0: # %entry
7003 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
7004 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
7005 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7006 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
7007 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
7008 ; NoVLX-NEXT: kmovw %k0, %eax
7009 ; NoVLX-NEXT: vzeroupper
7012 %0 = bitcast <4 x i64> %__a to <8 x i32>
7013 %load = load <4 x i64>, <4 x i64>* %__b
7014 %1 = bitcast <4 x i64> %load to <8 x i32>
7015 %2 = icmp sgt <8 x i32> %0, %1
7016 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7017 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of the <8 x i32> views of %__a and %__b
; (both passed by value). %__u is bitcast to <8 x i1> and ANDed with the
; compare result (compare is the first `and` operand here). The <8 x i1>
; value is then widened to <64 x i1> (mask indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-63 are zero) and returned as an
; i64 bitmask.
7021 define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
7022 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask:
7023 ; VLX: # %bb.0: # %entry
7024 ; VLX-NEXT: kmovd %edi, %k1
7025 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
7026 ; VLX-NEXT: kmovq %k0, %rax
7027 ; VLX-NEXT: vzeroupper
7030 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask:
7031 ; NoVLX: # %bb.0: # %entry
7032 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
7033 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
7034 ; NoVLX-NEXT: kmovw %edi, %k1
7035 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7036 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
7037 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
7038 ; NoVLX-NEXT: kmovw %k0, %eax
7039 ; NoVLX-NEXT: vzeroupper
7042 %0 = bitcast <4 x i64> %__a to <8 x i32>
7043 %1 = bitcast <4 x i64> %__b to <8 x i32>
7044 %2 = icmp sgt <8 x i32> %0, %1
7045 %3 = bitcast i8 %__u to <8 x i1>
7046 %4 = and <8 x i1> %2, %3
7047 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7048 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-than compare of the <8 x i32> view of %__a against the
; <4 x i64> vector loaded from %__b (viewed as <8 x i32>). %__u is bitcast to
; <8 x i1> and ANDed with the compare result (compare is the first `and`
; operand here). The <8 x i1> value is then widened to <64 x i1> (mask
; indices 8-15 select lanes of the zeroinitializer operand, so result lanes
; 8-63 are zero) and returned as an i64 bitmask.
7052 define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
7053 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem:
7054 ; VLX: # %bb.0: # %entry
7055 ; VLX-NEXT: kmovd %edi, %k1
7056 ; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
7057 ; VLX-NEXT: kmovq %k0, %rax
7058 ; VLX-NEXT: vzeroupper
7061 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem:
7062 ; NoVLX: # %bb.0: # %entry
7063 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
7064 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
7065 ; NoVLX-NEXT: kmovw %edi, %k1
7066 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7067 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
7068 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
7069 ; NoVLX-NEXT: kmovw %k0, %eax
7070 ; NoVLX-NEXT: vzeroupper
7073 %0 = bitcast <4 x i64> %__a to <8 x i32>
7074 %load = load <4 x i64>, <4 x i64>* %__b
7075 %1 = bitcast <4 x i64> %load to <8 x i32>
7076 %2 = icmp sgt <8 x i32> %0, %1
7077 %3 = bitcast i8 %__u to <8 x i1>
7078 %4 = and <8 x i1> %2, %3
7079 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7080 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than compare of <8 x i32> %__a (bitcast from
; <4 x i64>) against a broadcast operand: a scalar i32 loaded from %__b is
; inserted into lane 0 and splatted to all 8 lanes by an all-zero shuffle
; mask. The <8 x i1> result is widened to <64 x i1> by a shuffle whose indices
; 8-15 select lanes of the zeroinitializer operand, and bitcast to i64 (the
; declared return type).
7085 define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
7086 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
7087 ; VLX: # %bb.0: # %entry
7088 ; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
7089 ; VLX-NEXT: kmovq %k0, %rax
7090 ; VLX-NEXT: vzeroupper
7093 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
7094 ; NoVLX: # %bb.0: # %entry
7095 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
7096 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7097 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
7098 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
7099 ; NoVLX-NEXT: kmovw %k0, %eax
7100 ; NoVLX-NEXT: vzeroupper
7103 %0 = bitcast <4 x i64> %__a to <8 x i32>
7104 %load = load i32, i32* %__b
7105 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
7106 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7107 %2 = icmp sgt <8 x i32> %0, %1
7108 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7109 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of <8 x i32> %__a (bitcast from
; <4 x i64>) against a broadcast operand: a scalar i32 loaded from %__b is
; inserted into lane 0 and splatted to all 8 lanes by an all-zero shuffle
; mask. The i8 mask %__u (as <8 x i1>, first operand of the and) is ANDed with
; the compare result, which is then widened to <64 x i1> by a shuffle whose
; indices 8-15 select lanes of the zeroinitializer operand, and bitcast to i64
; (the declared return type).
7113 define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
7114 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
7115 ; VLX: # %bb.0: # %entry
7116 ; VLX-NEXT: kmovd %edi, %k1
7117 ; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
7118 ; VLX-NEXT: kmovq %k0, %rax
7119 ; VLX-NEXT: vzeroupper
7122 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
7123 ; NoVLX: # %bb.0: # %entry
7124 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
7125 ; NoVLX-NEXT: kmovw %edi, %k1
7126 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
7127 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
7128 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
7129 ; NoVLX-NEXT: kmovw %k0, %eax
7130 ; NoVLX-NEXT: vzeroupper
7133 %0 = bitcast <4 x i64> %__a to <8 x i32>
7134 %load = load i32, i32* %__b
7135 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
7136 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7137 %2 = icmp sgt <8 x i32> %0, %1
7138 %3 = bitcast i8 %__u to <8 x i1>
7139 %4 = and <8 x i1> %3, %2
7140 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7141 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than compare of two <16 x i32> vectors (both passed
; by value as <8 x i64> and bitcast). The <16 x i1> result is widened to
; <32 x i1> by a shuffle whose indices 16-31 select lanes of the
; zeroinitializer operand, and bitcast to i32 (the declared return type).
7146 define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
7147 ; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask:
7148 ; VLX: # %bb.0: # %entry
7149 ; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7150 ; VLX-NEXT: kmovd %k0, %eax
7151 ; VLX-NEXT: vzeroupper
7154 ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask:
7155 ; NoVLX: # %bb.0: # %entry
7156 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7157 ; NoVLX-NEXT: kmovw %k0, %eax
7158 ; NoVLX-NEXT: vzeroupper
7161 %0 = bitcast <8 x i64> %__a to <16 x i32>
7162 %1 = bitcast <8 x i64> %__b to <16 x i32>
7163 %2 = icmp sgt <16 x i32> %0, %1
7164 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7165 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed greater-than compare of <16 x i32> %__a (bitcast from
; <8 x i64>) against a full vector loaded from the pointer %__b. The
; <16 x i1> result is widened to <32 x i1> by a shuffle whose indices 16-31
; select lanes of the zeroinitializer operand, and bitcast to i32 (the
; declared return type).
7169 define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
7170 ; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem:
7171 ; VLX: # %bb.0: # %entry
7172 ; VLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
7173 ; VLX-NEXT: kmovd %k0, %eax
7174 ; VLX-NEXT: vzeroupper
7177 ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem:
7178 ; NoVLX: # %bb.0: # %entry
7179 ; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
7180 ; NoVLX-NEXT: kmovw %k0, %eax
7181 ; NoVLX-NEXT: vzeroupper
7184 %0 = bitcast <8 x i64> %__a to <16 x i32>
7185 %load = load <8 x i64>, <8 x i64>* %__b
7186 %1 = bitcast <8 x i64> %load to <16 x i32>
7187 %2 = icmp sgt <16 x i32> %0, %1
7188 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7189 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of two <16 x i32> vectors (both passed by
; value as <8 x i64> and bitcast). The <16 x i1> result is ANDed with the i16
; mask %__u, widened to <32 x i1> by a shuffle whose indices 16-31 select
; lanes of the zeroinitializer operand, and bitcast to i32 (the declared
; return type).
7193 define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
7194 ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
7195 ; VLX: # %bb.0: # %entry
7196 ; VLX-NEXT: kmovd %edi, %k1
7197 ; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7198 ; VLX-NEXT: kmovd %k0, %eax
7199 ; VLX-NEXT: vzeroupper
7202 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
7203 ; NoVLX: # %bb.0: # %entry
7204 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7205 ; NoVLX-NEXT: kmovw %k0, %eax
7206 ; NoVLX-NEXT: andl %edi, %eax
7207 ; NoVLX-NEXT: vzeroupper
7210 %0 = bitcast <8 x i64> %__a to <16 x i32>
7211 %1 = bitcast <8 x i64> %__b to <16 x i32>
7212 %2 = icmp sgt <16 x i32> %0, %1
7213 %3 = bitcast i16 %__u to <16 x i1>
7214 %4 = and <16 x i1> %2, %3
7215 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7216 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-than compare of <16 x i32> %__a (bitcast from
; <8 x i64>) against a full vector loaded from the pointer %__b. The
; <16 x i1> result is ANDed with the i16 mask %__u, widened to <32 x i1> by a
; shuffle whose indices 16-31 select lanes of the zeroinitializer operand, and
; bitcast to i32 (the declared return type).
7220 define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
7221 ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
7222 ; VLX: # %bb.0: # %entry
7223 ; VLX-NEXT: kmovd %edi, %k1
7224 ; VLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
7225 ; VLX-NEXT: kmovd %k0, %eax
7226 ; VLX-NEXT: vzeroupper
7229 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
7230 ; NoVLX: # %bb.0: # %entry
7231 ; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0
7232 ; NoVLX-NEXT: kmovw %k0, %eax
7233 ; NoVLX-NEXT: andl %edi, %eax
7234 ; NoVLX-NEXT: vzeroupper
7237 %0 = bitcast <8 x i64> %__a to <16 x i32>
7238 %load = load <8 x i64>, <8 x i64>* %__b
7239 %1 = bitcast <8 x i64> %load to <16 x i32>
7240 %2 = icmp sgt <16 x i32> %0, %1
7241 %3 = bitcast i16 %__u to <16 x i1>
7242 %4 = and <16 x i1> %2, %3
7243 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7244 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed greater-than compare of <16 x i32> %__a (bitcast from
; <8 x i64>) against a broadcast operand: a scalar i32 loaded from %__b is
; inserted into lane 0 and splatted to all 16 lanes by an all-zero shuffle
; mask. The <16 x i1> result is widened to <32 x i1> by a shuffle whose
; indices 16-31 select lanes of the zeroinitializer operand, and bitcast to
; i32 (the declared return type).
7249 define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
7250 ; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
7251 ; VLX: # %bb.0: # %entry
7252 ; VLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7253 ; VLX-NEXT: kmovd %k0, %eax
7254 ; VLX-NEXT: vzeroupper
7257 ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
7258 ; NoVLX: # %bb.0: # %entry
7259 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7260 ; NoVLX-NEXT: kmovw %k0, %eax
7261 ; NoVLX-NEXT: vzeroupper
7264 %0 = bitcast <8 x i64> %__a to <16 x i32>
7265 %load = load i32, i32* %__b
7266 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
7267 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7268 %2 = icmp sgt <16 x i32> %0, %1
7269 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7270 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of <16 x i32> %__a (bitcast from
; <8 x i64>) against a broadcast operand: a scalar i32 loaded from %__b is
; inserted into lane 0 and splatted to all 16 lanes by an all-zero shuffle
; mask. The i16 mask %__u (as <16 x i1>, first operand of the and) is ANDed
; with the compare result, which is then widened to <32 x i1> by a shuffle
; whose indices 16-31 select lanes of the zeroinitializer operand, and bitcast
; to i32 (the declared return type).
7274 define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
7275 ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
7276 ; VLX: # %bb.0: # %entry
7277 ; VLX-NEXT: kmovd %edi, %k1
7278 ; VLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
7279 ; VLX-NEXT: kmovd %k0, %eax
7280 ; VLX-NEXT: vzeroupper
7283 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
7284 ; NoVLX: # %bb.0: # %entry
7285 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0
7286 ; NoVLX-NEXT: kmovw %k0, %eax
7287 ; NoVLX-NEXT: andl %edi, %eax
7288 ; NoVLX-NEXT: vzeroupper
7291 %0 = bitcast <8 x i64> %__a to <16 x i32>
7292 %load = load i32, i32* %__b
7293 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
7294 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7295 %2 = icmp sgt <16 x i32> %0, %1
7296 %3 = bitcast i16 %__u to <16 x i1>
7297 %4 = and <16 x i1> %3, %2
7298 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7299 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed greater-than compare of two <16 x i32> vectors (both passed
; by value as <8 x i64> and bitcast). The <16 x i1> result is widened to
; <64 x i1> by a shuffle whose indices 16-31 select lanes of the
; zeroinitializer operand, and bitcast to i64 (the declared return type).
7304 define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
7305 ; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask:
7306 ; VLX: # %bb.0: # %entry
7307 ; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7308 ; VLX-NEXT: kmovq %k0, %rax
7309 ; VLX-NEXT: vzeroupper
7312 ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask:
7313 ; NoVLX: # %bb.0: # %entry
7314 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7315 ; NoVLX-NEXT: kmovw %k0, %eax
7316 ; NoVLX-NEXT: vzeroupper
7319 %0 = bitcast <8 x i64> %__a to <16 x i32>
7320 %1 = bitcast <8 x i64> %__b to <16 x i32>
7321 %2 = icmp sgt <16 x i32> %0, %1
7322 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7323 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed greater-than compare of <16 x i32> %__a (bitcast from
; <8 x i64>) against a full vector loaded from the pointer %__b. The
; <16 x i1> result is widened to <64 x i1> by a shuffle whose indices 16-31
; select lanes of the zeroinitializer operand, and bitcast to i64 (the
; declared return type).
7327 define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
7328 ; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem:
7329 ; VLX: # %bb.0: # %entry
7330 ; VLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
7331 ; VLX-NEXT: kmovq %k0, %rax
7332 ; VLX-NEXT: vzeroupper
7335 ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem:
7336 ; NoVLX: # %bb.0: # %entry
7337 ; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
7338 ; NoVLX-NEXT: kmovw %k0, %eax
7339 ; NoVLX-NEXT: vzeroupper
7342 %0 = bitcast <8 x i64> %__a to <16 x i32>
7343 %load = load <8 x i64>, <8 x i64>* %__b
7344 %1 = bitcast <8 x i64> %load to <16 x i32>
7345 %2 = icmp sgt <16 x i32> %0, %1
7346 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7347 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of two <16 x i32> vectors (both passed by
; value as <8 x i64> and bitcast). The <16 x i1> result is ANDed with the i16
; mask %__u, widened to <64 x i1> by a shuffle whose indices 16-31 select
; lanes of the zeroinitializer operand, and bitcast to i64 (the declared
; return type).
7351 define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
7352 ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
7353 ; VLX: # %bb.0: # %entry
7354 ; VLX-NEXT: kmovd %edi, %k1
7355 ; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7356 ; VLX-NEXT: kmovq %k0, %rax
7357 ; VLX-NEXT: vzeroupper
7360 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
7361 ; NoVLX: # %bb.0: # %entry
7362 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7363 ; NoVLX-NEXT: kmovw %k0, %eax
7364 ; NoVLX-NEXT: andl %edi, %eax
7365 ; NoVLX-NEXT: vzeroupper
7368 %0 = bitcast <8 x i64> %__a to <16 x i32>
7369 %1 = bitcast <8 x i64> %__b to <16 x i32>
7370 %2 = icmp sgt <16 x i32> %0, %1
7371 %3 = bitcast i16 %__u to <16 x i1>
7372 %4 = and <16 x i1> %2, %3
7373 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7374 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-than compare of <16 x i32> %__a (bitcast from
; <8 x i64>) against a full vector loaded from the pointer %__b. The
; <16 x i1> result is ANDed with the i16 mask %__u, widened to <64 x i1> by a
; shuffle whose indices 16-31 select lanes of the zeroinitializer operand, and
; bitcast to i64 (the declared return type).
7378 define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
7379 ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
7380 ; VLX: # %bb.0: # %entry
7381 ; VLX-NEXT: kmovd %edi, %k1
7382 ; VLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
7383 ; VLX-NEXT: kmovq %k0, %rax
7384 ; VLX-NEXT: vzeroupper
7387 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
7388 ; NoVLX: # %bb.0: # %entry
7389 ; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0
7390 ; NoVLX-NEXT: kmovw %k0, %eax
7391 ; NoVLX-NEXT: andl %edi, %eax
7392 ; NoVLX-NEXT: vzeroupper
7395 %0 = bitcast <8 x i64> %__a to <16 x i32>
7396 %load = load <8 x i64>, <8 x i64>* %__b
7397 %1 = bitcast <8 x i64> %load to <16 x i32>
7398 %2 = icmp sgt <16 x i32> %0, %1
7399 %3 = bitcast i16 %__u to <16 x i1>
7400 %4 = and <16 x i1> %2, %3
7401 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7402 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than compare of <16 x i32> %__a (bitcast from
; <8 x i64>) against a broadcast operand: a scalar i32 loaded from %__b is
; inserted into lane 0 and splatted to all 16 lanes by an all-zero shuffle
; mask. The <16 x i1> result is widened to <64 x i1> by a shuffle whose
; indices 16-31 select lanes of the zeroinitializer operand, and bitcast to
; i64 (the declared return type).
7407 define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
7408 ; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
7409 ; VLX: # %bb.0: # %entry
7410 ; VLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7411 ; VLX-NEXT: kmovq %k0, %rax
7412 ; VLX-NEXT: vzeroupper
7415 ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
7416 ; NoVLX: # %bb.0: # %entry
7417 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7418 ; NoVLX-NEXT: kmovw %k0, %eax
7419 ; NoVLX-NEXT: vzeroupper
7422 %0 = bitcast <8 x i64> %__a to <16 x i32>
7423 %load = load i32, i32* %__b
7424 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
7425 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7426 %2 = icmp sgt <16 x i32> %0, %1
7427 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7428 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of <16 x i32> %__a (bitcast from
; <8 x i64>) against a broadcast operand: a scalar i32 loaded from %__b is
; inserted into lane 0 and splatted to all 16 lanes by an all-zero shuffle
; mask. The i16 mask %__u (as <16 x i1>, first operand of the and) is ANDed
; with the compare result, which is then widened to <64 x i1> by a shuffle
; whose indices 16-31 select lanes of the zeroinitializer operand, and bitcast
; to i64 (the declared return type).
7432 define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
7433 ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
7434 ; VLX: # %bb.0: # %entry
7435 ; VLX-NEXT: kmovd %edi, %k1
7436 ; VLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
7437 ; VLX-NEXT: kmovq %k0, %rax
7438 ; VLX-NEXT: vzeroupper
7441 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
7442 ; NoVLX: # %bb.0: # %entry
7443 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0
7444 ; NoVLX-NEXT: kmovw %k0, %eax
7445 ; NoVLX-NEXT: andl %edi, %eax
7446 ; NoVLX-NEXT: vzeroupper
7449 %0 = bitcast <8 x i64> %__a to <16 x i32>
7450 %load = load i32, i32* %__b
7451 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
7452 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7453 %2 = icmp sgt <16 x i32> %0, %1
7454 %3 = bitcast i16 %__u to <16 x i1>
7455 %4 = and <16 x i1> %3, %2
7456 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7457 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than compare of two <2 x i64> vectors (both passed
; by value; the bitcasts to the same type are no-ops). The <2 x i1> result is
; widened to <4 x i1> by a shuffle whose indices 2-3 select lanes of the
; zeroinitializer operand, and bitcast to i4 (the declared return type).
7462 define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7463 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask:
7464 ; VLX: # %bb.0: # %entry
7465 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
7466 ; VLX-NEXT: kmovb %k0, %eax
7469 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask:
7470 ; NoVLX: # %bb.0: # %entry
7471 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
7472 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7473 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7474 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7475 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7476 ; NoVLX-NEXT: kmovw %k0, %eax
7477 ; NoVLX-NEXT: vzeroupper
7480 %0 = bitcast <2 x i64> %__a to <2 x i64>
7481 %1 = bitcast <2 x i64> %__b to <2 x i64>
7482 %2 = icmp sgt <2 x i64> %0, %1
7483 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7484 %4 = bitcast <4 x i1> %3 to i4
; Unmasked signed greater-than compare of <2 x i64> %__a against a full vector
; loaded from the pointer %__b (the bitcasts to the same type are no-ops). The
; <2 x i1> result is widened to <4 x i1> by a shuffle whose indices 2-3 select
; lanes of the zeroinitializer operand, and bitcast to i4 (the declared return
; type).
7488 define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
7489 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem:
7490 ; VLX: # %bb.0: # %entry
7491 ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
7492 ; VLX-NEXT: kmovb %k0, %eax
7495 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem:
7496 ; NoVLX: # %bb.0: # %entry
7497 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7498 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
7499 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7500 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7501 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7502 ; NoVLX-NEXT: kmovw %k0, %eax
7503 ; NoVLX-NEXT: vzeroupper
7506 %0 = bitcast <2 x i64> %__a to <2 x i64>
7507 %load = load <2 x i64>, <2 x i64>* %__b
7508 %1 = bitcast <2 x i64> %load to <2 x i64>
7509 %2 = icmp sgt <2 x i64> %0, %1
7510 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7511 %4 = bitcast <4 x i1> %3 to i4
; Masked signed greater-than compare of two <2 x i64> vectors (both passed by
; value; the bitcasts to the same type are no-ops). Only lanes 0-1 of the i8
; mask %__u (viewed as <8 x i1>) are extracted and ANDed with the <2 x i1>
; compare result. That result is widened to <4 x i1> by a shuffle whose
; indices 2-3 select lanes of the zeroinitializer operand, and bitcast to i4
; (the declared return type).
7515 define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7516 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask:
7517 ; VLX: # %bb.0: # %entry
7518 ; VLX-NEXT: kmovd %edi, %k1
7519 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
7520 ; VLX-NEXT: kmovb %k0, %eax
7523 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask:
7524 ; NoVLX: # %bb.0: # %entry
7525 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
7526 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7527 ; NoVLX-NEXT: kmovw %edi, %k1
7528 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7529 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7530 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7531 ; NoVLX-NEXT: kmovw %k0, %eax
7532 ; NoVLX-NEXT: vzeroupper
7535 %0 = bitcast <2 x i64> %__a to <2 x i64>
7536 %1 = bitcast <2 x i64> %__b to <2 x i64>
7537 %2 = icmp sgt <2 x i64> %0, %1
7538 %3 = bitcast i8 %__u to <8 x i1>
7539 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7540 %4 = and <2 x i1> %2, %extract.i
7541 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7542 %6 = bitcast <4 x i1> %5 to i4
; Masked signed greater-than compare of <2 x i64> %__a against a full vector
; loaded from the pointer %__b (the bitcasts to the same type are no-ops).
; Only lanes 0-1 of the i8 mask %__u (viewed as <8 x i1>) are extracted and
; ANDed with the <2 x i1> compare result. That result is widened to <4 x i1>
; by a shuffle whose indices 2-3 select lanes of the zeroinitializer operand,
; and bitcast to i4 (the declared return type).
7546 define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
7547 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem:
7548 ; VLX: # %bb.0: # %entry
7549 ; VLX-NEXT: kmovd %edi, %k1
7550 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
7551 ; VLX-NEXT: kmovb %k0, %eax
7554 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem:
7555 ; NoVLX: # %bb.0: # %entry
7556 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7557 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
7558 ; NoVLX-NEXT: kmovw %edi, %k1
7559 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7560 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7561 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7562 ; NoVLX-NEXT: kmovw %k0, %eax
7563 ; NoVLX-NEXT: vzeroupper
7566 %0 = bitcast <2 x i64> %__a to <2 x i64>
7567 %load = load <2 x i64>, <2 x i64>* %__b
7568 %1 = bitcast <2 x i64> %load to <2 x i64>
7569 %2 = icmp sgt <2 x i64> %0, %1
7570 %3 = bitcast i8 %__u to <8 x i1>
7571 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7572 %4 = and <2 x i1> %2, %extract.i
7573 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7574 %6 = bitcast <4 x i1> %5 to i4
; Unmasked signed greater-than compare of <2 x i64> %__a against a broadcast
; operand: a scalar i64 loaded from %__b is inserted into lane 0 and splatted
; to both lanes by an all-zero shuffle mask. The <2 x i1> result is widened to
; <4 x i1> by a shuffle whose indices 2-3 select lanes of the zeroinitializer
; operand, and bitcast to i4 (the declared return type).
7579 define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
7580 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
7581 ; VLX: # %bb.0: # %entry
7582 ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
7583 ; VLX-NEXT: kmovb %k0, %eax
7586 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
7587 ; NoVLX: # %bb.0: # %entry
7588 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7589 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
7590 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7591 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7592 ; NoVLX-NEXT: kmovw %k0, %eax
7593 ; NoVLX-NEXT: vzeroupper
7596 %0 = bitcast <2 x i64> %__a to <2 x i64>
7597 %load = load i64, i64* %__b
7598 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
7599 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
7600 %2 = icmp sgt <2 x i64> %0, %1
7601 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7602 %4 = bitcast <4 x i1> %3 to i4
; Masked signed greater-than compare of <2 x i64> %__a against a broadcast
; operand: a scalar i64 loaded from %__b is inserted into lane 0 and splatted
; to both lanes by an all-zero shuffle mask. Lanes 0-1 of the i8 mask %__u
; (viewed as <8 x i1>) are extracted and ANDed with the compare result (mask
; as the first and operand). That result is widened to <4 x i1> by a shuffle
; whose indices 2-3 select lanes of the zeroinitializer operand, and bitcast
; to i4 (the declared return type).
7606 define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
7607 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
7608 ; VLX: # %bb.0: # %entry
7609 ; VLX-NEXT: kmovd %edi, %k1
7610 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
7611 ; VLX-NEXT: kmovb %k0, %eax
7614 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
7615 ; NoVLX: # %bb.0: # %entry
7616 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7617 ; NoVLX-NEXT: kmovw %edi, %k1
7618 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
7619 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7620 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7621 ; NoVLX-NEXT: kmovw %k0, %eax
7622 ; NoVLX-NEXT: vzeroupper
7625 %0 = bitcast <2 x i64> %__a to <2 x i64>
7626 %load = load i64, i64* %__b
7627 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
7628 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
7629 %2 = icmp sgt <2 x i64> %0, %1
7630 %3 = bitcast i8 %__u to <8 x i1>
7631 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7632 %4 = and <2 x i1> %extract.i, %2
7633 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7634 %6 = bitcast <4 x i1> %5 to i4
; Unmasked signed greater-than compare of two <2 x i64> vectors (both passed
; by value; the bitcasts to the same type are no-ops). The <2 x i1> result is
; widened to <8 x i1> by a shuffle whose indices 2-3 (repeated for the upper
; lanes) select lanes of the zeroinitializer operand, and bitcast to i8 (the
; declared return type).
7639 define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7640 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask:
7641 ; VLX: # %bb.0: # %entry
7642 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
7643 ; VLX-NEXT: kmovd %k0, %eax
7644 ; VLX-NEXT: # kill: def $al killed $al killed $eax
7647 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask:
7648 ; NoVLX: # %bb.0: # %entry
7649 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
7650 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7651 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7652 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7653 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7654 ; NoVLX-NEXT: kmovw %k0, %eax
7655 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
7656 ; NoVLX-NEXT: vzeroupper
7659 %0 = bitcast <2 x i64> %__a to <2 x i64>
7660 %1 = bitcast <2 x i64> %__b to <2 x i64>
7661 %2 = icmp sgt <2 x i64> %0, %1
7662 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7663 %4 = bitcast <8 x i1> %3 to i8
; Unmasked signed greater-than compare of <2 x i64> %__a against a full vector
; loaded from the pointer %__b (the bitcasts to the same type are no-ops). The
; <2 x i1> result is widened to <8 x i1> by a shuffle whose indices 2-3
; (repeated for the upper lanes) select lanes of the zeroinitializer operand,
; and bitcast to i8 (the declared return type).
7667 define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
7668 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem:
7669 ; VLX: # %bb.0: # %entry
7670 ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
7671 ; VLX-NEXT: kmovd %k0, %eax
7672 ; VLX-NEXT: # kill: def $al killed $al killed $eax
7675 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem:
7676 ; NoVLX: # %bb.0: # %entry
7677 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7678 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
7679 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7680 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7681 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7682 ; NoVLX-NEXT: kmovw %k0, %eax
7683 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
7684 ; NoVLX-NEXT: vzeroupper
7687 %0 = bitcast <2 x i64> %__a to <2 x i64>
7688 %load = load <2 x i64>, <2 x i64>* %__b
7689 %1 = bitcast <2 x i64> %load to <2 x i64>
7690 %2 = icmp sgt <2 x i64> %0, %1
7691 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7692 %4 = bitcast <8 x i1> %3 to i8
; Masked signed greater-than compare of two <2 x i64> vectors (both passed by
; value; the bitcasts to the same type are no-ops). Only lanes 0-1 of the i8
; mask %__u (viewed as <8 x i1>) are extracted and ANDed with the <2 x i1>
; compare result. That result is widened to <8 x i1> by a shuffle whose
; indices 2-3 (repeated for the upper lanes) select lanes of the
; zeroinitializer operand, and bitcast to i8 (the declared return type).
7696 define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7697 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask:
7698 ; VLX: # %bb.0: # %entry
7699 ; VLX-NEXT: kmovd %edi, %k1
7700 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
7701 ; VLX-NEXT: kmovd %k0, %eax
7702 ; VLX-NEXT: # kill: def $al killed $al killed $eax
7705 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask:
7706 ; NoVLX: # %bb.0: # %entry
7707 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
7708 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7709 ; NoVLX-NEXT: kmovw %edi, %k1
7710 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7711 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7712 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7713 ; NoVLX-NEXT: kmovw %k0, %eax
7714 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
7715 ; NoVLX-NEXT: vzeroupper
7718 %0 = bitcast <2 x i64> %__a to <2 x i64>
7719 %1 = bitcast <2 x i64> %__b to <2 x i64>
7720 %2 = icmp sgt <2 x i64> %0, %1
7721 %3 = bitcast i8 %__u to <8 x i1>
7722 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7723 %4 = and <2 x i1> %2, %extract.i
7724 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7725 %6 = bitcast <8 x i1> %5 to i8
; Masked signed greater-than compare of <2 x i64> %__a against a full vector
; loaded from the pointer %__b (the bitcasts to the same type are no-ops).
; Only lanes 0-1 of the i8 mask %__u (viewed as <8 x i1>) are extracted and
; ANDed with the <2 x i1> compare result. That result is widened to <8 x i1>
; by a shuffle whose indices 2-3 (repeated for the upper lanes) select lanes
; of the zeroinitializer operand, and bitcast to i8 (the declared return
; type).
7729 define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
7730 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem:
7731 ; VLX: # %bb.0: # %entry
7732 ; VLX-NEXT: kmovd %edi, %k1
7733 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
7734 ; VLX-NEXT: kmovd %k0, %eax
7735 ; VLX-NEXT: # kill: def $al killed $al killed $eax
7738 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem:
7739 ; NoVLX: # %bb.0: # %entry
7740 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7741 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
7742 ; NoVLX-NEXT: kmovw %edi, %k1
7743 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7744 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7745 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7746 ; NoVLX-NEXT: kmovw %k0, %eax
7747 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
7748 ; NoVLX-NEXT: vzeroupper
7751 %0 = bitcast <2 x i64> %__a to <2 x i64>
7752 %load = load <2 x i64>, <2 x i64>* %__b
7753 %1 = bitcast <2 x i64> %load to <2 x i64>
7754 %2 = icmp sgt <2 x i64> %0, %1
7755 %3 = bitcast i8 %__u to <8 x i1>
7756 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7757 %4 = and <2 x i1> %2, %extract.i
7758 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7759 %6 = bitcast <8 x i1> %5 to i8
; Unmasked signed greater-than compare of <2 x i64> %__a against a broadcast
; operand: a scalar i64 loaded from %__b is inserted into lane 0 and splatted
; to both lanes by an all-zero shuffle mask. The <2 x i1> result is widened to
; <8 x i1> by a shuffle whose indices 2-3 (repeated for the upper lanes)
; select lanes of the zeroinitializer operand, and bitcast to i8 (the declared
; return type).
7764 define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
7765 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
7766 ; VLX: # %bb.0: # %entry
7767 ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
7768 ; VLX-NEXT: kmovd %k0, %eax
7769 ; VLX-NEXT: # kill: def $al killed $al killed $eax
7772 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
7773 ; NoVLX: # %bb.0: # %entry
7774 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7775 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
7776 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7777 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7778 ; NoVLX-NEXT: kmovw %k0, %eax
7779 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
7780 ; NoVLX-NEXT: vzeroupper
7783 %0 = bitcast <2 x i64> %__a to <2 x i64>
7784 %load = load i64, i64* %__b
7785 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
7786 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
7787 %2 = icmp sgt <2 x i64> %0, %1
7788 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7789 %4 = bitcast <8 x i1> %3 to i8
; Masked signed greater-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes. The <2 x i1> result is ANDed with the low
; two bits of %__u (mask is the left-hand AND operand here), widened to
; <8 x i1> with zero lanes from the zeroinitializer shuffle operand, and
; bitcast to i8.
; Expected codegen: with AVX512VL a k1-masked xmm vpcmpgtq with a {1to2}
; broadcast operand; with AVX512F only a k1-masked zmm compare with a {1to8}
; broadcast, followed by kshiftlw/kshiftrw $14 to clear mask bits 2-15.
7793 define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
7794 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
7795 ; VLX: # %bb.0: # %entry
7796 ; VLX-NEXT: kmovd %edi, %k1
7797 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
7798 ; VLX-NEXT: kmovd %k0, %eax
7799 ; VLX-NEXT: # kill: def $al killed $al killed $eax
7802 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
7803 ; NoVLX: # %bb.0: # %entry
7804 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7805 ; NoVLX-NEXT: kmovw %edi, %k1
7806 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
7807 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7808 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7809 ; NoVLX-NEXT: kmovw %k0, %eax
7810 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
7811 ; NoVLX-NEXT: vzeroupper
7814 %0 = bitcast <2 x i64> %__a to <2 x i64>
7815 %load = load i64, i64* %__b
7816 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
7817 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
7818 %2 = icmp sgt <2 x i64> %0, %1
7819 %3 = bitcast i8 %__u to <8 x i1>
7820 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7821 %4 = and <2 x i1> %extract.i, %2
7822 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7823 %6 = bitcast <8 x i1> %5 to i8
; Unmasked signed greater-than compare of two <2 x i64> register operands
; (%__a > %__b). The <2 x i1> result is widened to <16 x i1> (shuffle indices
; 2 and 3 select lanes of the zeroinitializer operand, so lanes 2-15 are zero)
; and bitcast to i16.
; Expected codegen: with AVX512VL a single xmm vpcmpgtq into %k0; with AVX512F
; only, both operands are treated as zmm, compared at 512 bits, and a
; kshiftlw/kshiftrw $14 pair clears mask bits 2-15.
7828 define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7829 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask:
7830 ; VLX: # %bb.0: # %entry
7831 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
7832 ; VLX-NEXT: kmovd %k0, %eax
7833 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
7836 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask:
7837 ; NoVLX: # %bb.0: # %entry
7838 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
7839 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7840 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7841 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7842 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7843 ; NoVLX-NEXT: kmovw %k0, %eax
7844 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
7845 ; NoVLX-NEXT: vzeroupper
7848 %0 = bitcast <2 x i64> %__a to <2 x i64>
7849 %1 = bitcast <2 x i64> %__b to <2 x i64>
7850 %2 = icmp sgt <2 x i64> %0, %1
7851 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7852 %4 = bitcast <16 x i1> %3 to i16
; Unmasked signed greater-than compare of %__a against a full <2 x i64>
; vector loaded from %__b. The <2 x i1> result is widened to <16 x i1> with
; zero lanes taken from the zeroinitializer shuffle operand and bitcast to i16.
; Expected codegen: with AVX512VL the load is folded into an xmm vpcmpgtq;
; with AVX512F only, a separate vmovdqa feeds a zmm-wide compare and a
; kshiftlw/kshiftrw $14 pair clears mask bits 2-15.
7856 define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
7857 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem:
7858 ; VLX: # %bb.0: # %entry
7859 ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
7860 ; VLX-NEXT: kmovd %k0, %eax
7861 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
7864 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem:
7865 ; NoVLX: # %bb.0: # %entry
7866 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7867 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
7868 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7869 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7870 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7871 ; NoVLX-NEXT: kmovw %k0, %eax
7872 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
7873 ; NoVLX-NEXT: vzeroupper
7876 %0 = bitcast <2 x i64> %__a to <2 x i64>
7877 %load = load <2 x i64>, <2 x i64>* %__b
7878 %1 = bitcast <2 x i64> %load to <2 x i64>
7879 %2 = icmp sgt <2 x i64> %0, %1
7880 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7881 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than compare of two <2 x i64> register operands
; (%__a > %__b). The <2 x i1> result is ANDed with the low two bits of %__u,
; widened to <16 x i1> with zero lanes taken from the zeroinitializer shuffle
; operand, and bitcast to i16.
; Expected codegen: with AVX512VL a single k1-masked xmm vpcmpgtq; with
; AVX512F only a k1-masked zmm-wide compare followed by a kshiftlw/kshiftrw
; $14 pair that clears mask bits 2-15.
7885 define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7886 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask:
7887 ; VLX: # %bb.0: # %entry
7888 ; VLX-NEXT: kmovd %edi, %k1
7889 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
7890 ; VLX-NEXT: kmovd %k0, %eax
7891 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
7894 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask:
7895 ; NoVLX: # %bb.0: # %entry
7896 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
7897 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7898 ; NoVLX-NEXT: kmovw %edi, %k1
7899 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7900 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7901 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7902 ; NoVLX-NEXT: kmovw %k0, %eax
7903 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
7904 ; NoVLX-NEXT: vzeroupper
7907 %0 = bitcast <2 x i64> %__a to <2 x i64>
7908 %1 = bitcast <2 x i64> %__b to <2 x i64>
7909 %2 = icmp sgt <2 x i64> %0, %1
7910 %3 = bitcast i8 %__u to <8 x i1>
7911 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7912 %4 = and <2 x i1> %2, %extract.i
7913 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7914 %6 = bitcast <16 x i1> %5 to i16
; Masked signed greater-than compare of %__a against a full <2 x i64> vector
; loaded from %__b. The <2 x i1> result is ANDed with the low two bits of
; %__u, widened to <16 x i1> with zero lanes taken from the zeroinitializer
; shuffle operand, and bitcast to i16.
; Expected codegen: with AVX512VL a k1-masked xmm vpcmpgtq with the load
; folded; with AVX512F only, a separate vmovdqa, a k1-masked zmm-wide compare
; and a kshiftlw/kshiftrw $14 pair that clears mask bits 2-15.
7918 define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
7919 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem:
7920 ; VLX: # %bb.0: # %entry
7921 ; VLX-NEXT: kmovd %edi, %k1
7922 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
7923 ; VLX-NEXT: kmovd %k0, %eax
7924 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
7927 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem:
7928 ; NoVLX: # %bb.0: # %entry
7929 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7930 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
7931 ; NoVLX-NEXT: kmovw %edi, %k1
7932 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7933 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7934 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7935 ; NoVLX-NEXT: kmovw %k0, %eax
7936 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
7937 ; NoVLX-NEXT: vzeroupper
7940 %0 = bitcast <2 x i64> %__a to <2 x i64>
7941 %load = load <2 x i64>, <2 x i64>* %__b
7942 %1 = bitcast <2 x i64> %load to <2 x i64>
7943 %2 = icmp sgt <2 x i64> %0, %1
7944 %3 = bitcast i8 %__u to <8 x i1>
7945 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7946 %4 = and <2 x i1> %2, %extract.i
7947 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7948 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed greater-than compare of %__a against a scalar i64 loaded
; from %__b and splatted to both lanes (insertelement + shufflevector with
; all-zero indices). The <2 x i1> result is widened to <16 x i1> with zero
; lanes taken from the zeroinitializer shuffle operand and bitcast to i16.
; Expected codegen: with AVX512VL an xmm vpcmpgtq with a {1to2} broadcast
; operand; with AVX512F only a zmm compare with a {1to8} broadcast, followed
; by kshiftlw/kshiftrw $14 to clear mask bits 2-15.
7953 define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
7954 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
7955 ; VLX: # %bb.0: # %entry
7956 ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
7957 ; VLX-NEXT: kmovd %k0, %eax
7958 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
7961 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
7962 ; NoVLX: # %bb.0: # %entry
7963 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7964 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
7965 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7966 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7967 ; NoVLX-NEXT: kmovw %k0, %eax
7968 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
7969 ; NoVLX-NEXT: vzeroupper
7972 %0 = bitcast <2 x i64> %__a to <2 x i64>
7973 %load = load i64, i64* %__b
7974 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
7975 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
7976 %2 = icmp sgt <2 x i64> %0, %1
7977 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7978 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes. The <2 x i1> result is ANDed with the low
; two bits of %__u (mask is the left-hand AND operand here), widened to
; <16 x i1> with zero lanes from the zeroinitializer shuffle operand, and
; bitcast to i16.
; Expected codegen: with AVX512VL a k1-masked xmm vpcmpgtq with a {1to2}
; broadcast operand; with AVX512F only a k1-masked zmm compare with a {1to8}
; broadcast, followed by kshiftlw/kshiftrw $14 to clear mask bits 2-15.
7982 define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
7983 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
7984 ; VLX: # %bb.0: # %entry
7985 ; VLX-NEXT: kmovd %edi, %k1
7986 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
7987 ; VLX-NEXT: kmovd %k0, %eax
7988 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
7991 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
7992 ; NoVLX: # %bb.0: # %entry
7993 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7994 ; NoVLX-NEXT: kmovw %edi, %k1
7995 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
7996 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7997 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7998 ; NoVLX-NEXT: kmovw %k0, %eax
7999 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8000 ; NoVLX-NEXT: vzeroupper
8003 %0 = bitcast <2 x i64> %__a to <2 x i64>
8004 %load = load i64, i64* %__b
8005 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8006 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8007 %2 = icmp sgt <2 x i64> %0, %1
8008 %3 = bitcast i8 %__u to <8 x i1>
8009 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8010 %4 = and <2 x i1> %extract.i, %2
8011 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8012 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed greater-than compare of two <2 x i64> register operands
; (%__a > %__b). The <2 x i1> result is widened to <32 x i1> (shuffle indices
; 2 and 3 select lanes of the zeroinitializer operand, so lanes 2-31 are zero)
; and bitcast to i32.
; Expected codegen: with AVX512VL a single xmm vpcmpgtq into %k0 read out with
; kmovd; with AVX512F only, a zmm-wide compare followed by a
; kshiftlw/kshiftrw $14 pair that clears mask bits 2-15 before kmovw.
8017 define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
8018 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
8019 ; VLX: # %bb.0: # %entry
8020 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
8021 ; VLX-NEXT: kmovd %k0, %eax
8024 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
8025 ; NoVLX: # %bb.0: # %entry
8026 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
8027 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8028 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8029 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8030 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8031 ; NoVLX-NEXT: kmovw %k0, %eax
8032 ; NoVLX-NEXT: vzeroupper
8035 %0 = bitcast <2 x i64> %__a to <2 x i64>
8036 %1 = bitcast <2 x i64> %__b to <2 x i64>
8037 %2 = icmp sgt <2 x i64> %0, %1
8038 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8039 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed greater-than compare of %__a against a full <2 x i64>
; vector loaded from %__b. The <2 x i1> result is widened to <32 x i1> with
; zero lanes taken from the zeroinitializer shuffle operand and bitcast to i32.
; Expected codegen: with AVX512VL the load is folded into an xmm vpcmpgtq;
; with AVX512F only, a separate vmovdqa feeds a zmm-wide compare and a
; kshiftlw/kshiftrw $14 pair clears mask bits 2-15.
8043 define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
8044 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
8045 ; VLX: # %bb.0: # %entry
8046 ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
8047 ; VLX-NEXT: kmovd %k0, %eax
8050 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
8051 ; NoVLX: # %bb.0: # %entry
8052 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8053 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
8054 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8055 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8056 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8057 ; NoVLX-NEXT: kmovw %k0, %eax
8058 ; NoVLX-NEXT: vzeroupper
8061 %0 = bitcast <2 x i64> %__a to <2 x i64>
8062 %load = load <2 x i64>, <2 x i64>* %__b
8063 %1 = bitcast <2 x i64> %load to <2 x i64>
8064 %2 = icmp sgt <2 x i64> %0, %1
8065 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8066 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of two <2 x i64> register operands
; (%__a > %__b). The <2 x i1> result is ANDed with the low two bits of %__u,
; widened to <32 x i1> with zero lanes taken from the zeroinitializer shuffle
; operand, and bitcast to i32.
; Expected codegen: with AVX512VL a single k1-masked xmm vpcmpgtq; with
; AVX512F only a k1-masked zmm-wide compare followed by a kshiftlw/kshiftrw
; $14 pair that clears mask bits 2-15.
8070 define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
8071 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
8072 ; VLX: # %bb.0: # %entry
8073 ; VLX-NEXT: kmovd %edi, %k1
8074 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
8075 ; VLX-NEXT: kmovd %k0, %eax
8078 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
8079 ; NoVLX: # %bb.0: # %entry
8080 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
8081 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8082 ; NoVLX-NEXT: kmovw %edi, %k1
8083 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8084 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8085 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8086 ; NoVLX-NEXT: kmovw %k0, %eax
8087 ; NoVLX-NEXT: vzeroupper
8090 %0 = bitcast <2 x i64> %__a to <2 x i64>
8091 %1 = bitcast <2 x i64> %__b to <2 x i64>
8092 %2 = icmp sgt <2 x i64> %0, %1
8093 %3 = bitcast i8 %__u to <8 x i1>
8094 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8095 %4 = and <2 x i1> %2, %extract.i
8096 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8097 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-than compare of %__a against a full <2 x i64> vector
; loaded from %__b. The <2 x i1> result is ANDed with the low two bits of
; %__u, widened to <32 x i1> with zero lanes taken from the zeroinitializer
; shuffle operand, and bitcast to i32.
; Expected codegen: with AVX512VL a k1-masked xmm vpcmpgtq with the load
; folded; with AVX512F only, a separate vmovdqa, a k1-masked zmm-wide compare
; and a kshiftlw/kshiftrw $14 pair that clears mask bits 2-15.
8101 define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
8102 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
8103 ; VLX: # %bb.0: # %entry
8104 ; VLX-NEXT: kmovd %edi, %k1
8105 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
8106 ; VLX-NEXT: kmovd %k0, %eax
8109 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
8110 ; NoVLX: # %bb.0: # %entry
8111 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8112 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
8113 ; NoVLX-NEXT: kmovw %edi, %k1
8114 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8115 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8116 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8117 ; NoVLX-NEXT: kmovw %k0, %eax
8118 ; NoVLX-NEXT: vzeroupper
8121 %0 = bitcast <2 x i64> %__a to <2 x i64>
8122 %load = load <2 x i64>, <2 x i64>* %__b
8123 %1 = bitcast <2 x i64> %load to <2 x i64>
8124 %2 = icmp sgt <2 x i64> %0, %1
8125 %3 = bitcast i8 %__u to <8 x i1>
8126 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8127 %4 = and <2 x i1> %2, %extract.i
8128 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8129 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed greater-than compare of %__a against a scalar i64 loaded
; from %__b and splatted to both lanes (insertelement + shufflevector with
; all-zero indices). The <2 x i1> result is widened to <32 x i1> with zero
; lanes taken from the zeroinitializer shuffle operand and bitcast to i32.
; Expected codegen: with AVX512VL an xmm vpcmpgtq with a {1to2} broadcast
; operand; with AVX512F only a zmm compare with a {1to8} broadcast, followed
; by kshiftlw/kshiftrw $14 to clear mask bits 2-15.
8134 define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
8135 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
8136 ; VLX: # %bb.0: # %entry
8137 ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
8138 ; VLX-NEXT: kmovd %k0, %eax
8141 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
8142 ; NoVLX: # %bb.0: # %entry
8143 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8144 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
8145 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8146 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8147 ; NoVLX-NEXT: kmovw %k0, %eax
8148 ; NoVLX-NEXT: vzeroupper
8151 %0 = bitcast <2 x i64> %__a to <2 x i64>
8152 %load = load i64, i64* %__b
8153 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8154 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8155 %2 = icmp sgt <2 x i64> %0, %1
8156 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8157 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes. The <2 x i1> result is ANDed with the low
; two bits of %__u (mask is the left-hand AND operand here), widened to
; <32 x i1> with zero lanes from the zeroinitializer shuffle operand, and
; bitcast to i32.
; Expected codegen: with AVX512VL a k1-masked xmm vpcmpgtq with a {1to2}
; broadcast operand; with AVX512F only a k1-masked zmm compare with a {1to8}
; broadcast, followed by kshiftlw/kshiftrw $14 to clear mask bits 2-15.
8161 define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
8162 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
8163 ; VLX: # %bb.0: # %entry
8164 ; VLX-NEXT: kmovd %edi, %k1
8165 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
8166 ; VLX-NEXT: kmovd %k0, %eax
8169 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
8170 ; NoVLX: # %bb.0: # %entry
8171 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8172 ; NoVLX-NEXT: kmovw %edi, %k1
8173 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
8174 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8175 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8176 ; NoVLX-NEXT: kmovw %k0, %eax
8177 ; NoVLX-NEXT: vzeroupper
8180 %0 = bitcast <2 x i64> %__a to <2 x i64>
8181 %load = load i64, i64* %__b
8182 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8183 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8184 %2 = icmp sgt <2 x i64> %0, %1
8185 %3 = bitcast i8 %__u to <8 x i1>
8186 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8187 %4 = and <2 x i1> %extract.i, %2
8188 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8189 %6 = bitcast <32 x i1> %5 to i32
8194 define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
8195 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
8196 ; VLX: # %bb.0: # %entry
8197 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
8198 ; VLX-NEXT: kmovq %k0, %rax
8201 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
8202 ; NoVLX: # %bb.0: # %entry
8203 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
8204 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8205 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8206 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8207 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8208 ; NoVLX-NEXT: kmovw %k0, %eax
8209 ; NoVLX-NEXT: vzeroupper
8212 %0 = bitcast <2 x i64> %__a to <2 x i64>
8213 %1 = bitcast <2 x i64> %__b to <2 x i64>
8214 %2 = icmp sgt <2 x i64> %0, %1
8215 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8216 %4 = bitcast <64 x i1> %3 to i64
8220 define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
8221 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem:
8222 ; VLX: # %bb.0: # %entry
8223 ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
8224 ; VLX-NEXT: kmovq %k0, %rax
8227 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem:
8228 ; NoVLX: # %bb.0: # %entry
8229 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8230 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
8231 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8232 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8233 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8234 ; NoVLX-NEXT: kmovw %k0, %eax
8235 ; NoVLX-NEXT: vzeroupper
8238 %0 = bitcast <2 x i64> %__a to <2 x i64>
8239 %load = load <2 x i64>, <2 x i64>* %__b
8240 %1 = bitcast <2 x i64> %load to <2 x i64>
8241 %2 = icmp sgt <2 x i64> %0, %1
8242 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8243 %4 = bitcast <64 x i1> %3 to i64
8247 define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
8248 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask:
8249 ; VLX: # %bb.0: # %entry
8250 ; VLX-NEXT: kmovd %edi, %k1
8251 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
8252 ; VLX-NEXT: kmovq %k0, %rax
8255 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask:
8256 ; NoVLX: # %bb.0: # %entry
8257 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
8258 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8259 ; NoVLX-NEXT: kmovw %edi, %k1
8260 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8261 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8262 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8263 ; NoVLX-NEXT: kmovw %k0, %eax
8264 ; NoVLX-NEXT: vzeroupper
8267 %0 = bitcast <2 x i64> %__a to <2 x i64>
8268 %1 = bitcast <2 x i64> %__b to <2 x i64>
8269 %2 = icmp sgt <2 x i64> %0, %1
8270 %3 = bitcast i8 %__u to <8 x i1>
8271 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8272 %4 = and <2 x i1> %2, %extract.i
8273 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8274 %6 = bitcast <64 x i1> %5 to i64
8278 define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
8279 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem:
8280 ; VLX: # %bb.0: # %entry
8281 ; VLX-NEXT: kmovd %edi, %k1
8282 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
8283 ; VLX-NEXT: kmovq %k0, %rax
8286 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem:
8287 ; NoVLX: # %bb.0: # %entry
8288 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8289 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
8290 ; NoVLX-NEXT: kmovw %edi, %k1
8291 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8292 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8293 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8294 ; NoVLX-NEXT: kmovw %k0, %eax
8295 ; NoVLX-NEXT: vzeroupper
8298 %0 = bitcast <2 x i64> %__a to <2 x i64>
8299 %load = load <2 x i64>, <2 x i64>* %__b
8300 %1 = bitcast <2 x i64> %load to <2 x i64>
8301 %2 = icmp sgt <2 x i64> %0, %1
8302 %3 = bitcast i8 %__u to <8 x i1>
8303 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8304 %4 = and <2 x i1> %2, %extract.i
8305 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8306 %6 = bitcast <64 x i1> %5 to i64
8311 define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
8312 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
8313 ; VLX: # %bb.0: # %entry
8314 ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
8315 ; VLX-NEXT: kmovq %k0, %rax
8318 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
8319 ; NoVLX: # %bb.0: # %entry
8320 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8321 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
8322 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8323 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8324 ; NoVLX-NEXT: kmovw %k0, %eax
8325 ; NoVLX-NEXT: vzeroupper
8328 %0 = bitcast <2 x i64> %__a to <2 x i64>
8329 %load = load i64, i64* %__b
8330 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8331 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8332 %2 = icmp sgt <2 x i64> %0, %1
8333 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8334 %4 = bitcast <64 x i1> %3 to i64
8338 define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
8339 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
8340 ; VLX: # %bb.0: # %entry
8341 ; VLX-NEXT: kmovd %edi, %k1
8342 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
8343 ; VLX-NEXT: kmovq %k0, %rax
8346 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
8347 ; NoVLX: # %bb.0: # %entry
8348 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8349 ; NoVLX-NEXT: kmovw %edi, %k1
8350 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
8351 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8352 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8353 ; NoVLX-NEXT: kmovw %k0, %eax
8354 ; NoVLX-NEXT: vzeroupper
8357 %0 = bitcast <2 x i64> %__a to <2 x i64>
8358 %load = load i64, i64* %__b
8359 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8360 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8361 %2 = icmp sgt <2 x i64> %0, %1
8362 %3 = bitcast i8 %__u to <8 x i1>
8363 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8364 %4 = and <2 x i1> %extract.i, %2
8365 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8366 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than compare of two <4 x i64> register operands
; (%__a > %__b). The <4 x i1> result is widened to <8 x i1> (shuffle indices
; 4-7 select the four lanes of the zeroinitializer operand, so lanes 4-7 are
; zero) and bitcast to i8.
; Expected codegen: with AVX512VL a single ymm vpcmpgtq into %k0; with AVX512F
; only, both operands are treated as zmm, compared at 512 bits, and a
; kshiftlw/kshiftrw $12 pair clears mask bits 4-15.
8371 define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8372 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask:
8373 ; VLX: # %bb.0: # %entry
8374 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
8375 ; VLX-NEXT: kmovd %k0, %eax
8376 ; VLX-NEXT: # kill: def $al killed $al killed $eax
8377 ; VLX-NEXT: vzeroupper
8380 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask:
8381 ; NoVLX: # %bb.0: # %entry
8382 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8383 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8384 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8385 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8386 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8387 ; NoVLX-NEXT: kmovw %k0, %eax
8388 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
8389 ; NoVLX-NEXT: vzeroupper
8392 %0 = bitcast <4 x i64> %__a to <4 x i64>
8393 %1 = bitcast <4 x i64> %__b to <4 x i64>
8394 %2 = icmp sgt <4 x i64> %0, %1
8395 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8396 %4 = bitcast <8 x i1> %3 to i8
; Unmasked signed greater-than compare of %__a against a full <4 x i64>
; vector loaded from %__b. The <4 x i1> result is widened to <8 x i1> with
; lanes 4-7 taken from the zeroinitializer shuffle operand and bitcast to i8.
; Expected codegen: with AVX512VL the load is folded into a ymm vpcmpgtq;
; with AVX512F only, a separate ymm vmovdqa feeds a zmm-wide compare and a
; kshiftlw/kshiftrw $12 pair clears mask bits 4-15.
8400 define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
8401 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem:
8402 ; VLX: # %bb.0: # %entry
8403 ; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
8404 ; VLX-NEXT: kmovd %k0, %eax
8405 ; VLX-NEXT: # kill: def $al killed $al killed $eax
8406 ; VLX-NEXT: vzeroupper
8409 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem:
8410 ; NoVLX: # %bb.0: # %entry
8411 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8412 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
8413 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8414 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8415 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8416 ; NoVLX-NEXT: kmovw %k0, %eax
8417 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
8418 ; NoVLX-NEXT: vzeroupper
8421 %0 = bitcast <4 x i64> %__a to <4 x i64>
8422 %load = load <4 x i64>, <4 x i64>* %__b
8423 %1 = bitcast <4 x i64> %load to <4 x i64>
8424 %2 = icmp sgt <4 x i64> %0, %1
8425 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8426 %4 = bitcast <8 x i1> %3 to i8
; Masked signed greater-than compare of two <4 x i64> register operands
; (%__a > %__b). The <4 x i1> result is ANDed with the low four bits of %__u,
; widened to <8 x i1> with lanes 4-7 taken from the zeroinitializer shuffle
; operand, and bitcast to i8.
; Expected codegen: with AVX512VL a single k1-masked ymm vpcmpgtq; with
; AVX512F only a k1-masked zmm-wide compare followed by a kshiftlw/kshiftrw
; $12 pair that clears mask bits 4-15.
8430 define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8431 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask:
8432 ; VLX: # %bb.0: # %entry
8433 ; VLX-NEXT: kmovd %edi, %k1
8434 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
8435 ; VLX-NEXT: kmovd %k0, %eax
8436 ; VLX-NEXT: # kill: def $al killed $al killed $eax
8437 ; VLX-NEXT: vzeroupper
8440 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask:
8441 ; NoVLX: # %bb.0: # %entry
8442 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8443 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8444 ; NoVLX-NEXT: kmovw %edi, %k1
8445 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8446 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8447 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8448 ; NoVLX-NEXT: kmovw %k0, %eax
8449 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
8450 ; NoVLX-NEXT: vzeroupper
8453 %0 = bitcast <4 x i64> %__a to <4 x i64>
8454 %1 = bitcast <4 x i64> %__b to <4 x i64>
8455 %2 = icmp sgt <4 x i64> %0, %1
8456 %3 = bitcast i8 %__u to <8 x i1>
8457 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8458 %4 = and <4 x i1> %2, %extract.i
8459 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8460 %6 = bitcast <8 x i1> %5 to i8
; Masked signed greater-than compare of %__a against a full <4 x i64> vector
; loaded from %__b. The <4 x i1> result is ANDed with the low four bits of
; %__u, widened to <8 x i1> with lanes 4-7 taken from the zeroinitializer
; shuffle operand, and bitcast to i8.
; Expected codegen: with AVX512VL a k1-masked ymm vpcmpgtq with the load
; folded; with AVX512F only, a separate ymm vmovdqa, a k1-masked zmm-wide
; compare and a kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
8464 define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
8465 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem:
8466 ; VLX: # %bb.0: # %entry
8467 ; VLX-NEXT: kmovd %edi, %k1
8468 ; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
8469 ; VLX-NEXT: kmovd %k0, %eax
8470 ; VLX-NEXT: # kill: def $al killed $al killed $eax
8471 ; VLX-NEXT: vzeroupper
8474 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem:
8475 ; NoVLX: # %bb.0: # %entry
8476 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8477 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
8478 ; NoVLX-NEXT: kmovw %edi, %k1
8479 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8480 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8481 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8482 ; NoVLX-NEXT: kmovw %k0, %eax
8483 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
8484 ; NoVLX-NEXT: vzeroupper
8487 %0 = bitcast <4 x i64> %__a to <4 x i64>
8488 %load = load <4 x i64>, <4 x i64>* %__b
8489 %1 = bitcast <4 x i64> %load to <4 x i64>
8490 %2 = icmp sgt <4 x i64> %0, %1
8491 %3 = bitcast i8 %__u to <8 x i1>
8492 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8493 %4 = and <4 x i1> %2, %extract.i
8494 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8495 %6 = bitcast <8 x i1> %5 to i8
; Signed greater-than compare of <4 x i64> %__a against a scalar i64 loaded
; from %__b and splatted to all four lanes (insertelement + all-zero shuffle).
; The <4 x i1> result is widened to <8 x i1> (shuffle indices 4-7 pick
; zeroinitializer lanes) and bitcast to i8.
; Expected codegen: the VLX run uses a 256-bit vpcmpgtq with a {1to4} embedded
; broadcast; the NoVLX run uses a 512-bit vpcmpgtq with {1to8} followed by a
; kshiftlw/kshiftrw $12 pair to clear the upper bits of the 16-bit mask.
8500 define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
8501 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
8502 ; VLX: # %bb.0: # %entry
8503 ; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
8504 ; VLX-NEXT: kmovd %k0, %eax
8505 ; VLX-NEXT: # kill: def $al killed $al killed $eax
8506 ; VLX-NEXT: vzeroupper
8509 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
8510 ; NoVLX: # %bb.0: # %entry
8511 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8512 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
8513 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8514 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8515 ; NoVLX-NEXT: kmovw %k0, %eax
8516 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
8517 ; NoVLX-NEXT: vzeroupper
8520 %0 = bitcast <4 x i64> %__a to <4 x i64>
8521 %load = load i64, i64* %__b
8522 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8523 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8524 %2 = icmp sgt <4 x i64> %0, %1
8525 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8526 %4 = bitcast <8 x i1> %3 to i8
; Masked signed greater-than compare of <4 x i64> %__a against a scalar i64
; loaded from %__b and splatted to all four lanes. The low four lanes of %__u
; (viewed as <8 x i1>) are ANDed with the compare result, which is widened to
; <8 x i1> (shuffle indices 4-7 pick zeroinitializer lanes) and bitcast to i8.
; Expected codegen: the VLX run uses a masked 256-bit vpcmpgtq with a {1to4}
; embedded broadcast; the NoVLX run uses a masked 512-bit vpcmpgtq with {1to8}
; followed by a kshiftlw/kshiftrw $12 pair to clear the upper mask bits.
8530 define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
8531 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
8532 ; VLX: # %bb.0: # %entry
8533 ; VLX-NEXT: kmovd %edi, %k1
8534 ; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
8535 ; VLX-NEXT: kmovd %k0, %eax
8536 ; VLX-NEXT: # kill: def $al killed $al killed $eax
8537 ; VLX-NEXT: vzeroupper
8540 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
8541 ; NoVLX: # %bb.0: # %entry
8542 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8543 ; NoVLX-NEXT: kmovw %edi, %k1
8544 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
8545 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8546 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8547 ; NoVLX-NEXT: kmovw %k0, %eax
8548 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
8549 ; NoVLX-NEXT: vzeroupper
8552 %0 = bitcast <4 x i64> %__a to <4 x i64>
8553 %load = load i64, i64* %__b
8554 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8555 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8556 %2 = icmp sgt <4 x i64> %0, %1
8557 %3 = bitcast i8 %__u to <8 x i1>
8558 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8559 %4 = and <4 x i1> %extract.i, %2
8560 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8561 %6 = bitcast <8 x i1> %5 to i8
; Signed greater-than compare of <4 x i64> %__a against %__b. The <4 x i1>
; result is widened to <16 x i1> (every shuffle index above 3 is in the 4-7
; range, i.e. a zeroinitializer lane) and bitcast to i16.
; Expected codegen: the VLX run uses a 256-bit vpcmpgtq; the NoVLX run widens
; both operands to zmm, uses a 512-bit vpcmpgtq and then a kshiftlw/kshiftrw
; $12 pair to clear the upper bits of the 16-bit mask.
8566 define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8567 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask:
8568 ; VLX: # %bb.0: # %entry
8569 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
8570 ; VLX-NEXT: kmovd %k0, %eax
8571 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8572 ; VLX-NEXT: vzeroupper
8575 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask:
8576 ; NoVLX: # %bb.0: # %entry
8577 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8578 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8579 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8580 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8581 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8582 ; NoVLX-NEXT: kmovw %k0, %eax
8583 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8584 ; NoVLX-NEXT: vzeroupper
8587 %0 = bitcast <4 x i64> %__a to <4 x i64>
8588 %1 = bitcast <4 x i64> %__b to <4 x i64>
8589 %2 = icmp sgt <4 x i64> %0, %1
8590 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8591 %4 = bitcast <16 x i1> %3 to i16
; Signed greater-than compare of <4 x i64> %__a against the <4 x i64> loaded
; from %__b. The <4 x i1> result is widened to <16 x i1> (every shuffle index
; above 3 is in the 4-7 range, i.e. a zeroinitializer lane) and bitcast to i16.
; Expected codegen: the VLX run folds the load into a 256-bit vpcmpgtq; the
; NoVLX run loads into ymm1, uses a 512-bit vpcmpgtq and then a
; kshiftlw/kshiftrw $12 pair to clear the upper bits of the 16-bit mask.
8595 define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
8596 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem:
8597 ; VLX: # %bb.0: # %entry
8598 ; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
8599 ; VLX-NEXT: kmovd %k0, %eax
8600 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8601 ; VLX-NEXT: vzeroupper
8604 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem:
8605 ; NoVLX: # %bb.0: # %entry
8606 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8607 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
8608 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8609 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8610 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8611 ; NoVLX-NEXT: kmovw %k0, %eax
8612 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8613 ; NoVLX-NEXT: vzeroupper
8616 %0 = bitcast <4 x i64> %__a to <4 x i64>
8617 %load = load <4 x i64>, <4 x i64>* %__b
8618 %1 = bitcast <4 x i64> %load to <4 x i64>
8619 %2 = icmp sgt <4 x i64> %0, %1
8620 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8621 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than compare of <4 x i64> %__a against %__b. The low
; four lanes of %__u (viewed as <8 x i1>) are ANDed with the compare result,
; which is widened to <16 x i1> (every shuffle index above 3 is in the 4-7
; range, i.e. a zeroinitializer lane) and bitcast to i16.
; Expected codegen: the VLX run uses a masked 256-bit vpcmpgtq; the NoVLX run
; widens both operands to zmm, uses a masked 512-bit vpcmpgtq and then a
; kshiftlw/kshiftrw $12 pair to clear the upper bits of the 16-bit mask.
8625 define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8626 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask:
8627 ; VLX: # %bb.0: # %entry
8628 ; VLX-NEXT: kmovd %edi, %k1
8629 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
8630 ; VLX-NEXT: kmovd %k0, %eax
8631 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8632 ; VLX-NEXT: vzeroupper
8635 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask:
8636 ; NoVLX: # %bb.0: # %entry
8637 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8638 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8639 ; NoVLX-NEXT: kmovw %edi, %k1
8640 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8641 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8642 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8643 ; NoVLX-NEXT: kmovw %k0, %eax
8644 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8645 ; NoVLX-NEXT: vzeroupper
8648 %0 = bitcast <4 x i64> %__a to <4 x i64>
8649 %1 = bitcast <4 x i64> %__b to <4 x i64>
8650 %2 = icmp sgt <4 x i64> %0, %1
8651 %3 = bitcast i8 %__u to <8 x i1>
8652 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8653 %4 = and <4 x i1> %2, %extract.i
8654 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8655 %6 = bitcast <16 x i1> %5 to i16
; Masked signed greater-than compare of <4 x i64> %__a against the <4 x i64>
; loaded from %__b. The low four lanes of %__u (viewed as <8 x i1>) are ANDed
; with the compare result, which is widened to <16 x i1> (every shuffle index
; above 3 is in the 4-7 range, i.e. a zeroinitializer lane) and bitcast to i16.
; Expected codegen: the VLX run folds the load into a masked 256-bit vpcmpgtq;
; the NoVLX run loads into ymm1, uses a masked 512-bit vpcmpgtq and then a
; kshiftlw/kshiftrw $12 pair to clear the upper bits of the 16-bit mask.
8659 define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
8660 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem:
8661 ; VLX: # %bb.0: # %entry
8662 ; VLX-NEXT: kmovd %edi, %k1
8663 ; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
8664 ; VLX-NEXT: kmovd %k0, %eax
8665 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8666 ; VLX-NEXT: vzeroupper
8669 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem:
8670 ; NoVLX: # %bb.0: # %entry
8671 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8672 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
8673 ; NoVLX-NEXT: kmovw %edi, %k1
8674 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8675 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8676 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8677 ; NoVLX-NEXT: kmovw %k0, %eax
8678 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8679 ; NoVLX-NEXT: vzeroupper
8682 %0 = bitcast <4 x i64> %__a to <4 x i64>
8683 %load = load <4 x i64>, <4 x i64>* %__b
8684 %1 = bitcast <4 x i64> %load to <4 x i64>
8685 %2 = icmp sgt <4 x i64> %0, %1
8686 %3 = bitcast i8 %__u to <8 x i1>
8687 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8688 %4 = and <4 x i1> %2, %extract.i
8689 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8690 %6 = bitcast <16 x i1> %5 to i16
; Signed greater-than compare of <4 x i64> %__a against a scalar i64 loaded
; from %__b and splatted to all four lanes. The <4 x i1> result is widened to
; <16 x i1> (every shuffle index above 3 is in the 4-7 range, i.e. a
; zeroinitializer lane) and bitcast to i16.
; Expected codegen: the VLX run uses a 256-bit vpcmpgtq with a {1to4} embedded
; broadcast; the NoVLX run uses a 512-bit vpcmpgtq with {1to8} followed by a
; kshiftlw/kshiftrw $12 pair to clear the upper bits of the 16-bit mask.
8695 define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
8696 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
8697 ; VLX: # %bb.0: # %entry
8698 ; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
8699 ; VLX-NEXT: kmovd %k0, %eax
8700 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8701 ; VLX-NEXT: vzeroupper
8704 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
8705 ; NoVLX: # %bb.0: # %entry
8706 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8707 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
8708 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8709 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8710 ; NoVLX-NEXT: kmovw %k0, %eax
8711 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8712 ; NoVLX-NEXT: vzeroupper
8715 %0 = bitcast <4 x i64> %__a to <4 x i64>
8716 %load = load i64, i64* %__b
8717 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8718 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8719 %2 = icmp sgt <4 x i64> %0, %1
8720 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8721 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than compare of <4 x i64> %__a against a scalar i64
; loaded from %__b and splatted to all four lanes. The low four lanes of %__u
; (viewed as <8 x i1>) are ANDed with the compare result, which is widened to
; <16 x i1> (every shuffle index above 3 is in the 4-7 range, i.e. a
; zeroinitializer lane) and bitcast to i16.
; Expected codegen: the VLX run uses a masked 256-bit vpcmpgtq with a {1to4}
; embedded broadcast; the NoVLX run uses a masked 512-bit vpcmpgtq with {1to8}
; followed by a kshiftlw/kshiftrw $12 pair to clear the upper mask bits.
8725 define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
8726 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
8727 ; VLX: # %bb.0: # %entry
8728 ; VLX-NEXT: kmovd %edi, %k1
8729 ; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
8730 ; VLX-NEXT: kmovd %k0, %eax
8731 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8732 ; VLX-NEXT: vzeroupper
8735 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
8736 ; NoVLX: # %bb.0: # %entry
8737 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8738 ; NoVLX-NEXT: kmovw %edi, %k1
8739 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
8740 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8741 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8742 ; NoVLX-NEXT: kmovw %k0, %eax
8743 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8744 ; NoVLX-NEXT: vzeroupper
8747 %0 = bitcast <4 x i64> %__a to <4 x i64>
8748 %load = load i64, i64* %__b
8749 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8750 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8751 %2 = icmp sgt <4 x i64> %0, %1
8752 %3 = bitcast i8 %__u to <8 x i1>
8753 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8754 %4 = and <4 x i1> %extract.i, %2
8755 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8756 %6 = bitcast <16 x i1> %5 to i16
; Signed greater-than compare of <4 x i64> %__a against %__b. The <4 x i1>
; result is widened to <32 x i1> (every shuffle index above 3 is in the 4-7
; range, i.e. a zeroinitializer lane) and bitcast to i32.
; Expected codegen: the VLX run uses a 256-bit vpcmpgtq and kmovd; the NoVLX
; run widens both operands to zmm, uses a 512-bit vpcmpgtq, clears the upper
; mask bits with a kshiftlw/kshiftrw $12 pair and reads the mask with kmovw.
8761 define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8762 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask:
8763 ; VLX: # %bb.0: # %entry
8764 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
8765 ; VLX-NEXT: kmovd %k0, %eax
8766 ; VLX-NEXT: vzeroupper
8769 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask:
8770 ; NoVLX: # %bb.0: # %entry
8771 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8772 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8773 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8774 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8775 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8776 ; NoVLX-NEXT: kmovw %k0, %eax
8777 ; NoVLX-NEXT: vzeroupper
8780 %0 = bitcast <4 x i64> %__a to <4 x i64>
8781 %1 = bitcast <4 x i64> %__b to <4 x i64>
8782 %2 = icmp sgt <4 x i64> %0, %1
8783 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8784 %4 = bitcast <32 x i1> %3 to i32
; Signed greater-than compare of <4 x i64> %__a against the <4 x i64> loaded
; from %__b. The <4 x i1> result is widened to <32 x i1> (every shuffle index
; above 3 is in the 4-7 range, i.e. a zeroinitializer lane) and bitcast to i32.
; Expected codegen: the VLX run folds the load into a 256-bit vpcmpgtq; the
; NoVLX run loads into ymm1, uses a 512-bit vpcmpgtq, clears the upper mask
; bits with a kshiftlw/kshiftrw $12 pair and reads the mask with kmovw.
8788 define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
8789 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem:
8790 ; VLX: # %bb.0: # %entry
8791 ; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
8792 ; VLX-NEXT: kmovd %k0, %eax
8793 ; VLX-NEXT: vzeroupper
8796 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem:
8797 ; NoVLX: # %bb.0: # %entry
8798 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8799 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
8800 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8801 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8802 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8803 ; NoVLX-NEXT: kmovw %k0, %eax
8804 ; NoVLX-NEXT: vzeroupper
8807 %0 = bitcast <4 x i64> %__a to <4 x i64>
8808 %load = load <4 x i64>, <4 x i64>* %__b
8809 %1 = bitcast <4 x i64> %load to <4 x i64>
8810 %2 = icmp sgt <4 x i64> %0, %1
8811 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8812 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of <4 x i64> %__a against %__b. The low
; four lanes of %__u (viewed as <8 x i1>) are ANDed with the compare result,
; which is widened to <32 x i1> (every shuffle index above 3 is in the 4-7
; range, i.e. a zeroinitializer lane) and bitcast to i32.
; Expected codegen: the VLX run uses a masked 256-bit vpcmpgtq; the NoVLX run
; widens both operands to zmm, uses a masked 512-bit vpcmpgtq, clears the
; upper mask bits with a kshiftlw/kshiftrw $12 pair and reads it with kmovw.
8816 define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8817 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask:
8818 ; VLX: # %bb.0: # %entry
8819 ; VLX-NEXT: kmovd %edi, %k1
8820 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
8821 ; VLX-NEXT: kmovd %k0, %eax
8822 ; VLX-NEXT: vzeroupper
8825 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask:
8826 ; NoVLX: # %bb.0: # %entry
8827 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8828 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8829 ; NoVLX-NEXT: kmovw %edi, %k1
8830 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8831 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8832 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8833 ; NoVLX-NEXT: kmovw %k0, %eax
8834 ; NoVLX-NEXT: vzeroupper
8837 %0 = bitcast <4 x i64> %__a to <4 x i64>
8838 %1 = bitcast <4 x i64> %__b to <4 x i64>
8839 %2 = icmp sgt <4 x i64> %0, %1
8840 %3 = bitcast i8 %__u to <8 x i1>
8841 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8842 %4 = and <4 x i1> %2, %extract.i
8843 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8844 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-than compare of <4 x i64> %__a against the <4 x i64>
; loaded from %__b. The low four lanes of %__u (viewed as <8 x i1>) are ANDed
; with the compare result, which is widened to <32 x i1> (every shuffle index
; above 3 is in the 4-7 range, i.e. a zeroinitializer lane) and bitcast to i32.
; Expected codegen: the VLX run folds the load into a masked 256-bit vpcmpgtq;
; the NoVLX run loads into ymm1, uses a masked 512-bit vpcmpgtq, clears the
; upper mask bits with a kshiftlw/kshiftrw $12 pair and reads it with kmovw.
8848 define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
8849 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem:
8850 ; VLX: # %bb.0: # %entry
8851 ; VLX-NEXT: kmovd %edi, %k1
8852 ; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
8853 ; VLX-NEXT: kmovd %k0, %eax
8854 ; VLX-NEXT: vzeroupper
8857 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem:
8858 ; NoVLX: # %bb.0: # %entry
8859 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8860 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
8861 ; NoVLX-NEXT: kmovw %edi, %k1
8862 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8863 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8864 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8865 ; NoVLX-NEXT: kmovw %k0, %eax
8866 ; NoVLX-NEXT: vzeroupper
8869 %0 = bitcast <4 x i64> %__a to <4 x i64>
8870 %load = load <4 x i64>, <4 x i64>* %__b
8871 %1 = bitcast <4 x i64> %load to <4 x i64>
8872 %2 = icmp sgt <4 x i64> %0, %1
8873 %3 = bitcast i8 %__u to <8 x i1>
8874 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8875 %4 = and <4 x i1> %2, %extract.i
8876 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8877 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-than compare of <4 x i64> %__a against a scalar i64 loaded
; from %__b and splatted to all four lanes. The <4 x i1> result is widened to
; <32 x i1> (every shuffle index above 3 is in the 4-7 range, i.e. a
; zeroinitializer lane) and bitcast to i32.
; Expected codegen: the VLX run uses a 256-bit vpcmpgtq with a {1to4} embedded
; broadcast; the NoVLX run uses a 512-bit vpcmpgtq with {1to8}, clears the
; upper mask bits with a kshiftlw/kshiftrw $12 pair and reads it with kmovw.
8882 define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
8883 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
8884 ; VLX: # %bb.0: # %entry
8885 ; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
8886 ; VLX-NEXT: kmovd %k0, %eax
8887 ; VLX-NEXT: vzeroupper
8890 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
8891 ; NoVLX: # %bb.0: # %entry
8892 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8893 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
8894 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8895 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8896 ; NoVLX-NEXT: kmovw %k0, %eax
8897 ; NoVLX-NEXT: vzeroupper
8900 %0 = bitcast <4 x i64> %__a to <4 x i64>
8901 %load = load i64, i64* %__b
8902 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8903 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8904 %2 = icmp sgt <4 x i64> %0, %1
8905 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8906 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of <4 x i64> %__a against a scalar i64
; loaded from %__b and splatted to all four lanes. The low four lanes of %__u
; (viewed as <8 x i1>) are ANDed with the compare result, which is widened to
; <32 x i1> (every shuffle index above 3 is in the 4-7 range, i.e. a
; zeroinitializer lane) and bitcast to i32.
; Expected codegen: the VLX run uses a masked 256-bit vpcmpgtq with a {1to4}
; embedded broadcast; the NoVLX run uses a masked 512-bit vpcmpgtq with
; {1to8}, a kshiftlw/kshiftrw $12 pair to clear upper mask bits, then kmovw.
8910 define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
8911 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
8912 ; VLX: # %bb.0: # %entry
8913 ; VLX-NEXT: kmovd %edi, %k1
8914 ; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
8915 ; VLX-NEXT: kmovd %k0, %eax
8916 ; VLX-NEXT: vzeroupper
8919 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
8920 ; NoVLX: # %bb.0: # %entry
8921 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8922 ; NoVLX-NEXT: kmovw %edi, %k1
8923 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
8924 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8925 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8926 ; NoVLX-NEXT: kmovw %k0, %eax
8927 ; NoVLX-NEXT: vzeroupper
8930 %0 = bitcast <4 x i64> %__a to <4 x i64>
8931 %load = load i64, i64* %__b
8932 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8933 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8934 %2 = icmp sgt <4 x i64> %0, %1
8935 %3 = bitcast i8 %__u to <8 x i1>
8936 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8937 %4 = and <4 x i1> %extract.i, %2
8938 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8939 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-than compare of <4 x i64> %__a against %__b. The <4 x i1>
; result is widened to <64 x i1> (every shuffle index above 3 is in the 4-7
; range, i.e. a zeroinitializer lane) and bitcast to i64.
; Expected codegen: the VLX run uses a 256-bit vpcmpgtq and kmovq; the NoVLX
; run widens both operands to zmm, uses a 512-bit vpcmpgtq, clears the upper
; mask bits with a kshiftlw/kshiftrw $12 pair and reads the mask with kmovw.
8944 define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8945 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask:
8946 ; VLX: # %bb.0: # %entry
8947 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
8948 ; VLX-NEXT: kmovq %k0, %rax
8949 ; VLX-NEXT: vzeroupper
8952 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask:
8953 ; NoVLX: # %bb.0: # %entry
8954 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8955 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8956 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8957 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8958 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8959 ; NoVLX-NEXT: kmovw %k0, %eax
8960 ; NoVLX-NEXT: vzeroupper
8963 %0 = bitcast <4 x i64> %__a to <4 x i64>
8964 %1 = bitcast <4 x i64> %__b to <4 x i64>
8965 %2 = icmp sgt <4 x i64> %0, %1
8966 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8967 %4 = bitcast <64 x i1> %3 to i64
; Signed greater-than compare of <4 x i64> %__a against the <4 x i64> loaded
; from %__b. The <4 x i1> result is widened to <64 x i1> (every shuffle index
; above 3 is in the 4-7 range, i.e. a zeroinitializer lane) and bitcast to i64.
; Expected codegen: the VLX run folds the load into a 256-bit vpcmpgtq and
; uses kmovq; the NoVLX run loads into ymm1, uses a 512-bit vpcmpgtq, clears
; the upper mask bits with a kshiftlw/kshiftrw $12 pair and reads it with kmovw.
8971 define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
8972 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem:
8973 ; VLX: # %bb.0: # %entry
8974 ; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
8975 ; VLX-NEXT: kmovq %k0, %rax
8976 ; VLX-NEXT: vzeroupper
8979 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem:
8980 ; NoVLX: # %bb.0: # %entry
8981 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8982 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
8983 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8984 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8985 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8986 ; NoVLX-NEXT: kmovw %k0, %eax
8987 ; NoVLX-NEXT: vzeroupper
8990 %0 = bitcast <4 x i64> %__a to <4 x i64>
8991 %load = load <4 x i64>, <4 x i64>* %__b
8992 %1 = bitcast <4 x i64> %load to <4 x i64>
8993 %2 = icmp sgt <4 x i64> %0, %1
8994 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8995 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of <4 x i64> %__a against %__b. The low
; four lanes of %__u (viewed as <8 x i1>) are ANDed with the compare result,
; which is widened to <64 x i1> (every shuffle index above 3 is in the 4-7
; range, i.e. a zeroinitializer lane) and bitcast to i64.
; Expected codegen: the VLX run uses a masked 256-bit vpcmpgtq and kmovq; the
; NoVLX run widens both operands to zmm, uses a masked 512-bit vpcmpgtq, a
; kshiftlw/kshiftrw $12 pair to clear upper mask bits, then kmovw.
8999 define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
9000 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask:
9001 ; VLX: # %bb.0: # %entry
9002 ; VLX-NEXT: kmovd %edi, %k1
9003 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
9004 ; VLX-NEXT: kmovq %k0, %rax
9005 ; VLX-NEXT: vzeroupper
9008 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask:
9009 ; NoVLX: # %bb.0: # %entry
9010 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
9011 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
9012 ; NoVLX-NEXT: kmovw %edi, %k1
9013 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9014 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
9015 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
9016 ; NoVLX-NEXT: kmovw %k0, %eax
9017 ; NoVLX-NEXT: vzeroupper
9020 %0 = bitcast <4 x i64> %__a to <4 x i64>
9021 %1 = bitcast <4 x i64> %__b to <4 x i64>
9022 %2 = icmp sgt <4 x i64> %0, %1
9023 %3 = bitcast i8 %__u to <8 x i1>
9024 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9025 %4 = and <4 x i1> %2, %extract.i
9026 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9027 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-than compare of <4 x i64> %__a against the <4 x i64>
; loaded from %__b. The low four lanes of %__u (viewed as <8 x i1>) are ANDed
; with the compare result, which is widened to <64 x i1> (every shuffle index
; above 3 is in the 4-7 range, i.e. a zeroinitializer lane) and bitcast to i64.
; Expected codegen: the VLX run folds the load into a masked 256-bit vpcmpgtq
; and uses kmovq; the NoVLX run loads into ymm1, uses a masked 512-bit
; vpcmpgtq, a kshiftlw/kshiftrw $12 pair to clear upper mask bits, then kmovw.
9031 define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
9032 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem:
9033 ; VLX: # %bb.0: # %entry
9034 ; VLX-NEXT: kmovd %edi, %k1
9035 ; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
9036 ; VLX-NEXT: kmovq %k0, %rax
9037 ; VLX-NEXT: vzeroupper
9040 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem:
9041 ; NoVLX: # %bb.0: # %entry
9042 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
9043 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
9044 ; NoVLX-NEXT: kmovw %edi, %k1
9045 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9046 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
9047 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
9048 ; NoVLX-NEXT: kmovw %k0, %eax
9049 ; NoVLX-NEXT: vzeroupper
9052 %0 = bitcast <4 x i64> %__a to <4 x i64>
9053 %load = load <4 x i64>, <4 x i64>* %__b
9054 %1 = bitcast <4 x i64> %load to <4 x i64>
9055 %2 = icmp sgt <4 x i64> %0, %1
9056 %3 = bitcast i8 %__u to <8 x i1>
9057 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9058 %4 = and <4 x i1> %2, %extract.i
9059 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9060 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of <4 x i64> %__a against a scalar i64 loaded
; from %__b and splatted to all four lanes. The <4 x i1> result is widened to
; <64 x i1> (every shuffle index above 3 is in the 4-7 range, i.e. a
; zeroinitializer lane) and bitcast to i64.
; Expected codegen: the VLX run uses a 256-bit vpcmpgtq with a {1to4} embedded
; broadcast and kmovq; the NoVLX run uses a 512-bit vpcmpgtq with {1to8}, a
; kshiftlw/kshiftrw $12 pair to clear upper mask bits, then kmovw.
9065 define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
9066 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
9067 ; VLX: # %bb.0: # %entry
9068 ; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
9069 ; VLX-NEXT: kmovq %k0, %rax
9070 ; VLX-NEXT: vzeroupper
9073 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
9074 ; NoVLX: # %bb.0: # %entry
9075 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
9076 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9077 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
9078 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
9079 ; NoVLX-NEXT: kmovw %k0, %eax
9080 ; NoVLX-NEXT: vzeroupper
9083 %0 = bitcast <4 x i64> %__a to <4 x i64>
9084 %load = load i64, i64* %__b
9085 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
9086 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
9087 %2 = icmp sgt <4 x i64> %0, %1
9088 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9089 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of <4 x i64> %__a against a scalar i64
; loaded from %__b and splatted to all four lanes. The low four lanes of %__u
; (viewed as <8 x i1>) are ANDed with the compare result, which is widened to
; <64 x i1> (every shuffle index above 3 is in the 4-7 range, i.e. a
; zeroinitializer lane) and bitcast to i64.
; Expected codegen: the VLX run uses a masked 256-bit vpcmpgtq with a {1to4}
; embedded broadcast and kmovq; the NoVLX run uses a masked 512-bit vpcmpgtq
; with {1to8}, a kshiftlw/kshiftrw $12 pair to clear upper mask bits, then
; kmovw.
9093 define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
9094 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
9095 ; VLX: # %bb.0: # %entry
9096 ; VLX-NEXT: kmovd %edi, %k1
9097 ; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
9098 ; VLX-NEXT: kmovq %k0, %rax
9099 ; VLX-NEXT: vzeroupper
9102 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
9103 ; NoVLX: # %bb.0: # %entry
9104 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
9105 ; NoVLX-NEXT: kmovw %edi, %k1
9106 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9107 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
9108 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
9109 ; NoVLX-NEXT: kmovw %k0, %eax
9110 ; NoVLX-NEXT: vzeroupper
9113 %0 = bitcast <4 x i64> %__a to <4 x i64>
9114 %load = load i64, i64* %__b
9115 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
9116 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
9117 %2 = icmp sgt <4 x i64> %0, %1
9118 %3 = bitcast i8 %__u to <8 x i1>
9119 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9120 %4 = and <4 x i1> %extract.i, %2
9121 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9122 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of <8 x i64> %__a against %__b. The <8 x i1>
; result is widened to <16 x i1> (shuffle indices 8-15 pick zeroinitializer
; lanes) and bitcast to i16.
; Expected codegen: both runs use a single 512-bit vpcmpgtq with no mask
; shifting; they differ only in reading the mask with kmovd (VLX run) versus
; kmovw (NoVLX run).
9127 define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9128 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask:
9129 ; VLX: # %bb.0: # %entry
9130 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9131 ; VLX-NEXT: kmovd %k0, %eax
9132 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
9133 ; VLX-NEXT: vzeroupper
9136 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask:
9137 ; NoVLX: # %bb.0: # %entry
9138 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9139 ; NoVLX-NEXT: kmovw %k0, %eax
9140 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
9141 ; NoVLX-NEXT: vzeroupper
9144 %0 = bitcast <8 x i64> %__a to <8 x i64>
9145 %1 = bitcast <8 x i64> %__b to <8 x i64>
9146 %2 = icmp sgt <8 x i64> %0, %1
9147 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9148 %4 = bitcast <16 x i1> %3 to i16
; Signed greater-than compare of <8 x i64> %__a against the <8 x i64> loaded
; from %__b. The <8 x i1> result is widened to <16 x i1> (shuffle indices 8-15
; pick zeroinitializer lanes) and bitcast to i16.
; Expected codegen: both runs fold the load into a single 512-bit vpcmpgtq
; with no mask shifting; they differ only in reading the mask with kmovd
; (VLX run) versus kmovw (NoVLX run).
9152 define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
9153 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem:
9154 ; VLX: # %bb.0: # %entry
9155 ; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
9156 ; VLX-NEXT: kmovd %k0, %eax
9157 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
9158 ; VLX-NEXT: vzeroupper
9161 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem:
9162 ; NoVLX: # %bb.0: # %entry
9163 ; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
9164 ; NoVLX-NEXT: kmovw %k0, %eax
9165 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
9166 ; NoVLX-NEXT: vzeroupper
9169 %0 = bitcast <8 x i64> %__a to <8 x i64>
9170 %load = load <8 x i64>, <8 x i64>* %__b
9171 %1 = bitcast <8 x i64> %load to <8 x i64>
9172 %2 = icmp sgt <8 x i64> %0, %1
9173 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9174 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than compare of <8 x i64> %__a against %__b. All eight
; lanes of %__u (viewed as <8 x i1>) are ANDed with the compare result, which
; is widened to <16 x i1> (shuffle indices 8-15 pick zeroinitializer lanes)
; and bitcast to i16.
; Expected codegen: both runs move %__u into k1 and use a single masked
; 512-bit vpcmpgtq with no mask shifting; they differ only in kmovd (VLX run)
; versus kmovw (NoVLX run).
9178 define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9179 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask:
9180 ; VLX: # %bb.0: # %entry
9181 ; VLX-NEXT: kmovd %edi, %k1
9182 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9183 ; VLX-NEXT: kmovd %k0, %eax
9184 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
9185 ; VLX-NEXT: vzeroupper
9188 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask:
9189 ; NoVLX: # %bb.0: # %entry
9190 ; NoVLX-NEXT: kmovw %edi, %k1
9191 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9192 ; NoVLX-NEXT: kmovw %k0, %eax
9193 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
9194 ; NoVLX-NEXT: vzeroupper
9197 %0 = bitcast <8 x i64> %__a to <8 x i64>
9198 %1 = bitcast <8 x i64> %__b to <8 x i64>
9199 %2 = icmp sgt <8 x i64> %0, %1
9200 %3 = bitcast i8 %__u to <8 x i1>
9201 %4 = and <8 x i1> %2, %3
9202 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9203 %6 = bitcast <16 x i1> %5 to i16
; Masked signed greater-than compare of <8 x i64> %__a against the <8 x i64>
; loaded from %__b. All eight lanes of %__u (viewed as <8 x i1>) are ANDed
; with the compare result, which is widened to <16 x i1> (shuffle indices 8-15
; pick zeroinitializer lanes) and bitcast to i16.
; Expected codegen: both runs move %__u into k1 and fold the load into a
; single masked 512-bit vpcmpgtq with no mask shifting; they differ only in
; kmovd (VLX run) versus kmovw (NoVLX run).
9207 define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
9208 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem:
9209 ; VLX: # %bb.0: # %entry
9210 ; VLX-NEXT: kmovd %edi, %k1
9211 ; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9212 ; VLX-NEXT: kmovd %k0, %eax
9213 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
9214 ; VLX-NEXT: vzeroupper
9217 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem:
9218 ; NoVLX: # %bb.0: # %entry
9219 ; NoVLX-NEXT: kmovw %edi, %k1
9220 ; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9221 ; NoVLX-NEXT: kmovw %k0, %eax
9222 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
9223 ; NoVLX-NEXT: vzeroupper
9226 %0 = bitcast <8 x i64> %__a to <8 x i64>
9227 %load = load <8 x i64>, <8 x i64>* %__b
9228 %1 = bitcast <8 x i64> %load to <8 x i64>
9229 %2 = icmp sgt <8 x i64> %0, %1
9230 %3 = bitcast i8 %__u to <8 x i1>
9231 %4 = and <8 x i1> %2, %3
9232 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9233 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed greater-than compare of an <8 x i64> register operand against
; a scalar i64 loaded from %__b and splatted to all eight lanes. The result is
; widened to <16 x i1> and bitcast to i16. Both runs expect the splat to be
; folded into vpcmpgtq as a {1to8} broadcast memory operand.
9238 define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
9239 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
9240 ; VLX: # %bb.0: # %entry
9241 ; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9242 ; VLX-NEXT: kmovd %k0, %eax
9243 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
9244 ; VLX-NEXT: vzeroupper
9247 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
9248 ; NoVLX: # %bb.0: # %entry
9249 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9250 ; NoVLX-NEXT: kmovw %k0, %eax
9251 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
9252 ; NoVLX-NEXT: vzeroupper
9255 %0 = bitcast <8 x i64> %__a to <8 x i64>
9256 %load = load i64, i64* %__b
; Splat: insert the scalar into lane 0, then replicate lane 0 with an all-zero
; shuffle mask.
9257 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9258 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9259 %2 = icmp sgt <8 x i64> %0, %1
; Shuffle indices 0-7 select the compare lanes; 8-15 select lanes of the
; zeroinitializer operand, so the upper half of the widened mask is zero.
9260 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9261 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than compare of an <8 x i64> register operand against
; a scalar i64 loaded from %__b and splatted to all eight lanes. The result is
; ANDed with the bits of %__u, widened to <16 x i1> and bitcast to i16. Both
; runs expect a masked vpcmpgtq with a {1to8} broadcast memory operand.
9265 define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
9266 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
9267 ; VLX: # %bb.0: # %entry
9268 ; VLX-NEXT: kmovd %edi, %k1
9269 ; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9270 ; VLX-NEXT: kmovd %k0, %eax
9271 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
9272 ; VLX-NEXT: vzeroupper
9275 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
9276 ; NoVLX: # %bb.0: # %entry
9277 ; NoVLX-NEXT: kmovw %edi, %k1
9278 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9279 ; NoVLX-NEXT: kmovw %k0, %eax
9280 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
9281 ; NoVLX-NEXT: vzeroupper
9284 %0 = bitcast <8 x i64> %__a to <8 x i64>
9285 %load = load i64, i64* %__b
; Splat: insert the scalar into lane 0, then replicate lane 0 with an all-zero
; shuffle mask.
9286 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9287 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9288 %2 = icmp sgt <8 x i64> %0, %1
9289 %3 = bitcast i8 %__u to <8 x i1>
9290 %4 = and <8 x i1> %3, %2
; Shuffle indices 0-7 select the masked compare lanes; 8-15 select lanes of the
; zeroinitializer operand, so the upper half of the widened mask is zero.
9291 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9292 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed greater-than compare of two <8 x i64> register operands,
; with the <8 x i1> result widened to <32 x i1> and bitcast to i32. Both runs
; expect one vpcmpgtq on zmm registers followed by a kmov into %eax (kmovd with
; the extra features, kmovw with avx512f only).
9297 define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9298 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask:
9299 ; VLX: # %bb.0: # %entry
9300 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9301 ; VLX-NEXT: kmovd %k0, %eax
9302 ; VLX-NEXT: vzeroupper
9305 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask:
9306 ; NoVLX: # %bb.0: # %entry
9307 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9308 ; NoVLX-NEXT: kmovw %k0, %eax
9309 ; NoVLX-NEXT: vzeroupper
9312 %0 = bitcast <8 x i64> %__a to <8 x i64>
9313 %1 = bitcast <8 x i64> %__b to <8 x i64>
9314 %2 = icmp sgt <8 x i64> %0, %1
; Shuffle indices 0-7 select the compare lanes; every other index is in 8-15
; and selects a lane of the zeroinitializer operand, so lanes 8-31 are zero.
9315 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9316 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed greater-than compare of an <8 x i64> register operand against
; an <8 x i64> vector loaded from %__b, widened to <32 x i1> and bitcast to
; i32. Both runs expect the load folded into vpcmpgtq as a (%rdi) memory
; operand.
9320 define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
9321 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem:
9322 ; VLX: # %bb.0: # %entry
9323 ; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
9324 ; VLX-NEXT: kmovd %k0, %eax
9325 ; VLX-NEXT: vzeroupper
9328 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem:
9329 ; NoVLX: # %bb.0: # %entry
9330 ; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
9331 ; NoVLX-NEXT: kmovw %k0, %eax
9332 ; NoVLX-NEXT: vzeroupper
9335 %0 = bitcast <8 x i64> %__a to <8 x i64>
9336 %load = load <8 x i64>, <8 x i64>* %__b
9337 %1 = bitcast <8 x i64> %load to <8 x i64>
9338 %2 = icmp sgt <8 x i64> %0, %1
; Shuffle indices 0-7 select the compare lanes; every other index is in 8-15
; and selects a lane of the zeroinitializer operand, so lanes 8-31 are zero.
9339 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9340 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of two <8 x i64> register operands. The
; result is ANDed with the bits of %__u, widened to <32 x i1> and bitcast to
; i32. Both runs expect %edi moved into %k1 and a single masked vpcmpgtq.
9344 define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9345 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask:
9346 ; VLX: # %bb.0: # %entry
9347 ; VLX-NEXT: kmovd %edi, %k1
9348 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9349 ; VLX-NEXT: kmovd %k0, %eax
9350 ; VLX-NEXT: vzeroupper
9353 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask:
9354 ; NoVLX: # %bb.0: # %entry
9355 ; NoVLX-NEXT: kmovw %edi, %k1
9356 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9357 ; NoVLX-NEXT: kmovw %k0, %eax
9358 ; NoVLX-NEXT: vzeroupper
9361 %0 = bitcast <8 x i64> %__a to <8 x i64>
9362 %1 = bitcast <8 x i64> %__b to <8 x i64>
9363 %2 = icmp sgt <8 x i64> %0, %1
9364 %3 = bitcast i8 %__u to <8 x i1>
9365 %4 = and <8 x i1> %2, %3
; Shuffle indices 0-7 select the masked compare lanes; every other index is in
; 8-15 and selects a lane of the zeroinitializer operand, so lanes 8-31 are zero.
9366 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9367 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-than compare of an <8 x i64> register operand against
; an <8 x i64> vector loaded from %__b. The result is ANDed with the bits of
; %__u, widened to <32 x i1> and bitcast to i32. Both runs expect the load
; folded into the masked vpcmpgtq as a (%rsi) memory operand.
9371 define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
9372 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem:
9373 ; VLX: # %bb.0: # %entry
9374 ; VLX-NEXT: kmovd %edi, %k1
9375 ; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9376 ; VLX-NEXT: kmovd %k0, %eax
9377 ; VLX-NEXT: vzeroupper
9380 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem:
9381 ; NoVLX: # %bb.0: # %entry
9382 ; NoVLX-NEXT: kmovw %edi, %k1
9383 ; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9384 ; NoVLX-NEXT: kmovw %k0, %eax
9385 ; NoVLX-NEXT: vzeroupper
9388 %0 = bitcast <8 x i64> %__a to <8 x i64>
9389 %load = load <8 x i64>, <8 x i64>* %__b
9390 %1 = bitcast <8 x i64> %load to <8 x i64>
9391 %2 = icmp sgt <8 x i64> %0, %1
9392 %3 = bitcast i8 %__u to <8 x i1>
9393 %4 = and <8 x i1> %2, %3
; Shuffle indices 0-7 select the masked compare lanes; every other index is in
; 8-15 and selects a lane of the zeroinitializer operand, so lanes 8-31 are zero.
9394 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9395 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed greater-than compare of an <8 x i64> register operand against
; a scalar i64 loaded from %__b and splatted to all eight lanes. The result is
; widened to <32 x i1> and bitcast to i32. Both runs expect vpcmpgtq with a
; {1to8} broadcast memory operand.
9400 define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
9401 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
9402 ; VLX: # %bb.0: # %entry
9403 ; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9404 ; VLX-NEXT: kmovd %k0, %eax
9405 ; VLX-NEXT: vzeroupper
9408 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
9409 ; NoVLX: # %bb.0: # %entry
9410 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9411 ; NoVLX-NEXT: kmovw %k0, %eax
9412 ; NoVLX-NEXT: vzeroupper
9415 %0 = bitcast <8 x i64> %__a to <8 x i64>
9416 %load = load i64, i64* %__b
; Splat: insert the scalar into lane 0, then replicate lane 0 with an all-zero
; shuffle mask.
9417 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9418 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9419 %2 = icmp sgt <8 x i64> %0, %1
; Shuffle indices 0-7 select the compare lanes; every other index is in 8-15
; and selects a lane of the zeroinitializer operand, so lanes 8-31 are zero.
9420 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9421 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of an <8 x i64> register operand against
; a scalar i64 loaded from %__b and splatted to all eight lanes. The result is
; ANDed with the bits of %__u, widened to <32 x i1> and bitcast to i32. Both
; runs expect a masked vpcmpgtq with a {1to8} broadcast memory operand.
9425 define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
9426 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
9427 ; VLX: # %bb.0: # %entry
9428 ; VLX-NEXT: kmovd %edi, %k1
9429 ; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9430 ; VLX-NEXT: kmovd %k0, %eax
9431 ; VLX-NEXT: vzeroupper
9434 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
9435 ; NoVLX: # %bb.0: # %entry
9436 ; NoVLX-NEXT: kmovw %edi, %k1
9437 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9438 ; NoVLX-NEXT: kmovw %k0, %eax
9439 ; NoVLX-NEXT: vzeroupper
9442 %0 = bitcast <8 x i64> %__a to <8 x i64>
9443 %load = load i64, i64* %__b
; Splat: insert the scalar into lane 0, then replicate lane 0 with an all-zero
; shuffle mask.
9444 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9445 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9446 %2 = icmp sgt <8 x i64> %0, %1
9447 %3 = bitcast i8 %__u to <8 x i1>
9448 %4 = and <8 x i1> %3, %2
; Shuffle indices 0-7 select the masked compare lanes; every other index is in
; 8-15 and selects a lane of the zeroinitializer operand, so lanes 8-31 are zero.
9449 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9450 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed greater-than compare of two <8 x i64> register operands,
; with the <8 x i1> result widened to <64 x i1> and bitcast to i64. The run
; with the extra features expects kmovq into %rax; the avx512f-only run expects
; kmovw into %eax.
9455 define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9456 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask:
9457 ; VLX: # %bb.0: # %entry
9458 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9459 ; VLX-NEXT: kmovq %k0, %rax
9460 ; VLX-NEXT: vzeroupper
9463 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask:
9464 ; NoVLX: # %bb.0: # %entry
9465 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9466 ; NoVLX-NEXT: kmovw %k0, %eax
9467 ; NoVLX-NEXT: vzeroupper
9470 %0 = bitcast <8 x i64> %__a to <8 x i64>
9471 %1 = bitcast <8 x i64> %__b to <8 x i64>
9472 %2 = icmp sgt <8 x i64> %0, %1
; Shuffle indices 0-7 select the compare lanes; every other index is in 8-15
; and selects a lane of the zeroinitializer operand, so lanes 8-63 are zero.
9473 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9474 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed greater-than compare of an <8 x i64> register operand against
; an <8 x i64> vector loaded from %__b, widened to <64 x i1> and bitcast to
; i64. Both runs expect the load folded into vpcmpgtq as a (%rdi) memory
; operand.
9478 define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
9479 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem:
9480 ; VLX: # %bb.0: # %entry
9481 ; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
9482 ; VLX-NEXT: kmovq %k0, %rax
9483 ; VLX-NEXT: vzeroupper
9486 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem:
9487 ; NoVLX: # %bb.0: # %entry
9488 ; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
9489 ; NoVLX-NEXT: kmovw %k0, %eax
9490 ; NoVLX-NEXT: vzeroupper
9493 %0 = bitcast <8 x i64> %__a to <8 x i64>
9494 %load = load <8 x i64>, <8 x i64>* %__b
9495 %1 = bitcast <8 x i64> %load to <8 x i64>
9496 %2 = icmp sgt <8 x i64> %0, %1
; Shuffle indices 0-7 select the compare lanes; every other index is in 8-15
; and selects a lane of the zeroinitializer operand, so lanes 8-63 are zero.
9497 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9498 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of two <8 x i64> register operands. The
; result is ANDed with the bits of %__u, widened to <64 x i1> and bitcast to
; i64. Both runs expect %edi moved into %k1 and a single masked vpcmpgtq.
9502 define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9503 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask:
9504 ; VLX: # %bb.0: # %entry
9505 ; VLX-NEXT: kmovd %edi, %k1
9506 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9507 ; VLX-NEXT: kmovq %k0, %rax
9508 ; VLX-NEXT: vzeroupper
9511 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask:
9512 ; NoVLX: # %bb.0: # %entry
9513 ; NoVLX-NEXT: kmovw %edi, %k1
9514 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9515 ; NoVLX-NEXT: kmovw %k0, %eax
9516 ; NoVLX-NEXT: vzeroupper
9519 %0 = bitcast <8 x i64> %__a to <8 x i64>
9520 %1 = bitcast <8 x i64> %__b to <8 x i64>
9521 %2 = icmp sgt <8 x i64> %0, %1
9522 %3 = bitcast i8 %__u to <8 x i1>
9523 %4 = and <8 x i1> %2, %3
; Shuffle indices 0-7 select the masked compare lanes; every other index is in
; 8-15 and selects a lane of the zeroinitializer operand, so lanes 8-63 are zero.
9524 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9525 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-than compare of an <8 x i64> register operand against
; an <8 x i64> vector loaded from %__b. The result is ANDed with the bits of
; %__u, widened to <64 x i1> and bitcast to i64. Both runs expect the load
; folded into the masked vpcmpgtq as a (%rsi) memory operand.
9529 define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
9530 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem:
9531 ; VLX: # %bb.0: # %entry
9532 ; VLX-NEXT: kmovd %edi, %k1
9533 ; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9534 ; VLX-NEXT: kmovq %k0, %rax
9535 ; VLX-NEXT: vzeroupper
9538 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem:
9539 ; NoVLX: # %bb.0: # %entry
9540 ; NoVLX-NEXT: kmovw %edi, %k1
9541 ; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9542 ; NoVLX-NEXT: kmovw %k0, %eax
9543 ; NoVLX-NEXT: vzeroupper
9546 %0 = bitcast <8 x i64> %__a to <8 x i64>
9547 %load = load <8 x i64>, <8 x i64>* %__b
9548 %1 = bitcast <8 x i64> %load to <8 x i64>
9549 %2 = icmp sgt <8 x i64> %0, %1
9550 %3 = bitcast i8 %__u to <8 x i1>
9551 %4 = and <8 x i1> %2, %3
; Shuffle indices 0-7 select the masked compare lanes; every other index is in
; 8-15 and selects a lane of the zeroinitializer operand, so lanes 8-63 are zero.
9552 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9553 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than compare of an <8 x i64> register operand against
; a scalar i64 loaded from %__b and splatted to all eight lanes. The result is
; widened to <64 x i1> and bitcast to i64. Both runs expect vpcmpgtq with a
; {1to8} broadcast memory operand.
9558 define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
9559 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
9560 ; VLX: # %bb.0: # %entry
9561 ; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9562 ; VLX-NEXT: kmovq %k0, %rax
9563 ; VLX-NEXT: vzeroupper
9566 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
9567 ; NoVLX: # %bb.0: # %entry
9568 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9569 ; NoVLX-NEXT: kmovw %k0, %eax
9570 ; NoVLX-NEXT: vzeroupper
9573 %0 = bitcast <8 x i64> %__a to <8 x i64>
9574 %load = load i64, i64* %__b
; Splat: insert the scalar into lane 0, then replicate lane 0 with an all-zero
; shuffle mask.
9575 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9576 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9577 %2 = icmp sgt <8 x i64> %0, %1
; Shuffle indices 0-7 select the compare lanes; every other index is in 8-15
; and selects a lane of the zeroinitializer operand, so lanes 8-63 are zero.
9578 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9579 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of an <8 x i64> register operand against
; a scalar i64 loaded from %__b and splatted to all eight lanes. The result is
; ANDed with the bits of %__u, widened to <64 x i1> and bitcast to i64. Both
; runs expect a masked vpcmpgtq with a {1to8} broadcast memory operand.
9583 define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
9584 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
9585 ; VLX: # %bb.0: # %entry
9586 ; VLX-NEXT: kmovd %edi, %k1
9587 ; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9588 ; VLX-NEXT: kmovq %k0, %rax
9589 ; VLX-NEXT: vzeroupper
9592 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
9593 ; NoVLX: # %bb.0: # %entry
9594 ; NoVLX-NEXT: kmovw %edi, %k1
9595 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9596 ; NoVLX-NEXT: kmovw %k0, %eax
9597 ; NoVLX-NEXT: vzeroupper
9600 %0 = bitcast <8 x i64> %__a to <8 x i64>
9601 %load = load i64, i64* %__b
; Splat: insert the scalar into lane 0, then replicate lane 0 with an all-zero
; shuffle mask.
9602 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9603 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9604 %2 = icmp sgt <8 x i64> %0, %1
9605 %3 = bitcast i8 %__u to <8 x i1>
9606 %4 = and <8 x i1> %3, %2
; Shuffle indices 0-7 select the masked compare lanes; every other index is in
; 8-15 and selects a lane of the zeroinitializer operand, so lanes 8-63 are zero.
9607 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9608 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-or-equal compare of two 128-bit operands viewed as
; <16 x i8>, with the <16 x i1> result widened to <32 x i1> and bitcast to i32.
; The run with the extra features expects a direct vpcmpnltb into a mask
; register. The avx512f-only run expects vpcmpgtb with swapped operands, an
; inversion via vpternlogq $15, and vpmovsxbd + vptestmd to form the mask.
9613 define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
9614 ; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
9615 ; VLX: # %bb.0: # %entry
9616 ; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0
9617 ; VLX-NEXT: kmovd %k0, %eax
9620 ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
9621 ; NoVLX: # %bb.0: # %entry
9622 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9623 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9624 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9625 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9626 ; NoVLX-NEXT: kmovw %k0, %eax
9627 ; NoVLX-NEXT: vzeroupper
9630 %0 = bitcast <2 x i64> %__a to <16 x i8>
9631 %1 = bitcast <2 x i64> %__b to <16 x i8>
9632 %2 = icmp sge <16 x i8> %0, %1
; Shuffle indices 0-15 select the compare lanes; 16-31 select lanes of the
; zeroinitializer operand, so the upper half of the widened mask is zero.
9633 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9634 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed greater-or-equal compare of a <16 x i8> register operand
; against a 128-bit vector loaded from %__b, widened to <32 x i1> and bitcast
; to i32. The run with the extra features expects the load folded into
; vpcmpnltb. The avx512f-only run expects a separate vmovdqa load followed by
; vpcmpgtb with swapped operands, vpternlogq $15, vpmovsxbd and vptestmd.
9638 define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
9639 ; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
9640 ; VLX: # %bb.0: # %entry
9641 ; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0
9642 ; VLX-NEXT: kmovd %k0, %eax
9645 ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
9646 ; NoVLX: # %bb.0: # %entry
9647 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
9648 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9649 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9650 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9651 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9652 ; NoVLX-NEXT: kmovw %k0, %eax
9653 ; NoVLX-NEXT: vzeroupper
9656 %0 = bitcast <2 x i64> %__a to <16 x i8>
9657 %load = load <2 x i64>, <2 x i64>* %__b
9658 %1 = bitcast <2 x i64> %load to <16 x i8>
9659 %2 = icmp sge <16 x i8> %0, %1
; Shuffle indices 0-15 select the compare lanes; 16-31 select lanes of the
; zeroinitializer operand, so the upper half of the widened mask is zero.
9660 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9661 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-or-equal compare of two 128-bit operands viewed as
; <16 x i8>. The result is ANDed with the bits of %__u, widened to <32 x i1>
; and bitcast to i32. The run with the extra features expects a masked
; vpcmpnltb. The avx512f-only run expects the unmasked vpcmpgtb / vpternlogq /
; vpmovsxbd / vptestmd sequence, with the mask applied afterwards by
; andl %edi, %eax.
9665 define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
9666 ; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
9667 ; VLX: # %bb.0: # %entry
9668 ; VLX-NEXT: kmovd %edi, %k1
9669 ; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0 {%k1}
9670 ; VLX-NEXT: kmovd %k0, %eax
9673 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
9674 ; NoVLX: # %bb.0: # %entry
9675 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9676 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9677 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9678 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9679 ; NoVLX-NEXT: kmovw %k0, %eax
9680 ; NoVLX-NEXT: andl %edi, %eax
9681 ; NoVLX-NEXT: vzeroupper
9684 %0 = bitcast <2 x i64> %__a to <16 x i8>
9685 %1 = bitcast <2 x i64> %__b to <16 x i8>
9686 %2 = icmp sge <16 x i8> %0, %1
9687 %3 = bitcast i16 %__u to <16 x i1>
9688 %4 = and <16 x i1> %2, %3
; Shuffle indices 0-15 select the masked compare lanes; 16-31 select lanes of
; the zeroinitializer operand, so the upper half of the widened mask is zero.
9689 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9690 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-or-equal compare of a <16 x i8> register operand
; against a 128-bit vector loaded from %__b. The result is ANDed with the bits
; of %__u, widened to <32 x i1> and bitcast to i32. The run with the extra
; features expects a masked vpcmpnltb with the load folded. The avx512f-only
; run expects a separate vmovdqa load, the unmasked vpcmpgtb / vpternlogq /
; vpmovsxbd / vptestmd sequence, and the mask applied by andl %edi, %eax.
9694 define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
9695 ; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
9696 ; VLX: # %bb.0: # %entry
9697 ; VLX-NEXT: kmovd %edi, %k1
9698 ; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
9699 ; VLX-NEXT: kmovd %k0, %eax
9702 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
9703 ; NoVLX: # %bb.0: # %entry
9704 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
9705 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9706 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9707 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9708 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9709 ; NoVLX-NEXT: kmovw %k0, %eax
9710 ; NoVLX-NEXT: andl %edi, %eax
9711 ; NoVLX-NEXT: vzeroupper
9714 %0 = bitcast <2 x i64> %__a to <16 x i8>
9715 %load = load <2 x i64>, <2 x i64>* %__b
9716 %1 = bitcast <2 x i64> %load to <16 x i8>
9717 %2 = icmp sge <16 x i8> %0, %1
9718 %3 = bitcast i16 %__u to <16 x i1>
9719 %4 = and <16 x i1> %2, %3
; Shuffle indices 0-15 select the masked compare lanes; 16-31 select lanes of
; the zeroinitializer operand, so the upper half of the widened mask is zero.
9720 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9721 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed greater-or-equal compare of two 128-bit operands viewed as
; <16 x i8>, with the <16 x i1> result widened to <64 x i1> and bitcast to i64.
; The run with the extra features expects vpcmpnltb followed by kmovq into
; %rax. The avx512f-only run expects vpcmpgtb with swapped operands, an
; inversion via vpternlogq $15, vpmovsxbd + vptestmd, and kmovw into %eax.
9726 define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
9727 ; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
9728 ; VLX: # %bb.0: # %entry
9729 ; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0
9730 ; VLX-NEXT: kmovq %k0, %rax
9733 ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
9734 ; NoVLX: # %bb.0: # %entry
9735 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9736 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9737 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9738 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9739 ; NoVLX-NEXT: kmovw %k0, %eax
9740 ; NoVLX-NEXT: vzeroupper
9743 %0 = bitcast <2 x i64> %__a to <16 x i8>
9744 %1 = bitcast <2 x i64> %__b to <16 x i8>
9745 %2 = icmp sge <16 x i8> %0, %1
; Shuffle indices 0-15 select the compare lanes; every other index is in 16-31
; and selects a lane of the zeroinitializer operand, so lanes 16-63 are zero.
9746 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9747 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed greater-or-equal compare of a <16 x i8> register operand
; against a 128-bit vector loaded from %__b, widened to <64 x i1> and bitcast
; to i64. The run with the extra features expects the load folded into
; vpcmpnltb. The avx512f-only run expects a separate vmovdqa load followed by
; vpcmpgtb with swapped operands, vpternlogq $15, vpmovsxbd and vptestmd.
9751 define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
9752 ; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
9753 ; VLX: # %bb.0: # %entry
9754 ; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0
9755 ; VLX-NEXT: kmovq %k0, %rax
9758 ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
9759 ; NoVLX: # %bb.0: # %entry
9760 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
9761 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9762 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9763 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9764 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9765 ; NoVLX-NEXT: kmovw %k0, %eax
9766 ; NoVLX-NEXT: vzeroupper
9769 %0 = bitcast <2 x i64> %__a to <16 x i8>
9770 %load = load <2 x i64>, <2 x i64>* %__b
9771 %1 = bitcast <2 x i64> %load to <16 x i8>
9772 %2 = icmp sge <16 x i8> %0, %1
; Shuffle indices 0-15 select the compare lanes; every other index is in 16-31
; and selects a lane of the zeroinitializer operand, so lanes 16-63 are zero.
9773 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9774 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-or-equal compare of two 128-bit operands viewed as
; <16 x i8>. The result is ANDed with the bits of %__u, widened to <64 x i1>
; and bitcast to i64. The run with the extra features expects a masked
; vpcmpnltb and kmovq. The avx512f-only run expects the unmasked vpcmpgtb /
; vpternlogq / vpmovsxbd / vptestmd sequence, with the mask applied afterwards
; by andl %edi, %eax.
9778 define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
9779 ; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
9780 ; VLX: # %bb.0: # %entry
9781 ; VLX-NEXT: kmovd %edi, %k1
9782 ; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0 {%k1}
9783 ; VLX-NEXT: kmovq %k0, %rax
9786 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
9787 ; NoVLX: # %bb.0: # %entry
9788 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9789 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9790 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9791 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9792 ; NoVLX-NEXT: kmovw %k0, %eax
9793 ; NoVLX-NEXT: andl %edi, %eax
9794 ; NoVLX-NEXT: vzeroupper
9797 %0 = bitcast <2 x i64> %__a to <16 x i8>
9798 %1 = bitcast <2 x i64> %__b to <16 x i8>
9799 %2 = icmp sge <16 x i8> %0, %1
9800 %3 = bitcast i16 %__u to <16 x i1>
9801 %4 = and <16 x i1> %2, %3
; Shuffle indices 0-15 select the masked compare lanes; every other index is in
; 16-31 and selects a lane of the zeroinitializer operand, so lanes 16-63 are
; zero.
9802 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9803 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-or-equal compare of a <16 x i8> register operand
; against a 128-bit vector loaded from %__b. The result is ANDed with the bits
; of %__u, widened to <64 x i1> and bitcast to i64. The run with the extra
; features expects a masked vpcmpnltb with the load folded. The avx512f-only
; run expects a separate vmovdqa load, the unmasked vpcmpgtb / vpternlogq /
; vpmovsxbd / vptestmd sequence, and the mask applied by andl %edi, %eax.
9807 define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
9808 ; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
9809 ; VLX: # %bb.0: # %entry
9810 ; VLX-NEXT: kmovd %edi, %k1
9811 ; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
9812 ; VLX-NEXT: kmovq %k0, %rax
9815 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
9816 ; NoVLX: # %bb.0: # %entry
9817 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
9818 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9819 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9820 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9821 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9822 ; NoVLX-NEXT: kmovw %k0, %eax
9823 ; NoVLX-NEXT: andl %edi, %eax
9824 ; NoVLX-NEXT: vzeroupper
9827 %0 = bitcast <2 x i64> %__a to <16 x i8>
9828 %load = load <2 x i64>, <2 x i64>* %__b
9829 %1 = bitcast <2 x i64> %load to <16 x i8>
9830 %2 = icmp sge <16 x i8> %0, %1
9831 %3 = bitcast i16 %__u to <16 x i1>
9832 %4 = and <16 x i1> %2, %3
; Shuffle indices 0-15 select the masked compare lanes; every other index is in
; 16-31 and selects a lane of the zeroinitializer operand, so lanes 16-63 are
; zero.
9833 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9834 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-or-equal compare of two 256-bit operands viewed as
; <32 x i8>, with the <32 x i1> result widened to <64 x i1> and bitcast to i64.
; The run with the extra features expects a single ymm vpcmpnltb and kmovq.
; The avx512f-only run expects vpcmpgtb with swapped operands and a
; vpternlogq $15 inversion, then each 128-bit half converted separately
; (vpmovsxbd + vptestmd + kmovw) and the halves combined with shll $16 / orl.
9839 define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
9840 ; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
9841 ; VLX: # %bb.0: # %entry
9842 ; VLX-NEXT: vpcmpnltb %ymm1, %ymm0, %k0
9843 ; VLX-NEXT: kmovq %k0, %rax
9844 ; VLX-NEXT: vzeroupper
9847 ; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
9848 ; NoVLX: # %bb.0: # %entry
9849 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
9850 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9851 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
9852 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
9853 ; NoVLX-NEXT: kmovw %k0, %ecx
9854 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
9855 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9856 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9857 ; NoVLX-NEXT: kmovw %k0, %eax
9858 ; NoVLX-NEXT: shll $16, %eax
9859 ; NoVLX-NEXT: orl %ecx, %eax
9860 ; NoVLX-NEXT: vzeroupper
9863 %0 = bitcast <4 x i64> %__a to <32 x i8>
9864 %1 = bitcast <4 x i64> %__b to <32 x i8>
9865 %2 = icmp sge <32 x i8> %0, %1
; Shuffle indices 0-31 select the compare lanes; 32-63 select lanes of the
; zeroinitializer operand, so the upper half of the widened mask is zero.
9866 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
9867 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed greater-or-equal compare of a <32 x i8> register operand
; against a 256-bit vector loaded from %__b, widened to <64 x i1> and bitcast
; to i64. The run with the extra features expects the load folded into a ymm
; vpcmpnltb. The avx512f-only run expects a separate vmovdqa load, vpcmpgtb
; with swapped operands, a vpternlogq $15 inversion, then each 128-bit half
; converted separately and the halves combined with shll $16 / orl.
9871 define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
9872 ; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem:
9873 ; VLX: # %bb.0: # %entry
9874 ; VLX-NEXT: vpcmpnltb (%rdi), %ymm0, %k0
9875 ; VLX-NEXT: kmovq %k0, %rax
9876 ; VLX-NEXT: vzeroupper
9879 ; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem:
9880 ; NoVLX: # %bb.0: # %entry
9881 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
9882 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
9883 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9884 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
9885 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
9886 ; NoVLX-NEXT: kmovw %k0, %ecx
9887 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
9888 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9889 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9890 ; NoVLX-NEXT: kmovw %k0, %eax
9891 ; NoVLX-NEXT: shll $16, %eax
9892 ; NoVLX-NEXT: orl %ecx, %eax
9893 ; NoVLX-NEXT: vzeroupper
9896 %0 = bitcast <4 x i64> %__a to <32 x i8>
9897 %load = load <4 x i64>, <4 x i64>* %__b
9898 %1 = bitcast <4 x i64> %load to <32 x i8>
9899 %2 = icmp sge <32 x i8> %0, %1
; Shuffle indices 0-31 select the compare lanes; 32-63 select lanes of the
; zeroinitializer operand, so the upper half of the widened mask is zero.
9900 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
9901 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of <32 x i8> with both operands in registers.
; The compare result is ANDed with %__u (bitcast to <32 x i1>), padded to
; <64 x i1> with zeroinitializer lanes (shuffle indices 32-63) and bitcast
; to i64.
; VLX expects vpcmpnltb predicated on %k1. NoVLX expects the inverted
; vpcmpgtb result to be tested in two 16-lane halves, each ANDed with the
; corresponding half of %edi before being merged.
9905 define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
9906 ; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
9907 ; VLX: # %bb.0: # %entry
9908 ; VLX-NEXT: kmovd %edi, %k1
9909 ; VLX-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 {%k1}
9910 ; VLX-NEXT: kmovq %k0, %rax
9911 ; VLX-NEXT: vzeroupper
9914 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
9915 ; NoVLX: # %bb.0: # %entry
9916 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
9917 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9918 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
9919 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
9920 ; NoVLX-NEXT: kmovw %k0, %eax
9921 ; NoVLX-NEXT: andl %edi, %eax
9922 ; NoVLX-NEXT: shrl $16, %edi
9923 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
9924 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9925 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9926 ; NoVLX-NEXT: kmovw %k0, %ecx
9927 ; NoVLX-NEXT: andl %edi, %ecx
9928 ; NoVLX-NEXT: shll $16, %ecx
9929 ; NoVLX-NEXT: movzwl %ax, %eax
9930 ; NoVLX-NEXT: orl %ecx, %eax
9931 ; NoVLX-NEXT: vzeroupper
9934 %0 = bitcast <4 x i64> %__a to <32 x i8>
9935 %1 = bitcast <4 x i64> %__b to <32 x i8>
9936 %2 = icmp sge <32 x i8> %0, %1
9937 %3 = bitcast i32 %__u to <32 x i1>
9938 %4 = and <32 x i1> %2, %3
9939 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
9940 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare of <32 x i8>, with %__b loaded from memory.
; The compare result is ANDed with %__u (bitcast to <32 x i1>), padded to
; <64 x i1> with zeroinitializer lanes (shuffle indices 32-63) and bitcast
; to i64.
; VLX expects vpcmpnltb with a memory operand, predicated on %k1. NoVLX
; expects an explicit vmovdqa load, the inverted vpcmpgtb result tested in two
; 16-lane halves, and each half ANDed with the matching half of %edi.
9944 define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
9945 ; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
9946 ; VLX: # %bb.0: # %entry
9947 ; VLX-NEXT: kmovd %edi, %k1
9948 ; VLX-NEXT: vpcmpnltb (%rsi), %ymm0, %k0 {%k1}
9949 ; VLX-NEXT: kmovq %k0, %rax
9950 ; VLX-NEXT: vzeroupper
9953 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
9954 ; NoVLX: # %bb.0: # %entry
9955 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
9956 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
9957 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9958 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
9959 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
9960 ; NoVLX-NEXT: kmovw %k0, %eax
9961 ; NoVLX-NEXT: andl %edi, %eax
9962 ; NoVLX-NEXT: shrl $16, %edi
9963 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
9964 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9965 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9966 ; NoVLX-NEXT: kmovw %k0, %ecx
9967 ; NoVLX-NEXT: andl %edi, %ecx
9968 ; NoVLX-NEXT: shll $16, %ecx
9969 ; NoVLX-NEXT: movzwl %ax, %eax
9970 ; NoVLX-NEXT: orl %ecx, %eax
9971 ; NoVLX-NEXT: vzeroupper
9974 %0 = bitcast <4 x i64> %__a to <32 x i8>
9975 %load = load <4 x i64>, <4 x i64>* %__b
9976 %1 = bitcast <4 x i64> %load to <32 x i8>
9977 %2 = icmp sge <32 x i8> %0, %1
9978 %3 = bitcast i32 %__u to <32 x i1>
9979 %4 = and <32 x i1> %2, %3
9980 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
9981 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed >= compare of <8 x i16> with both operands in registers.
; The <8 x i1> result is padded to <16 x i1> with zeroinitializer lanes
; (shuffle indices 8-15) and bitcast to i16.
; VLX expects a single vpcmpnltw into a mask register. NoVLX expects
; vpcmpgtw with swapped operands, a vpternlogq $15 inversion, then
; vpmovsxwq/vptestmq to form the mask.
9986 define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
9987 ; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask:
9988 ; VLX: # %bb.0: # %entry
9989 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0
9990 ; VLX-NEXT: kmovd %k0, %eax
9991 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
9994 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask:
9995 ; NoVLX: # %bb.0: # %entry
9996 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
9997 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9998 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
9999 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
10000 ; NoVLX-NEXT: kmovw %k0, %eax
10001 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
10002 ; NoVLX-NEXT: vzeroupper
10005 %0 = bitcast <2 x i64> %__a to <8 x i16>
10006 %1 = bitcast <2 x i64> %__b to <8 x i16>
10007 %2 = icmp sge <8 x i16> %0, %1
10008 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10009 %4 = bitcast <16 x i1> %3 to i16
; Unmasked signed >= compare of <8 x i16>, with %__b loaded from memory.
; The <8 x i1> result is padded to <16 x i1> with zeroinitializer lanes
; (shuffle indices 8-15) and bitcast to i16.
; VLX expects vpcmpnltw with a memory operand. NoVLX expects an explicit
; vmovdqa load, vpcmpgtw with swapped operands, a vpternlogq $15 inversion,
; then vpmovsxwq/vptestmq to form the mask.
10013 define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10014 ; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem:
10015 ; VLX: # %bb.0: # %entry
10016 ; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
10017 ; VLX-NEXT: kmovd %k0, %eax
10018 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
10021 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem:
10022 ; NoVLX: # %bb.0: # %entry
10023 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
10024 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10025 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10026 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10027 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
10028 ; NoVLX-NEXT: kmovw %k0, %eax
10029 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
10030 ; NoVLX-NEXT: vzeroupper
10033 %0 = bitcast <2 x i64> %__a to <8 x i16>
10034 %load = load <2 x i64>, <2 x i64>* %__b
10035 %1 = bitcast <2 x i64> %load to <8 x i16>
10036 %2 = icmp sge <8 x i16> %0, %1
10037 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10038 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of <8 x i16> with both operands in registers.
; The compare result is ANDed with %__u (bitcast to <8 x i1>), padded to
; <16 x i1> with zeroinitializer lanes (shuffle indices 8-15) and bitcast
; to i16.
; VLX expects vpcmpnltw predicated on %k1. NoVLX expects the inverted
; vpcmpgtw result to be sign-extended and tested by vptestmq under %k1.
10042 define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10043 ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask:
10044 ; VLX: # %bb.0: # %entry
10045 ; VLX-NEXT: kmovd %edi, %k1
10046 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0 {%k1}
10047 ; VLX-NEXT: kmovd %k0, %eax
10048 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
10051 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask:
10052 ; NoVLX: # %bb.0: # %entry
10053 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10054 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10055 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10056 ; NoVLX-NEXT: kmovw %edi, %k1
10057 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
10058 ; NoVLX-NEXT: kmovw %k0, %eax
10059 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
10060 ; NoVLX-NEXT: vzeroupper
10063 %0 = bitcast <2 x i64> %__a to <8 x i16>
10064 %1 = bitcast <2 x i64> %__b to <8 x i16>
10065 %2 = icmp sge <8 x i16> %0, %1
10066 %3 = bitcast i8 %__u to <8 x i1>
10067 %4 = and <8 x i1> %2, %3
10068 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10069 %6 = bitcast <16 x i1> %5 to i16
; Masked signed >= compare of <8 x i16>, with %__b loaded from memory.
; The compare result is ANDed with %__u (bitcast to <8 x i1>), padded to
; <16 x i1> with zeroinitializer lanes (shuffle indices 8-15) and bitcast
; to i16.
; VLX expects vpcmpnltw with a memory operand, predicated on %k1. NoVLX
; expects an explicit vmovdqa load and the inverted vpcmpgtw result tested by
; vptestmq under %k1.
10073 define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10074 ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem:
10075 ; VLX: # %bb.0: # %entry
10076 ; VLX-NEXT: kmovd %edi, %k1
10077 ; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
10078 ; VLX-NEXT: kmovd %k0, %eax
10079 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
10082 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem:
10083 ; NoVLX: # %bb.0: # %entry
10084 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
10085 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10086 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10087 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10088 ; NoVLX-NEXT: kmovw %edi, %k1
10089 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
10090 ; NoVLX-NEXT: kmovw %k0, %eax
10091 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
10092 ; NoVLX-NEXT: vzeroupper
10095 %0 = bitcast <2 x i64> %__a to <8 x i16>
10096 %load = load <2 x i64>, <2 x i64>* %__b
10097 %1 = bitcast <2 x i64> %load to <8 x i16>
10098 %2 = icmp sge <8 x i16> %0, %1
10099 %3 = bitcast i8 %__u to <8 x i1>
10100 %4 = and <8 x i1> %2, %3
10101 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10102 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed >= compare of <8 x i16> with both operands in registers.
; The <8 x i1> result is padded to <32 x i1>: shuffle indices 0-7 take the
; compare lanes and every remaining index (8-15, repeated) selects a
; zeroinitializer lane. The vector is then bitcast to i32.
; VLX expects a single vpcmpnltw. NoVLX expects vpcmpgtw with swapped
; operands, a vpternlogq $15 inversion, then vpmovsxwq/vptestmq.
10107 define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10108 ; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
10109 ; VLX: # %bb.0: # %entry
10110 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0
10111 ; VLX-NEXT: kmovd %k0, %eax
10114 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
10115 ; NoVLX: # %bb.0: # %entry
10116 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10117 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10118 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10119 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
10120 ; NoVLX-NEXT: kmovw %k0, %eax
10121 ; NoVLX-NEXT: vzeroupper
10124 %0 = bitcast <2 x i64> %__a to <8 x i16>
10125 %1 = bitcast <2 x i64> %__b to <8 x i16>
10126 %2 = icmp sge <8 x i16> %0, %1
10127 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10128 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed >= compare of <8 x i16>, with %__b loaded from memory.
; The <8 x i1> result is padded to <32 x i1>: shuffle indices 0-7 take the
; compare lanes and every remaining index (8-15, repeated) selects a
; zeroinitializer lane. The vector is then bitcast to i32.
; VLX expects vpcmpnltw with a memory operand. NoVLX expects an explicit
; vmovdqa load, vpcmpgtw with swapped operands, a vpternlogq $15 inversion,
; then vpmovsxwq/vptestmq.
10132 define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10133 ; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
10134 ; VLX: # %bb.0: # %entry
10135 ; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
10136 ; VLX-NEXT: kmovd %k0, %eax
10139 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
10140 ; NoVLX: # %bb.0: # %entry
10141 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
10142 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10143 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10144 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10145 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
10146 ; NoVLX-NEXT: kmovw %k0, %eax
10147 ; NoVLX-NEXT: vzeroupper
10150 %0 = bitcast <2 x i64> %__a to <8 x i16>
10151 %load = load <2 x i64>, <2 x i64>* %__b
10152 %1 = bitcast <2 x i64> %load to <8 x i16>
10153 %2 = icmp sge <8 x i16> %0, %1
10154 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10155 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of <8 x i16> with both operands in registers.
; The compare result is ANDed with %__u (bitcast to <8 x i1>) and padded to
; <32 x i1>: shuffle indices 0-7 take the masked lanes and every remaining
; index (8-15, repeated) selects a zeroinitializer lane. The vector is then
; bitcast to i32.
; VLX expects vpcmpnltw predicated on %k1. NoVLX expects the inverted
; vpcmpgtw result to be tested by vptestmq under %k1.
10159 define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10160 ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
10161 ; VLX: # %bb.0: # %entry
10162 ; VLX-NEXT: kmovd %edi, %k1
10163 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0 {%k1}
10164 ; VLX-NEXT: kmovd %k0, %eax
10167 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
10168 ; NoVLX: # %bb.0: # %entry
10169 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10170 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10171 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10172 ; NoVLX-NEXT: kmovw %edi, %k1
10173 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
10174 ; NoVLX-NEXT: kmovw %k0, %eax
10175 ; NoVLX-NEXT: vzeroupper
10178 %0 = bitcast <2 x i64> %__a to <8 x i16>
10179 %1 = bitcast <2 x i64> %__b to <8 x i16>
10180 %2 = icmp sge <8 x i16> %0, %1
10181 %3 = bitcast i8 %__u to <8 x i1>
10182 %4 = and <8 x i1> %2, %3
10183 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10184 %6 = bitcast <32 x i1> %5 to i32
; Masked signed >= compare of <8 x i16>, with %__b loaded from memory.
; The compare result is ANDed with %__u (bitcast to <8 x i1>) and padded to
; <32 x i1>: shuffle indices 0-7 take the masked lanes and every remaining
; index (8-15, repeated) selects a zeroinitializer lane. The vector is then
; bitcast to i32.
; VLX expects vpcmpnltw with a memory operand, predicated on %k1. NoVLX
; expects an explicit vmovdqa load and the inverted vpcmpgtw result tested by
; vptestmq under %k1.
10188 define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10189 ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
10190 ; VLX: # %bb.0: # %entry
10191 ; VLX-NEXT: kmovd %edi, %k1
10192 ; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
10193 ; VLX-NEXT: kmovd %k0, %eax
10196 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
10197 ; NoVLX: # %bb.0: # %entry
10198 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
10199 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10200 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10201 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10202 ; NoVLX-NEXT: kmovw %edi, %k1
10203 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
10204 ; NoVLX-NEXT: kmovw %k0, %eax
10205 ; NoVLX-NEXT: vzeroupper
10208 %0 = bitcast <2 x i64> %__a to <8 x i16>
10209 %load = load <2 x i64>, <2 x i64>* %__b
10210 %1 = bitcast <2 x i64> %load to <8 x i16>
10211 %2 = icmp sge <8 x i16> %0, %1
10212 %3 = bitcast i8 %__u to <8 x i1>
10213 %4 = and <8 x i1> %2, %3
10214 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10215 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed >= compare of <8 x i16> with both operands in registers.
; The <8 x i1> result is padded to <64 x i1>: shuffle indices 0-7 take the
; compare lanes and every remaining index (8-15, repeated) selects a
; zeroinitializer lane. The vector is then bitcast to i64.
; VLX expects a single vpcmpnltw read back with kmovq. NoVLX expects
; vpcmpgtw with swapped operands, a vpternlogq $15 inversion, then
; vpmovsxwq/vptestmq.
10220 define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10221 ; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
10222 ; VLX: # %bb.0: # %entry
10223 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0
10224 ; VLX-NEXT: kmovq %k0, %rax
10227 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
10228 ; NoVLX: # %bb.0: # %entry
10229 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10230 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10231 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10232 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
10233 ; NoVLX-NEXT: kmovw %k0, %eax
10234 ; NoVLX-NEXT: vzeroupper
10237 %0 = bitcast <2 x i64> %__a to <8 x i16>
10238 %1 = bitcast <2 x i64> %__b to <8 x i16>
10239 %2 = icmp sge <8 x i16> %0, %1
10240 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10241 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed >= compare of <8 x i16>, with %__b loaded from memory.
; The <8 x i1> result is padded to <64 x i1>: shuffle indices 0-7 take the
; compare lanes and every remaining index (8-15, repeated) selects a
; zeroinitializer lane. The vector is then bitcast to i64.
; VLX expects vpcmpnltw with a memory operand. NoVLX expects an explicit
; vmovdqa load, vpcmpgtw with swapped operands, a vpternlogq $15 inversion,
; then vpmovsxwq/vptestmq.
10245 define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10246 ; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
10247 ; VLX: # %bb.0: # %entry
10248 ; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
10249 ; VLX-NEXT: kmovq %k0, %rax
10252 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
10253 ; NoVLX: # %bb.0: # %entry
10254 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
10255 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10256 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10257 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10258 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
10259 ; NoVLX-NEXT: kmovw %k0, %eax
10260 ; NoVLX-NEXT: vzeroupper
10263 %0 = bitcast <2 x i64> %__a to <8 x i16>
10264 %load = load <2 x i64>, <2 x i64>* %__b
10265 %1 = bitcast <2 x i64> %load to <8 x i16>
10266 %2 = icmp sge <8 x i16> %0, %1
10267 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10268 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of <8 x i16> with both operands in registers.
; The compare result is ANDed with %__u (bitcast to <8 x i1>) and padded to
; <64 x i1>: shuffle indices 0-7 take the masked lanes and every remaining
; index (8-15, repeated) selects a zeroinitializer lane. The vector is then
; bitcast to i64.
; VLX expects vpcmpnltw predicated on %k1. NoVLX expects the inverted
; vpcmpgtw result to be tested by vptestmq under %k1.
10272 define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10273 ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
10274 ; VLX: # %bb.0: # %entry
10275 ; VLX-NEXT: kmovd %edi, %k1
10276 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0 {%k1}
10277 ; VLX-NEXT: kmovq %k0, %rax
10280 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
10281 ; NoVLX: # %bb.0: # %entry
10282 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10283 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10284 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10285 ; NoVLX-NEXT: kmovw %edi, %k1
10286 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
10287 ; NoVLX-NEXT: kmovw %k0, %eax
10288 ; NoVLX-NEXT: vzeroupper
10291 %0 = bitcast <2 x i64> %__a to <8 x i16>
10292 %1 = bitcast <2 x i64> %__b to <8 x i16>
10293 %2 = icmp sge <8 x i16> %0, %1
10294 %3 = bitcast i8 %__u to <8 x i1>
10295 %4 = and <8 x i1> %2, %3
10296 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10297 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare of <8 x i16>, with %__b loaded from memory.
; The compare result is ANDed with %__u (bitcast to <8 x i1>) and padded to
; <64 x i1>: shuffle indices 0-7 take the masked lanes and every remaining
; index (8-15, repeated) selects a zeroinitializer lane. The vector is then
; bitcast to i64.
; VLX expects vpcmpnltw with a memory operand, predicated on %k1. NoVLX
; expects an explicit vmovdqa load and the inverted vpcmpgtw result tested by
; vptestmq under %k1.
10301 define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10302 ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
10303 ; VLX: # %bb.0: # %entry
10304 ; VLX-NEXT: kmovd %edi, %k1
10305 ; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
10306 ; VLX-NEXT: kmovq %k0, %rax
10309 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
10310 ; NoVLX: # %bb.0: # %entry
10311 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
10312 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10313 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10314 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10315 ; NoVLX-NEXT: kmovw %edi, %k1
10316 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
10317 ; NoVLX-NEXT: kmovw %k0, %eax
10318 ; NoVLX-NEXT: vzeroupper
10321 %0 = bitcast <2 x i64> %__a to <8 x i16>
10322 %load = load <2 x i64>, <2 x i64>* %__b
10323 %1 = bitcast <2 x i64> %load to <8 x i16>
10324 %2 = icmp sge <8 x i16> %0, %1
10325 %3 = bitcast i8 %__u to <8 x i1>
10326 %4 = and <8 x i1> %2, %3
10327 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10328 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed >= compare of <16 x i16> with both operands in registers.
; The <16 x i1> result is padded to <32 x i1> with zeroinitializer lanes
; (shuffle indices 16-31) and bitcast to i32.
; VLX expects a single ymm vpcmpnltw. NoVLX expects vpcmpgtw with swapped
; operands, a vpternlogq $15 inversion, then vpmovsxwd/vptestmd.
10333 define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10334 ; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
10335 ; VLX: # %bb.0: # %entry
10336 ; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0
10337 ; VLX-NEXT: kmovd %k0, %eax
10338 ; VLX-NEXT: vzeroupper
10341 ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
10342 ; NoVLX: # %bb.0: # %entry
10343 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10344 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10345 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10346 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10347 ; NoVLX-NEXT: kmovw %k0, %eax
10348 ; NoVLX-NEXT: vzeroupper
10351 %0 = bitcast <4 x i64> %__a to <16 x i16>
10352 %1 = bitcast <4 x i64> %__b to <16 x i16>
10353 %2 = icmp sge <16 x i16> %0, %1
10354 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10355 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed >= compare of <16 x i16>, with %__b loaded from memory.
; The <16 x i1> result is padded to <32 x i1> with zeroinitializer lanes
; (shuffle indices 16-31) and bitcast to i32.
; VLX expects vpcmpnltw with a memory operand. NoVLX expects an explicit
; vmovdqa load, vpcmpgtw with swapped operands, a vpternlogq $15 inversion,
; then vpmovsxwd/vptestmd.
10359 define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
10360 ; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem:
10361 ; VLX: # %bb.0: # %entry
10362 ; VLX-NEXT: vpcmpnltw (%rdi), %ymm0, %k0
10363 ; VLX-NEXT: kmovd %k0, %eax
10364 ; VLX-NEXT: vzeroupper
10367 ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem:
10368 ; NoVLX: # %bb.0: # %entry
10369 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
10370 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10371 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10372 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10373 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10374 ; NoVLX-NEXT: kmovw %k0, %eax
10375 ; NoVLX-NEXT: vzeroupper
10378 %0 = bitcast <4 x i64> %__a to <16 x i16>
10379 %load = load <4 x i64>, <4 x i64>* %__b
10380 %1 = bitcast <4 x i64> %load to <16 x i16>
10381 %2 = icmp sge <16 x i16> %0, %1
10382 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10383 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of <16 x i16> with both operands in registers.
; The compare result is ANDed with %__u (bitcast to <16 x i1>), padded to
; <32 x i1> with zeroinitializer lanes (shuffle indices 16-31) and bitcast
; to i32.
; VLX expects vpcmpnltw predicated on %k1. NoVLX expects the inverted
; vpcmpgtw result to be converted to a mask via vpmovsxwd/vptestmd and then
; ANDed with %edi in a general-purpose register.
10387 define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10388 ; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
10389 ; VLX: # %bb.0: # %entry
10390 ; VLX-NEXT: kmovd %edi, %k1
10391 ; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0 {%k1}
10392 ; VLX-NEXT: kmovd %k0, %eax
10393 ; VLX-NEXT: vzeroupper
10396 ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
10397 ; NoVLX: # %bb.0: # %entry
10398 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10399 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10400 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10401 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10402 ; NoVLX-NEXT: kmovw %k0, %eax
10403 ; NoVLX-NEXT: andl %edi, %eax
10404 ; NoVLX-NEXT: vzeroupper
10407 %0 = bitcast <4 x i64> %__a to <16 x i16>
10408 %1 = bitcast <4 x i64> %__b to <16 x i16>
10409 %2 = icmp sge <16 x i16> %0, %1
10410 %3 = bitcast i16 %__u to <16 x i1>
10411 %4 = and <16 x i1> %2, %3
10412 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10413 %6 = bitcast <32 x i1> %5 to i32
; Masked signed >= compare of <16 x i16>, with %__b loaded from memory.
; The compare result is ANDed with %__u (bitcast to <16 x i1>), padded to
; <32 x i1> with zeroinitializer lanes (shuffle indices 16-31) and bitcast
; to i32.
; VLX expects vpcmpnltw with a memory operand, predicated on %k1. NoVLX
; expects an explicit vmovdqa load, the inverted vpcmpgtw result converted to
; a mask via vpmovsxwd/vptestmd, and a final andl with %edi.
10417 define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
10418 ; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem:
10419 ; VLX: # %bb.0: # %entry
10420 ; VLX-NEXT: kmovd %edi, %k1
10421 ; VLX-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1}
10422 ; VLX-NEXT: kmovd %k0, %eax
10423 ; VLX-NEXT: vzeroupper
10426 ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem:
10427 ; NoVLX: # %bb.0: # %entry
10428 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
10429 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10430 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10431 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10432 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10433 ; NoVLX-NEXT: kmovw %k0, %eax
10434 ; NoVLX-NEXT: andl %edi, %eax
10435 ; NoVLX-NEXT: vzeroupper
10438 %0 = bitcast <4 x i64> %__a to <16 x i16>
10439 %load = load <4 x i64>, <4 x i64>* %__b
10440 %1 = bitcast <4 x i64> %load to <16 x i16>
10441 %2 = icmp sge <16 x i16> %0, %1
10442 %3 = bitcast i16 %__u to <16 x i1>
10443 %4 = and <16 x i1> %2, %3
10444 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10445 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed >= compare of <16 x i16> with both operands in registers.
; The <16 x i1> result is padded to <64 x i1>: shuffle indices 0-15 take the
; compare lanes and every remaining index (16-31, repeated) selects a
; zeroinitializer lane. The vector is then bitcast to i64.
; VLX expects a single ymm vpcmpnltw read back with kmovq. NoVLX expects
; vpcmpgtw with swapped operands, a vpternlogq $15 inversion, then
; vpmovsxwd/vptestmd.
10450 define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10451 ; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
10452 ; VLX: # %bb.0: # %entry
10453 ; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0
10454 ; VLX-NEXT: kmovq %k0, %rax
10455 ; VLX-NEXT: vzeroupper
10458 ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
10459 ; NoVLX: # %bb.0: # %entry
10460 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10461 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10462 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10463 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10464 ; NoVLX-NEXT: kmovw %k0, %eax
10465 ; NoVLX-NEXT: vzeroupper
10468 %0 = bitcast <4 x i64> %__a to <16 x i16>
10469 %1 = bitcast <4 x i64> %__b to <16 x i16>
10470 %2 = icmp sge <16 x i16> %0, %1
10471 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10472 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed >= compare of <16 x i16>, with %__b loaded from memory.
; The <16 x i1> result is padded to <64 x i1>: shuffle indices 0-15 take the
; compare lanes and every remaining index (16-31, repeated) selects a
; zeroinitializer lane. The vector is then bitcast to i64.
; VLX expects vpcmpnltw with a memory operand. NoVLX expects an explicit
; vmovdqa load, vpcmpgtw with swapped operands, a vpternlogq $15 inversion,
; then vpmovsxwd/vptestmd.
10476 define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
10477 ; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
10478 ; VLX: # %bb.0: # %entry
10479 ; VLX-NEXT: vpcmpnltw (%rdi), %ymm0, %k0
10480 ; VLX-NEXT: kmovq %k0, %rax
10481 ; VLX-NEXT: vzeroupper
10484 ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
10485 ; NoVLX: # %bb.0: # %entry
10486 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
10487 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10488 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10489 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10490 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10491 ; NoVLX-NEXT: kmovw %k0, %eax
10492 ; NoVLX-NEXT: vzeroupper
10495 %0 = bitcast <4 x i64> %__a to <16 x i16>
10496 %load = load <4 x i64>, <4 x i64>* %__b
10497 %1 = bitcast <4 x i64> %load to <16 x i16>
10498 %2 = icmp sge <16 x i16> %0, %1
10499 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10500 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of <16 x i16> with both operands in registers.
; The compare result is ANDed with %__u (bitcast to <16 x i1>) and padded to
; <64 x i1>: shuffle indices 0-15 take the masked lanes and every remaining
; index (16-31, repeated) selects a zeroinitializer lane. The vector is then
; bitcast to i64.
; VLX expects vpcmpnltw predicated on %k1. NoVLX expects the inverted
; vpcmpgtw result converted to a mask via vpmovsxwd/vptestmd and then ANDed
; with %edi in a general-purpose register.
10504 define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10505 ; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
10506 ; VLX: # %bb.0: # %entry
10507 ; VLX-NEXT: kmovd %edi, %k1
10508 ; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0 {%k1}
10509 ; VLX-NEXT: kmovq %k0, %rax
10510 ; VLX-NEXT: vzeroupper
10513 ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
10514 ; NoVLX: # %bb.0: # %entry
10515 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10516 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10517 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10518 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10519 ; NoVLX-NEXT: kmovw %k0, %eax
10520 ; NoVLX-NEXT: andl %edi, %eax
10521 ; NoVLX-NEXT: vzeroupper
10524 %0 = bitcast <4 x i64> %__a to <16 x i16>
10525 %1 = bitcast <4 x i64> %__b to <16 x i16>
10526 %2 = icmp sge <16 x i16> %0, %1
10527 %3 = bitcast i16 %__u to <16 x i1>
10528 %4 = and <16 x i1> %2, %3
10529 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10530 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare of <16 x i16>, with %__b loaded from memory.
; The compare result is ANDed with %__u (bitcast to <16 x i1>) and padded to
; <64 x i1>: shuffle indices 0-15 take the masked lanes and every remaining
; index (16-31, repeated) selects a zeroinitializer lane. The vector is then
; bitcast to i64.
; VLX expects vpcmpnltw with a memory operand, predicated on %k1. NoVLX
; expects an explicit vmovdqa load, the inverted vpcmpgtw result converted to
; a mask via vpmovsxwd/vptestmd, and a final andl with %edi.
10534 define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
10535 ; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
10536 ; VLX: # %bb.0: # %entry
10537 ; VLX-NEXT: kmovd %edi, %k1
10538 ; VLX-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1}
10539 ; VLX-NEXT: kmovq %k0, %rax
10540 ; VLX-NEXT: vzeroupper
10543 ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
10544 ; NoVLX: # %bb.0: # %entry
10545 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
10546 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10547 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10548 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10549 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10550 ; NoVLX-NEXT: kmovw %k0, %eax
10551 ; NoVLX-NEXT: andl %edi, %eax
10552 ; NoVLX-NEXT: vzeroupper
10555 %0 = bitcast <4 x i64> %__a to <16 x i16>
10556 %load = load <4 x i64>, <4 x i64>* %__b
10557 %1 = bitcast <4 x i64> %load to <16 x i16>
10558 %2 = icmp sge <16 x i16> %0, %1
10559 %3 = bitcast i16 %__u to <16 x i1>
10560 %4 = and <16 x i1> %2, %3
10561 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10562 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed >= compare of <32 x i16> register operands (%__a vs %__b).
; The <32 x i1> result is widened to <64 x i1> (indices >= 32 select from the
; zeroinitializer operand) and returned as i64.
10567 define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
10568 ; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
10569 ; VLX: # %bb.0: # %entry
10570 ; VLX-NEXT: vpcmpnltw %zmm1, %zmm0, %k0
10571 ; VLX-NEXT: kmovq %k0, %rax
10572 ; VLX-NEXT: vzeroupper
10575 ; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
10576 ; NoVLX: # %bb.0: # %entry
10577 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
10578 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3
10579 ; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
10580 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10581 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10582 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10583 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10584 ; NoVLX-NEXT: kmovw %k0, %ecx
10585 ; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
10586 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm0
10587 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10588 ; NoVLX-NEXT: kmovw %k0, %eax
10589 ; NoVLX-NEXT: shll $16, %eax
10590 ; NoVLX-NEXT: orl %ecx, %eax
10591 ; NoVLX-NEXT: vzeroupper
10594 %0 = bitcast <8 x i64> %__a to <32 x i16>
10595 %1 = bitcast <8 x i64> %__b to <32 x i16>
10596 %2 = icmp sge <32 x i16> %0, %1
10597 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10598 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed >= compare of <32 x i16>: %__a against a vector loaded from %__b.
; The <32 x i1> result is widened to <64 x i1> (indices >= 32 select from the
; zeroinitializer operand) and returned as i64.
10602 define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
10603 ; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
10604 ; VLX: # %bb.0: # %entry
10605 ; VLX-NEXT: vpcmpnltw (%rdi), %zmm0, %k0
10606 ; VLX-NEXT: kmovq %k0, %rax
10607 ; VLX-NEXT: vzeroupper
10610 ; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
10611 ; NoVLX: # %bb.0: # %entry
10612 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1
10613 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm2
10614 ; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm3
10615 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
10616 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
10617 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10618 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10619 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10620 ; NoVLX-NEXT: kmovw %k0, %ecx
10621 ; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
10622 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm0
10623 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10624 ; NoVLX-NEXT: kmovw %k0, %eax
10625 ; NoVLX-NEXT: shll $16, %eax
10626 ; NoVLX-NEXT: orl %ecx, %eax
10627 ; NoVLX-NEXT: vzeroupper
10630 %0 = bitcast <8 x i64> %__a to <32 x i16>
10631 %load = load <8 x i64>, <8 x i64>* %__b
10632 %1 = bitcast <8 x i64> %load to <32 x i16>
10633 %2 = icmp sge <32 x i16> %0, %1
10634 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10635 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of <32 x i16> register operands (%__a vs %__b).
; The <32 x i1> result is ANDed with the bits of %__u, then widened to <64 x i1>
; (indices >= 32 select from the zeroinitializer operand) and returned as i64.
10639 define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
10640 ; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
10641 ; VLX: # %bb.0: # %entry
10642 ; VLX-NEXT: kmovd %edi, %k1
10643 ; VLX-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1}
10644 ; VLX-NEXT: kmovq %k0, %rax
10645 ; VLX-NEXT: vzeroupper
10648 ; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
10649 ; NoVLX: # %bb.0: # %entry
10650 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm2
10651 ; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
10652 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
10653 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
10654 ; NoVLX-NEXT: kmovw %k0, %eax
10655 ; NoVLX-NEXT: andl %edi, %eax
10656 ; NoVLX-NEXT: shrl $16, %edi
10657 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
10658 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
10659 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10660 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10661 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10662 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10663 ; NoVLX-NEXT: kmovw %k0, %ecx
10664 ; NoVLX-NEXT: andl %edi, %ecx
10665 ; NoVLX-NEXT: shll $16, %ecx
10666 ; NoVLX-NEXT: movzwl %ax, %eax
10667 ; NoVLX-NEXT: orl %ecx, %eax
10668 ; NoVLX-NEXT: vzeroupper
10671 %0 = bitcast <8 x i64> %__a to <32 x i16>
10672 %1 = bitcast <8 x i64> %__b to <32 x i16>
10673 %2 = icmp sge <32 x i16> %0, %1
10674 %3 = bitcast i32 %__u to <32 x i1>
10675 %4 = and <32 x i1> %2, %3
10676 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10677 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare of <32 x i16>: %__a against a vector loaded from %__b.
; The <32 x i1> result is ANDed with the bits of %__u, then widened to <64 x i1>
; (indices >= 32 select from the zeroinitializer operand) and returned as i64.
10681 define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
10682 ; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
10683 ; VLX: # %bb.0: # %entry
10684 ; VLX-NEXT: kmovd %edi, %k1
10685 ; VLX-NEXT: vpcmpnltw (%rsi), %zmm0, %k0 {%k1}
10686 ; VLX-NEXT: kmovq %k0, %rax
10687 ; VLX-NEXT: vzeroupper
10690 ; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
10691 ; NoVLX: # %bb.0: # %entry
10692 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
10693 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm1
10694 ; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
10695 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
10696 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
10697 ; NoVLX-NEXT: kmovw %k0, %eax
10698 ; NoVLX-NEXT: andl %edi, %eax
10699 ; NoVLX-NEXT: shrl $16, %edi
10700 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
10701 ; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm1
10702 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10703 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10704 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10705 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10706 ; NoVLX-NEXT: kmovw %k0, %ecx
10707 ; NoVLX-NEXT: andl %edi, %ecx
10708 ; NoVLX-NEXT: shll $16, %ecx
10709 ; NoVLX-NEXT: movzwl %ax, %eax
10710 ; NoVLX-NEXT: orl %ecx, %eax
10711 ; NoVLX-NEXT: vzeroupper
10714 %0 = bitcast <8 x i64> %__a to <32 x i16>
10715 %load = load <8 x i64>, <8 x i64>* %__b
10716 %1 = bitcast <8 x i64> %load to <32 x i16>
10717 %2 = icmp sge <32 x i16> %0, %1
10718 %3 = bitcast i32 %__u to <32 x i1>
10719 %4 = and <32 x i1> %2, %3
10720 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10721 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed >= compare of <4 x i32> register operands (%__a vs %__b).
; The <4 x i1> result is widened to <8 x i1> (indices >= 4 select from the
; zeroinitializer operand) and returned as i8.
10726 define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10727 ; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask:
10728 ; VLX: # %bb.0: # %entry
10729 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0
10730 ; VLX-NEXT: kmovd %k0, %eax
10731 ; VLX-NEXT: # kill: def $al killed $al killed $eax
10734 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask:
10735 ; NoVLX: # %bb.0: # %entry
10736 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
10737 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10738 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
10739 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10740 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10741 ; NoVLX-NEXT: kmovw %k0, %eax
10742 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
10743 ; NoVLX-NEXT: vzeroupper
10746 %0 = bitcast <2 x i64> %__a to <4 x i32>
10747 %1 = bitcast <2 x i64> %__b to <4 x i32>
10748 %2 = icmp sge <4 x i32> %0, %1
10749 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10750 %4 = bitcast <8 x i1> %3 to i8
; Unmasked signed >= compare of <4 x i32>: %__a against a vector loaded from %__b.
; The <4 x i1> result is widened to <8 x i1> (indices >= 4 select from the
; zeroinitializer operand) and returned as i8.
10754 define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10755 ; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem:
10756 ; VLX: # %bb.0: # %entry
10757 ; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
10758 ; VLX-NEXT: kmovd %k0, %eax
10759 ; VLX-NEXT: # kill: def $al killed $al killed $eax
10762 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem:
10763 ; NoVLX: # %bb.0: # %entry
10764 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10765 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
10766 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
10767 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10768 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10769 ; NoVLX-NEXT: kmovw %k0, %eax
10770 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
10771 ; NoVLX-NEXT: vzeroupper
10774 %0 = bitcast <2 x i64> %__a to <4 x i32>
10775 %load = load <2 x i64>, <2 x i64>* %__b
10776 %1 = bitcast <2 x i64> %load to <4 x i32>
10777 %2 = icmp sge <4 x i32> %0, %1
10778 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10779 %4 = bitcast <8 x i1> %3 to i8
; Masked signed >= compare of <4 x i32> register operands (%__a vs %__b).
; Only elements 0-3 of the <8 x i1> view of %__u are used as the mask (%extract.i).
; The masked <4 x i1> result is zero-padded to <8 x i1> and returned as i8.
10783 define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10784 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
10785 ; VLX: # %bb.0: # %entry
10786 ; VLX-NEXT: kmovd %edi, %k1
10787 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
10788 ; VLX-NEXT: kmovd %k0, %eax
10789 ; VLX-NEXT: # kill: def $al killed $al killed $eax
10792 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
10793 ; NoVLX: # %bb.0: # %entry
10794 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
10795 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10796 ; NoVLX-NEXT: kmovw %edi, %k1
10797 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
10798 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10799 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10800 ; NoVLX-NEXT: kmovw %k0, %eax
10801 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
10802 ; NoVLX-NEXT: vzeroupper
10805 %0 = bitcast <2 x i64> %__a to <4 x i32>
10806 %1 = bitcast <2 x i64> %__b to <4 x i32>
10807 %2 = icmp sge <4 x i32> %0, %1
10808 %3 = bitcast i8 %__u to <8 x i1>
10809 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
10810 %4 = and <4 x i1> %2, %extract.i
10811 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10812 %6 = bitcast <8 x i1> %5 to i8
; Masked signed >= compare of <4 x i32>: %__a against a vector loaded from %__b.
; Only elements 0-3 of the <8 x i1> view of %__u are used as the mask (%extract.i).
; The masked <4 x i1> result is zero-padded to <8 x i1> and returned as i8.
10816 define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10817 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
10818 ; VLX: # %bb.0: # %entry
10819 ; VLX-NEXT: kmovd %edi, %k1
10820 ; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
10821 ; VLX-NEXT: kmovd %k0, %eax
10822 ; VLX-NEXT: # kill: def $al killed $al killed $eax
10825 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
10826 ; NoVLX: # %bb.0: # %entry
10827 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10828 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
10829 ; NoVLX-NEXT: kmovw %edi, %k1
10830 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
10831 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10832 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10833 ; NoVLX-NEXT: kmovw %k0, %eax
10834 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
10835 ; NoVLX-NEXT: vzeroupper
10838 %0 = bitcast <2 x i64> %__a to <4 x i32>
10839 %load = load <2 x i64>, <2 x i64>* %__b
10840 %1 = bitcast <2 x i64> %load to <4 x i32>
10841 %2 = icmp sge <4 x i32> %0, %1
10842 %3 = bitcast i8 %__u to <8 x i1>
10843 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
10844 %4 = and <4 x i1> %2, %extract.i
10845 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10846 %6 = bitcast <8 x i1> %5 to i8
; Unmasked signed >= compare of <4 x i32> %__a against a broadcast scalar:
; one i32 is loaded from %__b and splatted to all four lanes (insertelement +
; all-zero shuffle mask). The <4 x i1> result is zero-padded to <8 x i1> and returned as i8.
10851 define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
10852 ; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b:
10853 ; VLX: # %bb.0: # %entry
10854 ; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0
10855 ; VLX-NEXT: kmovd %k0, %eax
10856 ; VLX-NEXT: # kill: def $al killed $al killed $eax
10859 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b:
10860 ; NoVLX: # %bb.0: # %entry
10861 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10862 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
10863 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10864 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10865 ; NoVLX-NEXT: kmovw %k0, %eax
10866 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
10867 ; NoVLX-NEXT: vzeroupper
10870 %0 = bitcast <2 x i64> %__a to <4 x i32>
10871 %load = load i32, i32* %__b
10872 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
10873 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
10874 %2 = icmp sge <4 x i32> %0, %1
10875 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10876 %4 = bitcast <8 x i1> %3 to i8
; Masked signed >= compare of <4 x i32> %__a against a broadcast scalar:
; one i32 is loaded from %__b and splatted to all four lanes. Elements 0-3 of the
; <8 x i1> view of %__u form the mask; the result is zero-padded to <8 x i1> and returned as i8.
10880 define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
10881 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
10882 ; VLX: # %bb.0: # %entry
10883 ; VLX-NEXT: kmovd %edi, %k1
10884 ; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
10885 ; VLX-NEXT: kmovd %k0, %eax
10886 ; VLX-NEXT: # kill: def $al killed $al killed $eax
10889 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
10890 ; NoVLX: # %bb.0: # %entry
10891 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10892 ; NoVLX-NEXT: kmovw %edi, %k1
10893 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
10894 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10895 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10896 ; NoVLX-NEXT: kmovw %k0, %eax
10897 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
10898 ; NoVLX-NEXT: vzeroupper
10901 %0 = bitcast <2 x i64> %__a to <4 x i32>
10902 %load = load i32, i32* %__b
10903 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
10904 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
10905 %2 = icmp sge <4 x i32> %0, %1
10906 %3 = bitcast i8 %__u to <8 x i1>
10907 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
10908 %4 = and <4 x i1> %extract.i, %2
10909 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10910 %6 = bitcast <8 x i1> %5 to i8
; Unmasked signed >= compare of <4 x i32> register operands (%__a vs %__b).
; The <4 x i1> result is widened to <16 x i1> (every index >= 4 selects from the
; zeroinitializer operand) and returned as i16.
10915 define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10916 ; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask:
10917 ; VLX: # %bb.0: # %entry
10918 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0
10919 ; VLX-NEXT: kmovd %k0, %eax
10920 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
10923 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask:
10924 ; NoVLX: # %bb.0: # %entry
10925 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
10926 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10927 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
10928 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10929 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10930 ; NoVLX-NEXT: kmovw %k0, %eax
10931 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
10932 ; NoVLX-NEXT: vzeroupper
10935 %0 = bitcast <2 x i64> %__a to <4 x i32>
10936 %1 = bitcast <2 x i64> %__b to <4 x i32>
10937 %2 = icmp sge <4 x i32> %0, %1
10938 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
10939 %4 = bitcast <16 x i1> %3 to i16
; Unmasked signed >= compare of <4 x i32>: %__a against a vector loaded from %__b.
; The <4 x i1> result is widened to <16 x i1> (every index >= 4 selects from the
; zeroinitializer operand) and returned as i16.
10943 define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10944 ; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem:
10945 ; VLX: # %bb.0: # %entry
10946 ; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
10947 ; VLX-NEXT: kmovd %k0, %eax
10948 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
10951 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem:
10952 ; NoVLX: # %bb.0: # %entry
10953 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10954 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
10955 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
10956 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10957 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10958 ; NoVLX-NEXT: kmovw %k0, %eax
10959 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
10960 ; NoVLX-NEXT: vzeroupper
10963 %0 = bitcast <2 x i64> %__a to <4 x i32>
10964 %load = load <2 x i64>, <2 x i64>* %__b
10965 %1 = bitcast <2 x i64> %load to <4 x i32>
10966 %2 = icmp sge <4 x i32> %0, %1
10967 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
10968 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of <4 x i32> register operands (%__a vs %__b).
; Only elements 0-3 of the <8 x i1> view of %__u are used as the mask (%extract.i).
; The masked <4 x i1> result is zero-padded to <16 x i1> and returned as i16.
10972 define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10973 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
10974 ; VLX: # %bb.0: # %entry
10975 ; VLX-NEXT: kmovd %edi, %k1
10976 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
10977 ; VLX-NEXT: kmovd %k0, %eax
10978 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
10981 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
10982 ; NoVLX: # %bb.0: # %entry
10983 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
10984 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10985 ; NoVLX-NEXT: kmovw %edi, %k1
10986 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
10987 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10988 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10989 ; NoVLX-NEXT: kmovw %k0, %eax
10990 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
10991 ; NoVLX-NEXT: vzeroupper
10994 %0 = bitcast <2 x i64> %__a to <4 x i32>
10995 %1 = bitcast <2 x i64> %__b to <4 x i32>
10996 %2 = icmp sge <4 x i32> %0, %1
10997 %3 = bitcast i8 %__u to <8 x i1>
10998 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
10999 %4 = and <4 x i1> %2, %extract.i
11000 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11001 %6 = bitcast <16 x i1> %5 to i16
; Masked signed >= compare of <4 x i32>: %__a against a vector loaded from %__b.
; Only elements 0-3 of the <8 x i1> view of %__u are used as the mask (%extract.i).
; The masked <4 x i1> result is zero-padded to <16 x i1> and returned as i16.
11005 define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
11006 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
11007 ; VLX: # %bb.0: # %entry
11008 ; VLX-NEXT: kmovd %edi, %k1
11009 ; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
11010 ; VLX-NEXT: kmovd %k0, %eax
11011 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11014 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
11015 ; NoVLX: # %bb.0: # %entry
11016 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11017 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
11018 ; NoVLX-NEXT: kmovw %edi, %k1
11019 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11020 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11021 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11022 ; NoVLX-NEXT: kmovw %k0, %eax
11023 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11024 ; NoVLX-NEXT: vzeroupper
11027 %0 = bitcast <2 x i64> %__a to <4 x i32>
11028 %load = load <2 x i64>, <2 x i64>* %__b
11029 %1 = bitcast <2 x i64> %load to <4 x i32>
11030 %2 = icmp sge <4 x i32> %0, %1
11031 %3 = bitcast i8 %__u to <8 x i1>
11032 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11033 %4 = and <4 x i1> %2, %extract.i
11034 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11035 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed >= compare of <4 x i32> %__a against a broadcast scalar:
; one i32 is loaded from %__b and splatted to all four lanes (insertelement +
; all-zero shuffle mask). The <4 x i1> result is zero-padded to <16 x i1> and returned as i16.
11040 define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
11041 ; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b:
11042 ; VLX: # %bb.0: # %entry
11043 ; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0
11044 ; VLX-NEXT: kmovd %k0, %eax
11045 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11048 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b:
11049 ; NoVLX: # %bb.0: # %entry
11050 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11051 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
11052 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11053 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11054 ; NoVLX-NEXT: kmovw %k0, %eax
11055 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11056 ; NoVLX-NEXT: vzeroupper
11059 %0 = bitcast <2 x i64> %__a to <4 x i32>
11060 %load = load i32, i32* %__b
11061 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11062 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11063 %2 = icmp sge <4 x i32> %0, %1
11064 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11065 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of <4 x i32> %__a against a broadcast scalar:
; one i32 is loaded from %__b and splatted to all four lanes. Elements 0-3 of the
; <8 x i1> view of %__u form the mask; the result is zero-padded to <16 x i1> and returned as i16.
11069 define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
11070 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
11071 ; VLX: # %bb.0: # %entry
11072 ; VLX-NEXT: kmovd %edi, %k1
11073 ; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
11074 ; VLX-NEXT: kmovd %k0, %eax
11075 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11078 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
11079 ; NoVLX: # %bb.0: # %entry
11080 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11081 ; NoVLX-NEXT: kmovw %edi, %k1
11082 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
11083 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11084 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11085 ; NoVLX-NEXT: kmovw %k0, %eax
11086 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11087 ; NoVLX-NEXT: vzeroupper
11090 %0 = bitcast <2 x i64> %__a to <4 x i32>
11091 %load = load i32, i32* %__b
11092 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11093 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11094 %2 = icmp sge <4 x i32> %0, %1
11095 %3 = bitcast i8 %__u to <8 x i1>
11096 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11097 %4 = and <4 x i1> %extract.i, %2
11098 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11099 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed >= compare of <4 x i32> register operands (%__a vs %__b).
; The <4 x i1> result is widened to <32 x i1> (every index >= 4 selects from the
; zeroinitializer operand) and returned as i32.
11104 define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11105 ; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
11106 ; VLX: # %bb.0: # %entry
11107 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0
11108 ; VLX-NEXT: kmovd %k0, %eax
11111 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
11112 ; NoVLX: # %bb.0: # %entry
11113 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
11114 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11115 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11116 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11117 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11118 ; NoVLX-NEXT: kmovw %k0, %eax
11119 ; NoVLX-NEXT: vzeroupper
11122 %0 = bitcast <2 x i64> %__a to <4 x i32>
11123 %1 = bitcast <2 x i64> %__b to <4 x i32>
11124 %2 = icmp sge <4 x i32> %0, %1
11125 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11126 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed >= compare of <4 x i32>: %__a against a vector loaded from %__b.
; The <4 x i1> result is widened to <32 x i1> (every index >= 4 selects from the
; zeroinitializer operand) and returned as i32.
11130 define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
11131 ; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
11132 ; VLX: # %bb.0: # %entry
11133 ; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
11134 ; VLX-NEXT: kmovd %k0, %eax
11137 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
11138 ; NoVLX: # %bb.0: # %entry
11139 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11140 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
11141 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11142 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11143 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11144 ; NoVLX-NEXT: kmovw %k0, %eax
11145 ; NoVLX-NEXT: vzeroupper
11148 %0 = bitcast <2 x i64> %__a to <4 x i32>
11149 %load = load <2 x i64>, <2 x i64>* %__b
11150 %1 = bitcast <2 x i64> %load to <4 x i32>
11151 %2 = icmp sge <4 x i32> %0, %1
11152 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11153 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of <4 x i32> register operands (%__a vs %__b).
; Only elements 0-3 of the <8 x i1> view of %__u are used as the mask (%extract.i).
; The masked <4 x i1> result is zero-padded to <32 x i1> and returned as i32.
11157 define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11158 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
11159 ; VLX: # %bb.0: # %entry
11160 ; VLX-NEXT: kmovd %edi, %k1
11161 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
11162 ; VLX-NEXT: kmovd %k0, %eax
11165 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
11166 ; NoVLX: # %bb.0: # %entry
11167 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
11168 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11169 ; NoVLX-NEXT: kmovw %edi, %k1
11170 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11171 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11172 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11173 ; NoVLX-NEXT: kmovw %k0, %eax
11174 ; NoVLX-NEXT: vzeroupper
11177 %0 = bitcast <2 x i64> %__a to <4 x i32>
11178 %1 = bitcast <2 x i64> %__b to <4 x i32>
11179 %2 = icmp sge <4 x i32> %0, %1
11180 %3 = bitcast i8 %__u to <8 x i1>
11181 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11182 %4 = and <4 x i1> %2, %extract.i
11183 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11184 %6 = bitcast <32 x i1> %5 to i32
; Masked signed >= compare of <4 x i32>: %__a against a vector loaded from %__b.
; Only elements 0-3 of the <8 x i1> view of %__u are used as the mask (%extract.i).
; The masked <4 x i1> result is zero-padded to <32 x i1> and returned as i32.
11188 define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
11189 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
11190 ; VLX: # %bb.0: # %entry
11191 ; VLX-NEXT: kmovd %edi, %k1
11192 ; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
11193 ; VLX-NEXT: kmovd %k0, %eax
11196 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
11197 ; NoVLX: # %bb.0: # %entry
11198 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11199 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
11200 ; NoVLX-NEXT: kmovw %edi, %k1
11201 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11202 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11203 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11204 ; NoVLX-NEXT: kmovw %k0, %eax
11205 ; NoVLX-NEXT: vzeroupper
11208 %0 = bitcast <2 x i64> %__a to <4 x i32>
11209 %load = load <2 x i64>, <2 x i64>* %__b
11210 %1 = bitcast <2 x i64> %load to <4 x i32>
11211 %2 = icmp sge <4 x i32> %0, %1
11212 %3 = bitcast i8 %__u to <8 x i1>
11213 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11214 %4 = and <4 x i1> %2, %extract.i
11215 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11216 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed >= compare of <4 x i32> %__a against a broadcast scalar:
; one i32 is loaded from %__b and splatted to all four lanes (insertelement +
; all-zero shuffle mask). The <4 x i1> result is zero-padded to <32 x i1> and returned as i32.
11221 define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
11222 ; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
11223 ; VLX: # %bb.0: # %entry
11224 ; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0
11225 ; VLX-NEXT: kmovd %k0, %eax
11228 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
11229 ; NoVLX: # %bb.0: # %entry
11230 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11231 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
11232 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11233 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11234 ; NoVLX-NEXT: kmovw %k0, %eax
11235 ; NoVLX-NEXT: vzeroupper
11238 %0 = bitcast <2 x i64> %__a to <4 x i32>
11239 %load = load i32, i32* %__b
11240 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11241 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11242 %2 = icmp sge <4 x i32> %0, %1
11243 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11244 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of <4 x i32> %__a against a broadcast scalar:
; one i32 is loaded from %__b and splatted to all four lanes. Elements 0-3 of the
; <8 x i1> view of %__u form the mask; the result is zero-padded to <32 x i1> and returned as i32.
11248 define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
11249 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
11250 ; VLX: # %bb.0: # %entry
11251 ; VLX-NEXT: kmovd %edi, %k1
11252 ; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
11253 ; VLX-NEXT: kmovd %k0, %eax
11256 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
11257 ; NoVLX: # %bb.0: # %entry
11258 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11259 ; NoVLX-NEXT: kmovw %edi, %k1
11260 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
11261 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11262 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11263 ; NoVLX-NEXT: kmovw %k0, %eax
11264 ; NoVLX-NEXT: vzeroupper
11267 %0 = bitcast <2 x i64> %__a to <4 x i32>
11268 %load = load i32, i32* %__b
11269 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11270 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11271 %2 = icmp sge <4 x i32> %0, %1
11272 %3 = bitcast i8 %__u to <8 x i1>
11273 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11274 %4 = and <4 x i1> %extract.i, %2
11275 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11276 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed >= compare of the four i32 lanes of %__a and %__b (both passed
; by value). The <4 x i1> result is zero-padded to <64 x i1> and bitcast to i64.
; Expected with AVX512VL: one 128-bit compare into a mask register. Expected
; with only AVX512F: a 512-bit compare plus a kshift pair that clears the upper
; 12 mask bits.
11281 define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11282 ; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask:
11283 ; VLX: # %bb.0: # %entry
11284 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0
11285 ; VLX-NEXT: kmovq %k0, %rax
11288 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask:
11289 ; NoVLX: # %bb.0: # %entry
11290 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
11291 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11292 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11293 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11294 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11295 ; NoVLX-NEXT: kmovw %k0, %eax
11296 ; NoVLX-NEXT: vzeroupper
11299 %0 = bitcast <2 x i64> %__a to <4 x i32>
11300 %1 = bitcast <2 x i64> %__b to <4 x i32>
11301 %2 = icmp sge <4 x i32> %0, %1
; Indices 4-7 select lanes of the zeroinitializer operand, so every result lane past the first four is false.
11302 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11303 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed >= compare of the four i32 lanes of %__a against a full vector
; loaded from %__b. The <4 x i1> result is zero-padded to <64 x i1> and bitcast
; to i64.
; Expected with AVX512VL: the load is folded into the 128-bit compare. Expected
; with only AVX512F: a separate vmovdqa, a 512-bit compare, and a kshift pair
; that clears the upper 12 mask bits.
11307 define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
11308 ; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem:
11309 ; VLX: # %bb.0: # %entry
11310 ; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
11311 ; VLX-NEXT: kmovq %k0, %rax
11314 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem:
11315 ; NoVLX: # %bb.0: # %entry
11316 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11317 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
11318 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11319 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11320 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11321 ; NoVLX-NEXT: kmovw %k0, %eax
11322 ; NoVLX-NEXT: vzeroupper
11325 %0 = bitcast <2 x i64> %__a to <4 x i32>
11326 %load = load <2 x i64>, <2 x i64>* %__b
11327 %1 = bitcast <2 x i64> %load to <4 x i32>
11328 %2 = icmp sge <4 x i32> %0, %1
; Indices 4-7 select lanes of the zeroinitializer operand, so every result lane past the first four is false.
11329 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11330 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of the four i32 lanes of %__a and %__b (both passed
; by value). The low four bits of %__u gate the result, which is zero-padded to
; <64 x i1> and bitcast to i64.
; Expected with AVX512VL: one masked 128-bit compare. Expected with only
; AVX512F: a masked 512-bit compare plus a kshift pair that clears the upper 12
; mask bits.
11334 define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11335 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask:
11336 ; VLX: # %bb.0: # %entry
11337 ; VLX-NEXT: kmovd %edi, %k1
11338 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
11339 ; VLX-NEXT: kmovq %k0, %rax
11342 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask:
11343 ; NoVLX: # %bb.0: # %entry
11344 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
11345 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11346 ; NoVLX-NEXT: kmovw %edi, %k1
11347 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11348 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11349 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11350 ; NoVLX-NEXT: kmovw %k0, %eax
11351 ; NoVLX-NEXT: vzeroupper
11354 %0 = bitcast <2 x i64> %__a to <4 x i32>
11355 %1 = bitcast <2 x i64> %__b to <4 x i32>
11356 %2 = icmp sge <4 x i32> %0, %1
11357 %3 = bitcast i8 %__u to <8 x i1>
; Keep only the low four bits of %__u, one per compared lane.
11358 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11359 %4 = and <4 x i1> %2, %extract.i
; Indices 4-7 select lanes of the zeroinitializer operand, so every result lane past the first four is false.
11360 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11361 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare of the four i32 lanes of %__a against a full vector
; loaded from %__b. The low four bits of %__u gate the result, which is
; zero-padded to <64 x i1> and bitcast to i64.
; Expected with AVX512VL: the load is folded into the masked 128-bit compare.
; Expected with only AVX512F: a separate vmovdqa, a masked 512-bit compare, and
; a kshift pair that clears the upper 12 mask bits.
11365 define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
11366 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem:
11367 ; VLX: # %bb.0: # %entry
11368 ; VLX-NEXT: kmovd %edi, %k1
11369 ; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
11370 ; VLX-NEXT: kmovq %k0, %rax
11373 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem:
11374 ; NoVLX: # %bb.0: # %entry
11375 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11376 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
11377 ; NoVLX-NEXT: kmovw %edi, %k1
11378 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11379 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11380 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11381 ; NoVLX-NEXT: kmovw %k0, %eax
11382 ; NoVLX-NEXT: vzeroupper
11385 %0 = bitcast <2 x i64> %__a to <4 x i32>
11386 %load = load <2 x i64>, <2 x i64>* %__b
11387 %1 = bitcast <2 x i64> %load to <4 x i32>
11388 %2 = icmp sge <4 x i32> %0, %1
11389 %3 = bitcast i8 %__u to <8 x i1>
; Keep only the low four bits of %__u, one per compared lane.
11390 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11391 %4 = and <4 x i1> %2, %extract.i
; Indices 4-7 select lanes of the zeroinitializer operand, so every result lane past the first four is false.
11392 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11393 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed >= compare of the four i32 lanes of %__a against one i32
; loaded from %__b and splatted to every lane. The <4 x i1> result is
; zero-padded to <64 x i1> and bitcast to i64.
; Expected with AVX512VL: one 128-bit {1to4} broadcast compare. Expected with
; only AVX512F: a 512-bit {1to16} broadcast compare plus a kshift pair that
; clears the upper 12 mask bits.
11398 define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
11399 ; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
11400 ; VLX: # %bb.0: # %entry
11401 ; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0
11402 ; VLX-NEXT: kmovq %k0, %rax
11405 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
11406 ; NoVLX: # %bb.0: # %entry
11407 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11408 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
11409 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11410 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11411 ; NoVLX-NEXT: kmovw %k0, %eax
11412 ; NoVLX-NEXT: vzeroupper
11415 %0 = bitcast <2 x i64> %__a to <4 x i32>
11416 %load = load i32, i32* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle with an all-zero index mask.
11417 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11418 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11419 %2 = icmp sge <4 x i32> %0, %1
; Indices 4-7 select lanes of the zeroinitializer operand, so every result lane past the first four is false.
11420 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11421 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of the four i32 lanes of %__a against one i32 loaded
; from %__b and splatted to every lane. The low four bits of %__u gate the
; result, which is zero-padded to <64 x i1> and bitcast to i64.
; Expected with AVX512VL: one masked 128-bit {1to4} broadcast compare. Expected
; with only AVX512F: a masked 512-bit {1to16} broadcast compare plus a kshift
; pair that clears the upper 12 mask bits.
11425 define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
11426 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
11427 ; VLX: # %bb.0: # %entry
11428 ; VLX-NEXT: kmovd %edi, %k1
11429 ; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
11430 ; VLX-NEXT: kmovq %k0, %rax
11433 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
11434 ; NoVLX: # %bb.0: # %entry
11435 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11436 ; NoVLX-NEXT: kmovw %edi, %k1
11437 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
11438 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11439 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11440 ; NoVLX-NEXT: kmovw %k0, %eax
11441 ; NoVLX-NEXT: vzeroupper
11444 %0 = bitcast <2 x i64> %__a to <4 x i32>
11445 %load = load i32, i32* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle with an all-zero index mask.
11446 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11447 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11448 %2 = icmp sge <4 x i32> %0, %1
11449 %3 = bitcast i8 %__u to <8 x i1>
; Keep only the low four bits of %__u, one per compared lane.
11450 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11451 %4 = and <4 x i1> %extract.i, %2
; Indices 4-7 select lanes of the zeroinitializer operand, so every result lane past the first four is false.
11452 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11453 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed >= compare of the eight i32 lanes of %__a and %__b (both
; passed by value). The <8 x i1> result is zero-padded to <16 x i1> and bitcast
; to i16.
; Expected with AVX512VL: one 256-bit compare into a mask register. Expected
; with only AVX512F: a 512-bit compare plus a kshift pair that clears the upper
; 8 mask bits.
11458 define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11459 ; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask:
11460 ; VLX: # %bb.0: # %entry
11461 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0
11462 ; VLX-NEXT: kmovd %k0, %eax
11463 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11464 ; VLX-NEXT: vzeroupper
11467 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask:
11468 ; NoVLX: # %bb.0: # %entry
11469 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
11470 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11471 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11472 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11473 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11474 ; NoVLX-NEXT: kmovw %k0, %eax
11475 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11476 ; NoVLX-NEXT: vzeroupper
11479 %0 = bitcast <4 x i64> %__a to <8 x i32>
11480 %1 = bitcast <4 x i64> %__b to <8 x i32>
11481 %2 = icmp sge <8 x i32> %0, %1
; Indices 8-15 select lanes of the zeroinitializer operand, so every result lane past the first eight is false.
11482 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11483 %4 = bitcast <16 x i1> %3 to i16
; Unmasked signed >= compare of the eight i32 lanes of %__a against a full
; vector loaded from %__b. The <8 x i1> result is zero-padded to <16 x i1> and
; bitcast to i16.
; Expected with AVX512VL: the load is folded into the 256-bit compare. Expected
; with only AVX512F: a separate vmovdqa, a 512-bit compare, and a kshift pair
; that clears the upper 8 mask bits.
11487 define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
11488 ; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem:
11489 ; VLX: # %bb.0: # %entry
11490 ; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
11491 ; VLX-NEXT: kmovd %k0, %eax
11492 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11493 ; VLX-NEXT: vzeroupper
11496 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem:
11497 ; NoVLX: # %bb.0: # %entry
11498 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11499 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
11500 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11501 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11502 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11503 ; NoVLX-NEXT: kmovw %k0, %eax
11504 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11505 ; NoVLX-NEXT: vzeroupper
11508 %0 = bitcast <4 x i64> %__a to <8 x i32>
11509 %load = load <4 x i64>, <4 x i64>* %__b
11510 %1 = bitcast <4 x i64> %load to <8 x i32>
11511 %2 = icmp sge <8 x i32> %0, %1
; Indices 8-15 select lanes of the zeroinitializer operand, so every result lane past the first eight is false.
11512 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11513 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of the eight i32 lanes of %__a and %__b (both passed
; by value). All eight bits of %__u gate the result, which is zero-padded to
; <16 x i1> and bitcast to i16.
; Expected with AVX512VL: one masked 256-bit compare. Expected with only
; AVX512F: a masked 512-bit compare plus a kshift pair that clears the upper 8
; mask bits.
11517 define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11518 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask:
11519 ; VLX: # %bb.0: # %entry
11520 ; VLX-NEXT: kmovd %edi, %k1
11521 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 {%k1}
11522 ; VLX-NEXT: kmovd %k0, %eax
11523 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11524 ; VLX-NEXT: vzeroupper
11527 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask:
11528 ; NoVLX: # %bb.0: # %entry
11529 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
11530 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11531 ; NoVLX-NEXT: kmovw %edi, %k1
11532 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11533 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11534 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11535 ; NoVLX-NEXT: kmovw %k0, %eax
11536 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11537 ; NoVLX-NEXT: vzeroupper
11540 %0 = bitcast <4 x i64> %__a to <8 x i32>
11541 %1 = bitcast <4 x i64> %__b to <8 x i32>
11542 %2 = icmp sge <8 x i32> %0, %1
; Reinterpret the i8 write mask as one i1 per compared lane.
11543 %3 = bitcast i8 %__u to <8 x i1>
11544 %4 = and <8 x i1> %2, %3
; Indices 8-15 select lanes of the zeroinitializer operand, so every result lane past the first eight is false.
11545 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11546 %6 = bitcast <16 x i1> %5 to i16
; Masked signed >= compare of the eight i32 lanes of %__a against a full vector
; loaded from %__b. All eight bits of %__u gate the result, which is zero-padded
; to <16 x i1> and bitcast to i16.
; Expected with AVX512VL: the load is folded into the masked 256-bit compare.
; Expected with only AVX512F: a separate vmovdqa, a masked 512-bit compare, and
; a kshift pair that clears the upper 8 mask bits.
11550 define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
11551 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem:
11552 ; VLX: # %bb.0: # %entry
11553 ; VLX-NEXT: kmovd %edi, %k1
11554 ; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
11555 ; VLX-NEXT: kmovd %k0, %eax
11556 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11557 ; VLX-NEXT: vzeroupper
11560 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem:
11561 ; NoVLX: # %bb.0: # %entry
11562 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11563 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
11564 ; NoVLX-NEXT: kmovw %edi, %k1
11565 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11566 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11567 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11568 ; NoVLX-NEXT: kmovw %k0, %eax
11569 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11570 ; NoVLX-NEXT: vzeroupper
11573 %0 = bitcast <4 x i64> %__a to <8 x i32>
11574 %load = load <4 x i64>, <4 x i64>* %__b
11575 %1 = bitcast <4 x i64> %load to <8 x i32>
11576 %2 = icmp sge <8 x i32> %0, %1
; Reinterpret the i8 write mask as one i1 per compared lane.
11577 %3 = bitcast i8 %__u to <8 x i1>
11578 %4 = and <8 x i1> %2, %3
; Indices 8-15 select lanes of the zeroinitializer operand, so every result lane past the first eight is false.
11579 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11580 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed >= compare of the eight i32 lanes of %__a against one i32
; loaded from %__b and splatted to every lane. The <8 x i1> result is
; zero-padded to <16 x i1> and bitcast to i16.
; Expected with AVX512VL: one 256-bit {1to8} broadcast compare. Expected with
; only AVX512F: a 512-bit {1to16} broadcast compare plus a kshift pair that
; clears the upper 8 mask bits.
11585 define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
11586 ; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
11587 ; VLX: # %bb.0: # %entry
11588 ; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0
11589 ; VLX-NEXT: kmovd %k0, %eax
11590 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11591 ; VLX-NEXT: vzeroupper
11594 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
11595 ; NoVLX: # %bb.0: # %entry
11596 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11597 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
11598 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11599 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11600 ; NoVLX-NEXT: kmovw %k0, %eax
11601 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11602 ; NoVLX-NEXT: vzeroupper
11605 %0 = bitcast <4 x i64> %__a to <8 x i32>
11606 %load = load i32, i32* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle with an all-zero index mask.
11607 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
11608 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
11609 %2 = icmp sge <8 x i32> %0, %1
; Indices 8-15 select lanes of the zeroinitializer operand, so every result lane past the first eight is false.
11610 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11611 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of the eight i32 lanes of %__a against one i32 loaded
; from %__b and splatted to every lane. All eight bits of %__u gate the result,
; which is zero-padded to <16 x i1> and bitcast to i16.
; Expected with AVX512VL: one masked 256-bit {1to8} broadcast compare. Expected
; with only AVX512F: a masked 512-bit {1to16} broadcast compare plus a kshift
; pair that clears the upper 8 mask bits.
11615 define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
11616 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
11617 ; VLX: # %bb.0: # %entry
11618 ; VLX-NEXT: kmovd %edi, %k1
11619 ; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
11620 ; VLX-NEXT: kmovd %k0, %eax
11621 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11622 ; VLX-NEXT: vzeroupper
11625 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
11626 ; NoVLX: # %bb.0: # %entry
11627 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11628 ; NoVLX-NEXT: kmovw %edi, %k1
11629 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
11630 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11631 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11632 ; NoVLX-NEXT: kmovw %k0, %eax
11633 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11634 ; NoVLX-NEXT: vzeroupper
11637 %0 = bitcast <4 x i64> %__a to <8 x i32>
11638 %load = load i32, i32* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle with an all-zero index mask.
11639 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
11640 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
11641 %2 = icmp sge <8 x i32> %0, %1
; Reinterpret the i8 write mask as one i1 per compared lane.
11642 %3 = bitcast i8 %__u to <8 x i1>
11643 %4 = and <8 x i1> %3, %2
; Indices 8-15 select lanes of the zeroinitializer operand, so every result lane past the first eight is false.
11644 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11645 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed >= compare of the eight i32 lanes of %__a and %__b (both
; passed by value). The <8 x i1> result is zero-padded to <32 x i1> and bitcast
; to i32.
; Expected with AVX512VL: one 256-bit compare into a mask register. Expected
; with only AVX512F: a 512-bit compare plus a kshift pair that clears the upper
; 8 mask bits.
11650 define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11651 ; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
11652 ; VLX: # %bb.0: # %entry
11653 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0
11654 ; VLX-NEXT: kmovd %k0, %eax
11655 ; VLX-NEXT: vzeroupper
11658 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
11659 ; NoVLX: # %bb.0: # %entry
11660 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
11661 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11662 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11663 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11664 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11665 ; NoVLX-NEXT: kmovw %k0, %eax
11666 ; NoVLX-NEXT: vzeroupper
11669 %0 = bitcast <4 x i64> %__a to <8 x i32>
11670 %1 = bitcast <4 x i64> %__b to <8 x i32>
11671 %2 = icmp sge <8 x i32> %0, %1
; Indices 8-15 select lanes of the zeroinitializer operand, so every result lane past the first eight is false.
11672 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11673 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed >= compare of the eight i32 lanes of %__a against a full
; vector loaded from %__b. The <8 x i1> result is zero-padded to <32 x i1> and
; bitcast to i32.
; Expected with AVX512VL: the load is folded into the 256-bit compare. Expected
; with only AVX512F: a separate vmovdqa, a 512-bit compare, and a kshift pair
; that clears the upper 8 mask bits.
11677 define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
11678 ; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
11679 ; VLX: # %bb.0: # %entry
11680 ; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
11681 ; VLX-NEXT: kmovd %k0, %eax
11682 ; VLX-NEXT: vzeroupper
11685 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
11686 ; NoVLX: # %bb.0: # %entry
11687 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11688 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
11689 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11690 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11691 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11692 ; NoVLX-NEXT: kmovw %k0, %eax
11693 ; NoVLX-NEXT: vzeroupper
11696 %0 = bitcast <4 x i64> %__a to <8 x i32>
11697 %load = load <4 x i64>, <4 x i64>* %__b
11698 %1 = bitcast <4 x i64> %load to <8 x i32>
11699 %2 = icmp sge <8 x i32> %0, %1
; Indices 8-15 select lanes of the zeroinitializer operand, so every result lane past the first eight is false.
11700 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11701 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of the eight i32 lanes of %__a and %__b (both passed
; by value). All eight bits of %__u gate the result, which is zero-padded to
; <32 x i1> and bitcast to i32.
; Expected with AVX512VL: one masked 256-bit compare. Expected with only
; AVX512F: a masked 512-bit compare plus a kshift pair that clears the upper 8
; mask bits.
11705 define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11706 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
11707 ; VLX: # %bb.0: # %entry
11708 ; VLX-NEXT: kmovd %edi, %k1
11709 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 {%k1}
11710 ; VLX-NEXT: kmovd %k0, %eax
11711 ; VLX-NEXT: vzeroupper
11714 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
11715 ; NoVLX: # %bb.0: # %entry
11716 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
11717 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11718 ; NoVLX-NEXT: kmovw %edi, %k1
11719 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11720 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11721 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11722 ; NoVLX-NEXT: kmovw %k0, %eax
11723 ; NoVLX-NEXT: vzeroupper
11726 %0 = bitcast <4 x i64> %__a to <8 x i32>
11727 %1 = bitcast <4 x i64> %__b to <8 x i32>
11728 %2 = icmp sge <8 x i32> %0, %1
; Reinterpret the i8 write mask as one i1 per compared lane.
11729 %3 = bitcast i8 %__u to <8 x i1>
11730 %4 = and <8 x i1> %2, %3
; Indices 8-15 select lanes of the zeroinitializer operand, so every result lane past the first eight is false.
11731 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11732 %6 = bitcast <32 x i1> %5 to i32
; Masked signed >= compare of the eight i32 lanes of %__a against a full vector
; loaded from %__b. All eight bits of %__u gate the result, which is zero-padded
; to <32 x i1> and bitcast to i32.
; Expected with AVX512VL: the load is folded into the masked 256-bit compare.
; Expected with only AVX512F: a separate vmovdqa, a masked 512-bit compare, and
; a kshift pair that clears the upper 8 mask bits.
11736 define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
11737 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
11738 ; VLX: # %bb.0: # %entry
11739 ; VLX-NEXT: kmovd %edi, %k1
11740 ; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
11741 ; VLX-NEXT: kmovd %k0, %eax
11742 ; VLX-NEXT: vzeroupper
11745 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
11746 ; NoVLX: # %bb.0: # %entry
11747 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11748 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
11749 ; NoVLX-NEXT: kmovw %edi, %k1
11750 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11751 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11752 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11753 ; NoVLX-NEXT: kmovw %k0, %eax
11754 ; NoVLX-NEXT: vzeroupper
11757 %0 = bitcast <4 x i64> %__a to <8 x i32>
11758 %load = load <4 x i64>, <4 x i64>* %__b
11759 %1 = bitcast <4 x i64> %load to <8 x i32>
11760 %2 = icmp sge <8 x i32> %0, %1
; Reinterpret the i8 write mask as one i1 per compared lane.
11761 %3 = bitcast i8 %__u to <8 x i1>
11762 %4 = and <8 x i1> %2, %3
; Indices 8-15 select lanes of the zeroinitializer operand, so every result lane past the first eight is false.
11763 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11764 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed >= compare of the eight i32 lanes of %__a against one i32
; loaded from %__b and splatted to every lane. The <8 x i1> result is
; zero-padded to <32 x i1> and bitcast to i32.
; Expected with AVX512VL: one 256-bit {1to8} broadcast compare. Expected with
; only AVX512F: a 512-bit {1to16} broadcast compare plus a kshift pair that
; clears the upper 8 mask bits.
11769 define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
11770 ; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
11771 ; VLX: # %bb.0: # %entry
11772 ; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0
11773 ; VLX-NEXT: kmovd %k0, %eax
11774 ; VLX-NEXT: vzeroupper
11777 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
11778 ; NoVLX: # %bb.0: # %entry
11779 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11780 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
11781 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11782 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11783 ; NoVLX-NEXT: kmovw %k0, %eax
11784 ; NoVLX-NEXT: vzeroupper
11787 %0 = bitcast <4 x i64> %__a to <8 x i32>
11788 %load = load i32, i32* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle with an all-zero index mask.
11789 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
11790 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
11791 %2 = icmp sge <8 x i32> %0, %1
; Indices 8-15 select lanes of the zeroinitializer operand, so every result lane past the first eight is false.
11792 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11793 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of the eight i32 lanes of %__a against one i32 loaded
; from %__b and splatted to every lane. All eight bits of %__u gate the result,
; which is zero-padded to <32 x i1> and bitcast to i32.
; Expected with AVX512VL: one masked 256-bit {1to8} broadcast compare. Expected
; with only AVX512F: a masked 512-bit {1to16} broadcast compare plus a kshift
; pair that clears the upper 8 mask bits.
11797 define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
11798 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
11799 ; VLX: # %bb.0: # %entry
11800 ; VLX-NEXT: kmovd %edi, %k1
11801 ; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
11802 ; VLX-NEXT: kmovd %k0, %eax
11803 ; VLX-NEXT: vzeroupper
11806 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
11807 ; NoVLX: # %bb.0: # %entry
11808 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11809 ; NoVLX-NEXT: kmovw %edi, %k1
11810 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
11811 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11812 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11813 ; NoVLX-NEXT: kmovw %k0, %eax
11814 ; NoVLX-NEXT: vzeroupper
11817 %0 = bitcast <4 x i64> %__a to <8 x i32>
11818 %load = load i32, i32* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle with an all-zero index mask.
11819 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
11820 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
11821 %2 = icmp sge <8 x i32> %0, %1
; Reinterpret the i8 write mask as one i1 per compared lane.
11822 %3 = bitcast i8 %__u to <8 x i1>
11823 %4 = and <8 x i1> %3, %2
; Indices 8-15 select lanes of the zeroinitializer operand, so every result lane past the first eight is false.
11824 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11825 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed >= compare of the eight i32 lanes of %__a and %__b (both
; passed by value). The <8 x i1> result is zero-padded to <64 x i1> and bitcast
; to i64.
; Expected with AVX512VL: one 256-bit compare into a mask register. Expected
; with only AVX512F: a 512-bit compare plus a kshift pair that clears the upper
; 8 mask bits.
11830 define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11831 ; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
11832 ; VLX: # %bb.0: # %entry
11833 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0
11834 ; VLX-NEXT: kmovq %k0, %rax
11835 ; VLX-NEXT: vzeroupper
11838 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
11839 ; NoVLX: # %bb.0: # %entry
11840 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
11841 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11842 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11843 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11844 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11845 ; NoVLX-NEXT: kmovw %k0, %eax
11846 ; NoVLX-NEXT: vzeroupper
11849 %0 = bitcast <4 x i64> %__a to <8 x i32>
11850 %1 = bitcast <4 x i64> %__b to <8 x i32>
11851 %2 = icmp sge <8 x i32> %0, %1
; Indices 8-15 select lanes of the zeroinitializer operand, so every result lane past the first eight is false.
11852 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11853 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed >= compare of the eight i32 lanes of %__a against a full
; vector loaded from %__b. The <8 x i1> result is zero-padded to <64 x i1> and
; bitcast to i64.
; Expected with AVX512VL: the load is folded into the 256-bit compare. Expected
; with only AVX512F: a separate vmovdqa, a 512-bit compare, and a kshift pair
; that clears the upper 8 mask bits.
11857 define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
11858 ; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
11859 ; VLX: # %bb.0: # %entry
11860 ; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
11861 ; VLX-NEXT: kmovq %k0, %rax
11862 ; VLX-NEXT: vzeroupper
11865 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
11866 ; NoVLX: # %bb.0: # %entry
11867 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11868 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
11869 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11870 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11871 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11872 ; NoVLX-NEXT: kmovw %k0, %eax
11873 ; NoVLX-NEXT: vzeroupper
11876 %0 = bitcast <4 x i64> %__a to <8 x i32>
11877 %load = load <4 x i64>, <4 x i64>* %__b
11878 %1 = bitcast <4 x i64> %load to <8 x i32>
11879 %2 = icmp sge <8 x i32> %0, %1
; Indices 8-15 select lanes of the zeroinitializer operand, so every result lane past the first eight is false.
11880 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11881 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of the eight i32 lanes of %__a and %__b (both passed
; by value). All eight bits of %__u gate the result, which is zero-padded to
; <64 x i1> and bitcast to i64.
; Expected with AVX512VL: one masked 256-bit compare. Expected with only
; AVX512F: a masked 512-bit compare plus a kshift pair that clears the upper 8
; mask bits.
11885 define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11886 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
11887 ; VLX: # %bb.0: # %entry
11888 ; VLX-NEXT: kmovd %edi, %k1
11889 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 {%k1}
11890 ; VLX-NEXT: kmovq %k0, %rax
11891 ; VLX-NEXT: vzeroupper
11894 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
11895 ; NoVLX: # %bb.0: # %entry
11896 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
11897 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11898 ; NoVLX-NEXT: kmovw %edi, %k1
11899 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11900 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11901 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11902 ; NoVLX-NEXT: kmovw %k0, %eax
11903 ; NoVLX-NEXT: vzeroupper
11906 %0 = bitcast <4 x i64> %__a to <8 x i32>
11907 %1 = bitcast <4 x i64> %__b to <8 x i32>
11908 %2 = icmp sge <8 x i32> %0, %1
; Reinterpret the i8 write mask as one i1 per compared lane.
11909 %3 = bitcast i8 %__u to <8 x i1>
11910 %4 = and <8 x i1> %2, %3
; Indices 8-15 select lanes of the zeroinitializer operand, so every result lane past the first eight is false.
11911 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11912 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare of the eight i32 lanes of %__a against a full vector
; loaded from %__b. All eight bits of %__u gate the result, which is zero-padded
; to <64 x i1> and bitcast to i64.
; Expected with AVX512VL: the load is folded into the masked 256-bit compare.
; Expected with only AVX512F: a separate vmovdqa, a masked 512-bit compare, and
; a kshift pair that clears the upper 8 mask bits.
11916 define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
11917 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
11918 ; VLX: # %bb.0: # %entry
11919 ; VLX-NEXT: kmovd %edi, %k1
11920 ; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
11921 ; VLX-NEXT: kmovq %k0, %rax
11922 ; VLX-NEXT: vzeroupper
11925 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
11926 ; NoVLX: # %bb.0: # %entry
11927 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11928 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
11929 ; NoVLX-NEXT: kmovw %edi, %k1
11930 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11931 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11932 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11933 ; NoVLX-NEXT: kmovw %k0, %eax
11934 ; NoVLX-NEXT: vzeroupper
11937 %0 = bitcast <4 x i64> %__a to <8 x i32>
11938 %load = load <4 x i64>, <4 x i64>* %__b
11939 %1 = bitcast <4 x i64> %load to <8 x i32>
11940 %2 = icmp sge <8 x i32> %0, %1
; Reinterpret the i8 write mask as one i1 per compared lane.
11941 %3 = bitcast i8 %__u to <8 x i1>
11942 %4 = and <8 x i1> %2, %3
; Indices 8-15 select lanes of the zeroinitializer operand, so every result lane past the first eight is false.
11943 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11944 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed >= compare of the eight i32 lanes of %__a against one i32
; loaded from %__b and splatted to every lane. The <8 x i1> result is
; zero-padded to <64 x i1> and bitcast to i64.
; Expected with AVX512VL: one 256-bit {1to8} broadcast compare. Expected with
; only AVX512F: a 512-bit {1to16} broadcast compare plus a kshift pair that
; clears the upper 8 mask bits.
11949 define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
11950 ; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
11951 ; VLX: # %bb.0: # %entry
11952 ; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0
11953 ; VLX-NEXT: kmovq %k0, %rax
11954 ; VLX-NEXT: vzeroupper
11957 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
11958 ; NoVLX: # %bb.0: # %entry
11959 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11960 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
11961 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11962 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11963 ; NoVLX-NEXT: kmovw %k0, %eax
11964 ; NoVLX-NEXT: vzeroupper
11967 %0 = bitcast <4 x i64> %__a to <8 x i32>
11968 %load = load i32, i32* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle with an all-zero index mask.
11969 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
11970 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
11971 %2 = icmp sge <8 x i32> %0, %1
; Indices 8-15 select lanes of the zeroinitializer operand, so every result lane past the first eight is false.
11972 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11973 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of <8 x i32>: %__a against a scalar i32 loaded from
; %__b and splatted to all 8 lanes, ANDed with the <8 x i1> view of %__u, then
; widened to 64 mask bits. Note the AND operand order (%3, %2) is the reverse
; of the full-vector _mem variants of this test.
; The VLX checks expect a masked compare with a {1to8} embedded broadcast.
; The NoVLX checks expect a masked zmm compare with {1to16}, followed by a
; kshiftlw/kshiftrw $8 pair that clears the upper 8 bits of the 16-bit mask.
11977 define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
11978 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
11979 ; VLX: # %bb.0: # %entry
11980 ; VLX-NEXT: kmovd %edi, %k1
11981 ; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
11982 ; VLX-NEXT: kmovq %k0, %rax
11983 ; VLX-NEXT: vzeroupper
11986 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
11987 ; NoVLX: # %bb.0: # %entry
11988 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11989 ; NoVLX-NEXT: kmovw %edi, %k1
11990 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
11991 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11992 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11993 ; NoVLX-NEXT: kmovw %k0, %eax
11994 ; NoVLX-NEXT: vzeroupper
11997 %0 = bitcast <4 x i64> %__a to <8 x i32>
11998 %load = load i32, i32* %__b
; insertelement into lane 0 plus an all-zero shuffle mask splats the scalar.
11999 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
12000 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12001 %2 = icmp sge <8 x i32> %0, %1
12002 %3 = bitcast i8 %__u to <8 x i1>
12003 %4 = and <8 x i1> %3, %2
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-63 are all zero and only lanes 0-7 carry compare bits.
12004 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
12005 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed >= compare of two <16 x i32> register operands, with the
; <16 x i1> result widened to 32 mask bits and bitcast to i32.
; Both check prefixes expect a single zmm vpcmpnltd; they differ only in the
; mask-to-GPR move (kmovd in the VLX checks, kmovw in the NoVLX checks).
12010 define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
12011 ; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
12012 ; VLX: # %bb.0: # %entry
12013 ; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12014 ; VLX-NEXT: kmovd %k0, %eax
12015 ; VLX-NEXT: vzeroupper
12018 ; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
12019 ; NoVLX: # %bb.0: # %entry
12020 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12021 ; NoVLX-NEXT: kmovw %k0, %eax
12022 ; NoVLX-NEXT: vzeroupper
12025 %0 = bitcast <8 x i64> %__a to <16 x i32>
12026 %1 = bitcast <8 x i64> %__b to <16 x i32>
12027 %2 = icmp sge <16 x i32> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result
; lanes 16-31 are zero and only lanes 0-15 carry compare bits.
12028 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12029 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed >= compare of <16 x i32>: %__a against a full-vector load
; from %__b, with the <16 x i1> result widened to 32 mask bits.
; Both check prefixes expect the load folded into a single zmm vpcmpnltd.
12033 define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
12034 ; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
12035 ; VLX: # %bb.0: # %entry
12036 ; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
12037 ; VLX-NEXT: kmovd %k0, %eax
12038 ; VLX-NEXT: vzeroupper
12041 ; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
12042 ; NoVLX: # %bb.0: # %entry
12043 ; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
12044 ; NoVLX-NEXT: kmovw %k0, %eax
12045 ; NoVLX-NEXT: vzeroupper
12048 %0 = bitcast <8 x i64> %__a to <16 x i32>
12049 %load = load <8 x i64>, <8 x i64>* %__b
12050 %1 = bitcast <8 x i64> %load to <16 x i32>
12051 %2 = icmp sge <16 x i32> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result
; lanes 16-31 are zero and only lanes 0-15 carry compare bits.
12052 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12053 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of two <16 x i32> register operands, ANDed with the
; <16 x i1> view of %__u, then widened to 32 mask bits.
; The VLX checks expect the mask applied through {%k1} on the compare.
; The NoVLX checks expect an unmasked compare with the mask applied afterwards
; in a general-purpose register (andl %edi, %eax).
12057 define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
12058 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
12059 ; VLX: # %bb.0: # %entry
12060 ; VLX-NEXT: kmovd %edi, %k1
12061 ; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
12062 ; VLX-NEXT: kmovd %k0, %eax
12063 ; VLX-NEXT: vzeroupper
12066 ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
12067 ; NoVLX: # %bb.0: # %entry
12068 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12069 ; NoVLX-NEXT: kmovw %k0, %eax
12070 ; NoVLX-NEXT: andl %edi, %eax
12071 ; NoVLX-NEXT: vzeroupper
12074 %0 = bitcast <8 x i64> %__a to <16 x i32>
12075 %1 = bitcast <8 x i64> %__b to <16 x i32>
12076 %2 = icmp sge <16 x i32> %0, %1
12077 %3 = bitcast i16 %__u to <16 x i1>
12078 %4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result
; lanes 16-31 are zero and only lanes 0-15 carry compare bits.
12079 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12080 %6 = bitcast <32 x i1> %5 to i32
; Masked signed >= compare of <16 x i32>: %__a against a full-vector load from
; %__b, ANDed with the <16 x i1> view of %__u, then widened to 32 mask bits.
; The VLX checks expect a {%k1}-masked compare with a folded memory operand.
; The NoVLX checks expect an unmasked folded compare with the mask applied
; afterwards in a general-purpose register (andl %edi, %eax).
12084 define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
12085 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
12086 ; VLX: # %bb.0: # %entry
12087 ; VLX-NEXT: kmovd %edi, %k1
12088 ; VLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
12089 ; VLX-NEXT: kmovd %k0, %eax
12090 ; VLX-NEXT: vzeroupper
12093 ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
12094 ; NoVLX: # %bb.0: # %entry
12095 ; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0
12096 ; NoVLX-NEXT: kmovw %k0, %eax
12097 ; NoVLX-NEXT: andl %edi, %eax
12098 ; NoVLX-NEXT: vzeroupper
12101 %0 = bitcast <8 x i64> %__a to <16 x i32>
12102 %load = load <8 x i64>, <8 x i64>* %__b
12103 %1 = bitcast <8 x i64> %load to <16 x i32>
12104 %2 = icmp sge <16 x i32> %0, %1
12105 %3 = bitcast i16 %__u to <16 x i1>
12106 %4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result
; lanes 16-31 are zero and only lanes 0-15 carry compare bits.
12107 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12108 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed >= compare of <16 x i32>: %__a against a scalar i32 loaded
; from %__b and splatted to all 16 lanes, then widened to 32 mask bits.
; Both check prefixes expect the splat folded as a {1to16} embedded broadcast.
12113 define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
12114 ; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
12115 ; VLX: # %bb.0: # %entry
12116 ; VLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
12117 ; VLX-NEXT: kmovd %k0, %eax
12118 ; VLX-NEXT: vzeroupper
12121 ; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
12122 ; NoVLX: # %bb.0: # %entry
12123 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
12124 ; NoVLX-NEXT: kmovw %k0, %eax
12125 ; NoVLX-NEXT: vzeroupper
12128 %0 = bitcast <8 x i64> %__a to <16 x i32>
12129 %load = load i32, i32* %__b
; insertelement into lane 0 plus an all-zero shuffle mask splats the scalar.
12130 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
12131 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12132 %2 = icmp sge <16 x i32> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result
; lanes 16-31 are zero and only lanes 0-15 carry compare bits.
12133 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12134 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of <16 x i32>: %__a against a scalar i32 loaded from
; %__b and splatted to all 16 lanes, ANDed with the <16 x i1> view of %__u,
; then widened to 32 mask bits. The AND operand order here is (%3, %2).
; The VLX checks expect a {%k1}-masked compare with a {1to16} broadcast.
; The NoVLX checks expect an unmasked broadcast compare with the mask applied
; afterwards in a general-purpose register (andl %edi, %eax).
12138 define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
12139 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
12140 ; VLX: # %bb.0: # %entry
12141 ; VLX-NEXT: kmovd %edi, %k1
12142 ; VLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
12143 ; VLX-NEXT: kmovd %k0, %eax
12144 ; VLX-NEXT: vzeroupper
12147 ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
12148 ; NoVLX: # %bb.0: # %entry
12149 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0
12150 ; NoVLX-NEXT: kmovw %k0, %eax
12151 ; NoVLX-NEXT: andl %edi, %eax
12152 ; NoVLX-NEXT: vzeroupper
12155 %0 = bitcast <8 x i64> %__a to <16 x i32>
12156 %load = load i32, i32* %__b
; insertelement into lane 0 plus an all-zero shuffle mask splats the scalar.
12157 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
12158 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12159 %2 = icmp sge <16 x i32> %0, %1
12160 %3 = bitcast i16 %__u to <16 x i1>
12161 %4 = and <16 x i1> %3, %2
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result
; lanes 16-31 are zero and only lanes 0-15 carry compare bits.
12162 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12163 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed >= compare of two <16 x i32> register operands, with the
; <16 x i1> result widened to 64 mask bits and bitcast to i64.
; Both check prefixes expect a single zmm vpcmpnltd; the VLX checks move the
; mask out with kmovq, the NoVLX checks with kmovw into %eax.
12168 define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
12169 ; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
12170 ; VLX: # %bb.0: # %entry
12171 ; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12172 ; VLX-NEXT: kmovq %k0, %rax
12173 ; VLX-NEXT: vzeroupper
12176 ; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
12177 ; NoVLX: # %bb.0: # %entry
12178 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12179 ; NoVLX-NEXT: kmovw %k0, %eax
12180 ; NoVLX-NEXT: vzeroupper
12183 %0 = bitcast <8 x i64> %__a to <16 x i32>
12184 %1 = bitcast <8 x i64> %__b to <16 x i32>
12185 %2 = icmp sge <16 x i32> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result
; lanes 16-63 are all zero and only lanes 0-15 carry compare bits.
12186 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12187 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed >= compare of <16 x i32>: %__a against a full-vector load
; from %__b, with the <16 x i1> result widened to 64 mask bits.
; Both check prefixes expect the load folded into a single zmm vpcmpnltd.
12191 define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
12192 ; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
12193 ; VLX: # %bb.0: # %entry
12194 ; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
12195 ; VLX-NEXT: kmovq %k0, %rax
12196 ; VLX-NEXT: vzeroupper
12199 ; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
12200 ; NoVLX: # %bb.0: # %entry
12201 ; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
12202 ; NoVLX-NEXT: kmovw %k0, %eax
12203 ; NoVLX-NEXT: vzeroupper
12206 %0 = bitcast <8 x i64> %__a to <16 x i32>
12207 %load = load <8 x i64>, <8 x i64>* %__b
12208 %1 = bitcast <8 x i64> %load to <16 x i32>
12209 %2 = icmp sge <16 x i32> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result
; lanes 16-63 are all zero and only lanes 0-15 carry compare bits.
12210 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12211 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of two <16 x i32> register operands, ANDed with the
; <16 x i1> view of %__u, then widened to 64 mask bits.
; The VLX checks expect the mask applied through {%k1} on the compare.
; The NoVLX checks expect an unmasked compare with the mask applied afterwards
; in a general-purpose register (andl %edi, %eax).
12215 define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
12216 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
12217 ; VLX: # %bb.0: # %entry
12218 ; VLX-NEXT: kmovd %edi, %k1
12219 ; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
12220 ; VLX-NEXT: kmovq %k0, %rax
12221 ; VLX-NEXT: vzeroupper
12224 ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
12225 ; NoVLX: # %bb.0: # %entry
12226 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12227 ; NoVLX-NEXT: kmovw %k0, %eax
12228 ; NoVLX-NEXT: andl %edi, %eax
12229 ; NoVLX-NEXT: vzeroupper
12232 %0 = bitcast <8 x i64> %__a to <16 x i32>
12233 %1 = bitcast <8 x i64> %__b to <16 x i32>
12234 %2 = icmp sge <16 x i32> %0, %1
12235 %3 = bitcast i16 %__u to <16 x i1>
12236 %4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result
; lanes 16-63 are all zero and only lanes 0-15 carry compare bits.
12237 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12238 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare of <16 x i32>: %__a against a full-vector load from
; %__b, ANDed with the <16 x i1> view of %__u, then widened to 64 mask bits.
; The VLX checks expect a {%k1}-masked compare with a folded memory operand.
; The NoVLX checks expect an unmasked folded compare with the mask applied
; afterwards in a general-purpose register (andl %edi, %eax).
12242 define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
12243 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
12244 ; VLX: # %bb.0: # %entry
12245 ; VLX-NEXT: kmovd %edi, %k1
12246 ; VLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
12247 ; VLX-NEXT: kmovq %k0, %rax
12248 ; VLX-NEXT: vzeroupper
12251 ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
12252 ; NoVLX: # %bb.0: # %entry
12253 ; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0
12254 ; NoVLX-NEXT: kmovw %k0, %eax
12255 ; NoVLX-NEXT: andl %edi, %eax
12256 ; NoVLX-NEXT: vzeroupper
12259 %0 = bitcast <8 x i64> %__a to <16 x i32>
12260 %load = load <8 x i64>, <8 x i64>* %__b
12261 %1 = bitcast <8 x i64> %load to <16 x i32>
12262 %2 = icmp sge <16 x i32> %0, %1
12263 %3 = bitcast i16 %__u to <16 x i1>
12264 %4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result
; lanes 16-63 are all zero and only lanes 0-15 carry compare bits.
12265 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12266 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed >= compare of <16 x i32>: %__a against a scalar i32 loaded
; from %__b and splatted to all 16 lanes, then widened to 64 mask bits.
; Both check prefixes expect the splat folded as a {1to16} embedded broadcast.
12271 define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
12272 ; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
12273 ; VLX: # %bb.0: # %entry
12274 ; VLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
12275 ; VLX-NEXT: kmovq %k0, %rax
12276 ; VLX-NEXT: vzeroupper
12279 ; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
12280 ; NoVLX: # %bb.0: # %entry
12281 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
12282 ; NoVLX-NEXT: kmovw %k0, %eax
12283 ; NoVLX-NEXT: vzeroupper
12286 %0 = bitcast <8 x i64> %__a to <16 x i32>
12287 %load = load i32, i32* %__b
; insertelement into lane 0 plus an all-zero shuffle mask splats the scalar.
12288 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
12289 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12290 %2 = icmp sge <16 x i32> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result
; lanes 16-63 are all zero and only lanes 0-15 carry compare bits.
12291 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12292 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of <16 x i32>: %__a against a scalar i32 loaded from
; %__b and splatted to all 16 lanes, ANDed with the <16 x i1> view of %__u,
; then widened to 64 mask bits. The AND operand order here is (%3, %2).
; The VLX checks expect a {%k1}-masked compare with a {1to16} broadcast.
; The NoVLX checks expect an unmasked broadcast compare with the mask applied
; afterwards in a general-purpose register (andl %edi, %eax).
12296 define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
12297 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
12298 ; VLX: # %bb.0: # %entry
12299 ; VLX-NEXT: kmovd %edi, %k1
12300 ; VLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
12301 ; VLX-NEXT: kmovq %k0, %rax
12302 ; VLX-NEXT: vzeroupper
12305 ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
12306 ; NoVLX: # %bb.0: # %entry
12307 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0
12308 ; NoVLX-NEXT: kmovw %k0, %eax
12309 ; NoVLX-NEXT: andl %edi, %eax
12310 ; NoVLX-NEXT: vzeroupper
12313 %0 = bitcast <8 x i64> %__a to <16 x i32>
12314 %load = load i32, i32* %__b
; insertelement into lane 0 plus an all-zero shuffle mask splats the scalar.
12315 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
12316 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12317 %2 = icmp sge <16 x i32> %0, %1
12318 %3 = bitcast i16 %__u to <16 x i1>
12319 %4 = and <16 x i1> %3, %2
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result
; lanes 16-63 are all zero and only lanes 0-15 carry compare bits.
12320 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12321 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed >= compare of two <2 x i64> register operands, with the
; <2 x i1> result widened to 4 mask bits and bitcast to i4.
; The VLX checks expect an xmm vpcmpnltq and a kmovb of the mask.
; The NoVLX checks expect both operands widened to zmm, followed by a
; kshiftlw/kshiftrw $14 pair that keeps only the low 2 bits of the mask.
12326 define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12327 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
12328 ; VLX: # %bb.0: # %entry
12329 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0
12330 ; VLX-NEXT: kmovb %k0, %eax
12333 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
12334 ; NoVLX: # %bb.0: # %entry
12335 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12336 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12337 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12338 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12339 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12340 ; NoVLX-NEXT: kmovw %k0, %eax
12341 ; NoVLX-NEXT: vzeroupper
; The two bitcasts below are same-type no-ops (<2 x i64> to <2 x i64>).
12344 %0 = bitcast <2 x i64> %__a to <2 x i64>
12345 %1 = bitcast <2 x i64> %__b to <2 x i64>
12346 %2 = icmp sge <2 x i64> %0, %1
; Shuffle indices 2-3 select lanes of the zeroinitializer operand, so result
; lanes 2-3 are zero and only lanes 0-1 carry compare bits.
12347 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12348 %4 = bitcast <4 x i1> %3 to i4
; Unmasked signed >= compare of <2 x i64>: %__a against a full-vector load
; from %__b, with the <2 x i1> result widened to 4 mask bits.
; The VLX checks expect the load folded into an xmm vpcmpnltq.
; The NoVLX checks expect a separate vmovdqa load, a zmm compare, and a
; kshiftlw/kshiftrw $14 pair that keeps only the low 2 bits of the mask.
12352 define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12353 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
12354 ; VLX: # %bb.0: # %entry
12355 ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
12356 ; VLX-NEXT: kmovb %k0, %eax
12359 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
12360 ; NoVLX: # %bb.0: # %entry
12361 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12362 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
12363 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12364 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12365 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12366 ; NoVLX-NEXT: kmovw %k0, %eax
12367 ; NoVLX-NEXT: vzeroupper
; The bitcasts of %__a and %load are same-type no-ops (<2 x i64> to <2 x i64>).
12370 %0 = bitcast <2 x i64> %__a to <2 x i64>
12371 %load = load <2 x i64>, <2 x i64>* %__b
12372 %1 = bitcast <2 x i64> %load to <2 x i64>
12373 %2 = icmp sge <2 x i64> %0, %1
; Shuffle indices 2-3 select lanes of the zeroinitializer operand, so result
; lanes 2-3 are zero and only lanes 0-1 carry compare bits.
12374 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12375 %4 = bitcast <4 x i1> %3 to i4
; Masked signed >= compare of two <2 x i64> register operands, ANDed with the
; low 2 bits of %__u, then widened to 4 mask bits.
; The VLX checks expect a {%k1}-masked xmm vpcmpnltq and a kmovb of the mask.
; The NoVLX checks expect a masked zmm compare, followed by a
; kshiftlw/kshiftrw $14 pair that keeps only the low 2 bits of the mask.
12379 define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12380 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask:
12381 ; VLX: # %bb.0: # %entry
12382 ; VLX-NEXT: kmovd %edi, %k1
12383 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
12384 ; VLX-NEXT: kmovb %k0, %eax
12387 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask:
12388 ; NoVLX: # %bb.0: # %entry
12389 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12390 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12391 ; NoVLX-NEXT: kmovw %edi, %k1
12392 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12393 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12394 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12395 ; NoVLX-NEXT: kmovw %k0, %eax
12396 ; NoVLX-NEXT: vzeroupper
; The two bitcasts below are same-type no-ops (<2 x i64> to <2 x i64>).
12399 %0 = bitcast <2 x i64> %__a to <2 x i64>
12400 %1 = bitcast <2 x i64> %__b to <2 x i64>
12401 %2 = icmp sge <2 x i64> %0, %1
12402 %3 = bitcast i8 %__u to <8 x i1>
; Keep only mask lanes 0 and 1, matching the 2-lane compare result.
12403 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12404 %4 = and <2 x i1> %2, %extract.i
; Shuffle indices 2-3 select lanes of the zeroinitializer operand, so result
; lanes 2-3 are zero and only lanes 0-1 carry compare bits.
12405 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12406 %6 = bitcast <4 x i1> %5 to i4
; Masked signed >= compare of <2 x i64>: %__a against a full-vector load from
; %__b, ANDed with the low 2 bits of %__u, then widened to 4 mask bits.
; The VLX checks expect a {%k1}-masked xmm compare with a folded memory operand.
; The NoVLX checks expect a separate vmovdqa load, a masked zmm compare, and a
; kshiftlw/kshiftrw $14 pair that keeps only the low 2 bits of the mask.
12410 define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12411 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem:
12412 ; VLX: # %bb.0: # %entry
12413 ; VLX-NEXT: kmovd %edi, %k1
12414 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
12415 ; VLX-NEXT: kmovb %k0, %eax
12418 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem:
12419 ; NoVLX: # %bb.0: # %entry
12420 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12421 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
12422 ; NoVLX-NEXT: kmovw %edi, %k1
12423 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12424 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12425 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12426 ; NoVLX-NEXT: kmovw %k0, %eax
12427 ; NoVLX-NEXT: vzeroupper
; The bitcasts of %__a and %load are same-type no-ops (<2 x i64> to <2 x i64>).
12430 %0 = bitcast <2 x i64> %__a to <2 x i64>
12431 %load = load <2 x i64>, <2 x i64>* %__b
12432 %1 = bitcast <2 x i64> %load to <2 x i64>
12433 %2 = icmp sge <2 x i64> %0, %1
12434 %3 = bitcast i8 %__u to <8 x i1>
; Keep only mask lanes 0 and 1, matching the 2-lane compare result.
12435 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12436 %4 = and <2 x i1> %2, %extract.i
; Shuffle indices 2-3 select lanes of the zeroinitializer operand, so result
; lanes 2-3 are zero and only lanes 0-1 carry compare bits.
12437 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12438 %6 = bitcast <4 x i1> %5 to i4
; Unmasked signed >= compare of <2 x i64>: %__a against a scalar i64 loaded
; from %__b and splatted to both lanes, then widened to 4 mask bits.
; The VLX checks expect the splat folded as a {1to2} embedded broadcast.
; The NoVLX checks expect a zmm compare with {1to8}, followed by a
; kshiftlw/kshiftrw $14 pair that keeps only the low 2 bits of the mask.
12443 define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
12444 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
12445 ; VLX: # %bb.0: # %entry
12446 ; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
12447 ; VLX-NEXT: kmovb %k0, %eax
12450 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
12451 ; NoVLX: # %bb.0: # %entry
12452 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12453 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
12454 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12455 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12456 ; NoVLX-NEXT: kmovw %k0, %eax
12457 ; NoVLX-NEXT: vzeroupper
; The bitcast of %__a is a same-type no-op (<2 x i64> to <2 x i64>).
12460 %0 = bitcast <2 x i64> %__a to <2 x i64>
12461 %load = load i64, i64* %__b
; insertelement into lane 0 plus an all-zero shuffle mask splats the scalar.
12462 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12463 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12464 %2 = icmp sge <2 x i64> %0, %1
; Shuffle indices 2-3 select lanes of the zeroinitializer operand, so result
; lanes 2-3 are zero and only lanes 0-1 carry compare bits.
12465 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12466 %4 = bitcast <4 x i1> %3 to i4
; Masked signed >= compare of <2 x i64>: %__a against a scalar i64 loaded from
; %__b and splatted to both lanes, ANDed with the low 2 bits of %__u, then
; widened to 4 mask bits. The AND operand order here is (%extract.i, %2).
; The VLX checks expect a {%k1}-masked compare with a {1to2} broadcast.
; The NoVLX checks expect a masked zmm compare with {1to8}, followed by a
; kshiftlw/kshiftrw $14 pair that keeps only the low 2 bits of the mask.
12470 define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
12471 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
12472 ; VLX: # %bb.0: # %entry
12473 ; VLX-NEXT: kmovd %edi, %k1
12474 ; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
12475 ; VLX-NEXT: kmovb %k0, %eax
12478 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
12479 ; NoVLX: # %bb.0: # %entry
12480 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12481 ; NoVLX-NEXT: kmovw %edi, %k1
12482 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
12483 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12484 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12485 ; NoVLX-NEXT: kmovw %k0, %eax
12486 ; NoVLX-NEXT: vzeroupper
; The bitcast of %__a is a same-type no-op (<2 x i64> to <2 x i64>).
12489 %0 = bitcast <2 x i64> %__a to <2 x i64>
12490 %load = load i64, i64* %__b
; insertelement into lane 0 plus an all-zero shuffle mask splats the scalar.
12491 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12492 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12493 %2 = icmp sge <2 x i64> %0, %1
12494 %3 = bitcast i8 %__u to <8 x i1>
; Keep only mask lanes 0 and 1, matching the 2-lane compare result.
12495 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12496 %4 = and <2 x i1> %extract.i, %2
; Shuffle indices 2-3 select lanes of the zeroinitializer operand, so result
; lanes 2-3 are zero and only lanes 0-1 carry compare bits.
12497 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12498 %6 = bitcast <4 x i1> %5 to i4
; Unmasked signed >= compare of two <2 x i64> register operands, with the
; <2 x i1> result widened to 8 mask bits and bitcast to i8.
; The VLX checks expect an xmm vpcmpnltq and a kmovd of the mask.
; The NoVLX checks expect both operands widened to zmm, followed by a
; kshiftlw/kshiftrw $14 pair that keeps only the low 2 bits of the mask.
12503 define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12504 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask:
12505 ; VLX: # %bb.0: # %entry
12506 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0
12507 ; VLX-NEXT: kmovd %k0, %eax
12508 ; VLX-NEXT: # kill: def $al killed $al killed $eax
12511 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask:
12512 ; NoVLX: # %bb.0: # %entry
12513 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12514 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12515 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12516 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12517 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12518 ; NoVLX-NEXT: kmovw %k0, %eax
12519 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
12520 ; NoVLX-NEXT: vzeroupper
; The two bitcasts below are same-type no-ops (<2 x i64> to <2 x i64>).
12523 %0 = bitcast <2 x i64> %__a to <2 x i64>
12524 %1 = bitcast <2 x i64> %__b to <2 x i64>
12525 %2 = icmp sge <2 x i64> %0, %1
; Shuffle indices 2-3 select lanes of the zeroinitializer operand, so result
; lanes 2-7 are all zero and only lanes 0-1 carry compare bits.
12526 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12527 %4 = bitcast <8 x i1> %3 to i8
; Unmasked signed >= compare of <2 x i64>: %__a against a full-vector load
; from %__b, with the <2 x i1> result widened to 8 mask bits.
; The VLX checks expect the load folded into an xmm vpcmpnltq.
; The NoVLX checks expect a separate vmovdqa load, a zmm compare, and a
; kshiftlw/kshiftrw $14 pair that keeps only the low 2 bits of the mask.
12531 define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12532 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem:
12533 ; VLX: # %bb.0: # %entry
12534 ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
12535 ; VLX-NEXT: kmovd %k0, %eax
12536 ; VLX-NEXT: # kill: def $al killed $al killed $eax
12539 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem:
12540 ; NoVLX: # %bb.0: # %entry
12541 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12542 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
12543 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12544 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12545 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12546 ; NoVLX-NEXT: kmovw %k0, %eax
12547 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
12548 ; NoVLX-NEXT: vzeroupper
; The bitcasts of %__a and %load are same-type no-ops (<2 x i64> to <2 x i64>).
12551 %0 = bitcast <2 x i64> %__a to <2 x i64>
12552 %load = load <2 x i64>, <2 x i64>* %__b
12553 %1 = bitcast <2 x i64> %load to <2 x i64>
12554 %2 = icmp sge <2 x i64> %0, %1
; Shuffle indices 2-3 select lanes of the zeroinitializer operand, so result
; lanes 2-7 are all zero and only lanes 0-1 carry compare bits.
12555 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12556 %4 = bitcast <8 x i1> %3 to i8
; Masked signed >= compare of two <2 x i64> register operands, ANDed with the
; low 2 bits of %__u, then widened to 8 mask bits.
; The VLX checks expect a {%k1}-masked xmm vpcmpnltq and a kmovd of the mask.
; The NoVLX checks expect a masked zmm compare, followed by a
; kshiftlw/kshiftrw $14 pair that keeps only the low 2 bits of the mask.
12560 define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12561 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask:
12562 ; VLX: # %bb.0: # %entry
12563 ; VLX-NEXT: kmovd %edi, %k1
12564 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
12565 ; VLX-NEXT: kmovd %k0, %eax
12566 ; VLX-NEXT: # kill: def $al killed $al killed $eax
12569 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask:
12570 ; NoVLX: # %bb.0: # %entry
12571 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12572 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12573 ; NoVLX-NEXT: kmovw %edi, %k1
12574 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12575 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12576 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12577 ; NoVLX-NEXT: kmovw %k0, %eax
12578 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
12579 ; NoVLX-NEXT: vzeroupper
; The two bitcasts below are same-type no-ops (<2 x i64> to <2 x i64>).
12582 %0 = bitcast <2 x i64> %__a to <2 x i64>
12583 %1 = bitcast <2 x i64> %__b to <2 x i64>
12584 %2 = icmp sge <2 x i64> %0, %1
12585 %3 = bitcast i8 %__u to <8 x i1>
; Keep only mask lanes 0 and 1, matching the 2-lane compare result.
12586 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12587 %4 = and <2 x i1> %2, %extract.i
; Shuffle indices 2-3 select lanes of the zeroinitializer operand, so result
; lanes 2-7 are all zero and only lanes 0-1 carry compare bits.
12588 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12589 %6 = bitcast <8 x i1> %5 to i8
; Masked signed >= compare of <2 x i64>: %__a against a full-vector load from
; %__b, ANDed with the low 2 bits of %__u, then widened to 8 mask bits.
; The VLX checks expect a {%k1}-masked xmm compare with a folded memory operand.
; The NoVLX checks expect a separate vmovdqa load, a masked zmm compare, and a
; kshiftlw/kshiftrw $14 pair that keeps only the low 2 bits of the mask.
12593 define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12594 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem:
12595 ; VLX: # %bb.0: # %entry
12596 ; VLX-NEXT: kmovd %edi, %k1
12597 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
12598 ; VLX-NEXT: kmovd %k0, %eax
12599 ; VLX-NEXT: # kill: def $al killed $al killed $eax
12602 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem:
12603 ; NoVLX: # %bb.0: # %entry
12604 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12605 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
12606 ; NoVLX-NEXT: kmovw %edi, %k1
12607 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12608 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12609 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12610 ; NoVLX-NEXT: kmovw %k0, %eax
12611 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
12612 ; NoVLX-NEXT: vzeroupper
; The bitcasts of %__a and %load are same-type no-ops (<2 x i64> to <2 x i64>).
12615 %0 = bitcast <2 x i64> %__a to <2 x i64>
12616 %load = load <2 x i64>, <2 x i64>* %__b
12617 %1 = bitcast <2 x i64> %load to <2 x i64>
12618 %2 = icmp sge <2 x i64> %0, %1
12619 %3 = bitcast i8 %__u to <8 x i1>
; Keep only mask lanes 0 and 1, matching the 2-lane compare result.
12620 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12621 %4 = and <2 x i1> %2, %extract.i
; Shuffle indices 2-3 select lanes of the zeroinitializer operand, so result
; lanes 2-7 are all zero and only lanes 0-1 carry compare bits.
12622 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12623 %6 = bitcast <8 x i1> %5 to i8
; Unmasked signed >= compare of <2 x i64>: %__a against a scalar i64 loaded
; from %__b and splatted to both lanes, then widened to 8 mask bits.
; The VLX checks expect the splat folded as a {1to2} embedded broadcast.
; The NoVLX checks expect a zmm compare with {1to8}, followed by a
; kshiftlw/kshiftrw $14 pair that keeps only the low 2 bits of the mask.
12628 define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
12629 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
12630 ; VLX: # %bb.0: # %entry
12631 ; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
12632 ; VLX-NEXT: kmovd %k0, %eax
12633 ; VLX-NEXT: # kill: def $al killed $al killed $eax
12636 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
12637 ; NoVLX: # %bb.0: # %entry
12638 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12639 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
12640 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12641 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12642 ; NoVLX-NEXT: kmovw %k0, %eax
12643 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
12644 ; NoVLX-NEXT: vzeroupper
; The bitcast of %__a is a same-type no-op (<2 x i64> to <2 x i64>).
12647 %0 = bitcast <2 x i64> %__a to <2 x i64>
12648 %load = load i64, i64* %__b
; insertelement into lane 0 plus an all-zero shuffle mask splats the scalar.
12649 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12650 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12651 %2 = icmp sge <2 x i64> %0, %1
; Shuffle indices 2-3 select lanes of the zeroinitializer operand, so result
; lanes 2-7 are all zero and only lanes 0-1 carry compare bits.
12652 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12653 %4 = bitcast <8 x i1> %3 to i8
; Masked signed >= compare of <2 x i64>: %__a against a scalar i64 loaded from
; %__b and splatted to both lanes, ANDed with the low 2 bits of %__u, then
; widened to 8 mask bits. The AND operand order here is (%extract.i, %2).
; The VLX checks expect a {%k1}-masked compare with a {1to2} broadcast.
; The NoVLX checks expect a masked zmm compare with {1to8}, followed by a
; kshiftlw/kshiftrw $14 pair that keeps only the low 2 bits of the mask.
12657 define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
12658 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
12659 ; VLX: # %bb.0: # %entry
12660 ; VLX-NEXT: kmovd %edi, %k1
12661 ; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
12662 ; VLX-NEXT: kmovd %k0, %eax
12663 ; VLX-NEXT: # kill: def $al killed $al killed $eax
12666 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
12667 ; NoVLX: # %bb.0: # %entry
12668 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12669 ; NoVLX-NEXT: kmovw %edi, %k1
12670 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
12671 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12672 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12673 ; NoVLX-NEXT: kmovw %k0, %eax
12674 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
12675 ; NoVLX-NEXT: vzeroupper
; The bitcast of %__a is a same-type no-op (<2 x i64> to <2 x i64>).
12678 %0 = bitcast <2 x i64> %__a to <2 x i64>
12679 %load = load i64, i64* %__b
; insertelement into lane 0 plus an all-zero shuffle mask splats the scalar.
12680 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12681 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12682 %2 = icmp sge <2 x i64> %0, %1
12683 %3 = bitcast i8 %__u to <8 x i1>
; Keep only mask lanes 0 and 1, matching the 2-lane compare result.
12684 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12685 %4 = and <2 x i1> %extract.i, %2
; Shuffle indices 2-3 select lanes of the zeroinitializer operand, so result
; lanes 2-7 are all zero and only lanes 0-1 carry compare bits.
12686 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12687 %6 = bitcast <8 x i1> %5 to i8
; Signed >= compare of two <2 x i64> register operands. The <2 x i1> result is
; widened to <16 x i1> with zero lanes (shuffle indices 2 and 3 select from the
; zeroinitializer operand) and bitcast to i16. The assertions expect a single
; xmm vpcmpnltq for the VLX run, and a zmm compare followed by a
; kshiftlw/kshiftrw $14 pair otherwise.
12692 define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12693 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask:
12694 ; VLX: # %bb.0: # %entry
12695 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0
12696 ; VLX-NEXT: kmovd %k0, %eax
12697 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
12700 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask:
12701 ; NoVLX: # %bb.0: # %entry
12702 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12703 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12704 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12705 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12706 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12707 ; NoVLX-NEXT: kmovw %k0, %eax
12708 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
12709 ; NoVLX-NEXT: vzeroupper
12712 %0 = bitcast <2 x i64> %__a to <2 x i64>
12713 %1 = bitcast <2 x i64> %__b to <2 x i64>
12714 %2 = icmp sge <2 x i64> %0, %1
12715 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12716 %4 = bitcast <16 x i1> %3 to i16
; Signed >= compare of <2 x i64> %__a against the <2 x i64> loaded from %__b.
; The <2 x i1> result is widened to <16 x i1> with zero lanes (shuffle indices
; 2 and 3 select from the zeroinitializer operand) and bitcast to i16. The
; assertions expect the load folded into an xmm vpcmpnltq for the VLX run, and
; a separate vmovdqa load, zmm compare and kshiftlw/kshiftrw $14 pair otherwise.
12720 define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12721 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem:
12722 ; VLX: # %bb.0: # %entry
12723 ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
12724 ; VLX-NEXT: kmovd %k0, %eax
12725 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
12728 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem:
12729 ; NoVLX: # %bb.0: # %entry
12730 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12731 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
12732 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12733 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12734 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12735 ; NoVLX-NEXT: kmovw %k0, %eax
12736 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
12737 ; NoVLX-NEXT: vzeroupper
12740 %0 = bitcast <2 x i64> %__a to <2 x i64>
12741 %load = load <2 x i64>, <2 x i64>* %__b
12742 %1 = bitcast <2 x i64> %load to <2 x i64>
12743 %2 = icmp sge <2 x i64> %0, %1
12744 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12745 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of two <2 x i64> register operands. The <2 x i1>
; result is ANDed with elements 0 and 1 of %__u viewed as <8 x i1>, widened to
; <16 x i1> with zero lanes (shuffle indices 2 and 3 select from
; zeroinitializer) and bitcast to i16. The assertions expect the AND to be
; folded into the compare as a {%k1} write-mask in both runs.
12749 define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12750 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask:
12751 ; VLX: # %bb.0: # %entry
12752 ; VLX-NEXT: kmovd %edi, %k1
12753 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
12754 ; VLX-NEXT: kmovd %k0, %eax
12755 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
12758 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask:
12759 ; NoVLX: # %bb.0: # %entry
12760 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12761 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12762 ; NoVLX-NEXT: kmovw %edi, %k1
12763 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12764 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12765 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12766 ; NoVLX-NEXT: kmovw %k0, %eax
12767 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
12768 ; NoVLX-NEXT: vzeroupper
12771 %0 = bitcast <2 x i64> %__a to <2 x i64>
12772 %1 = bitcast <2 x i64> %__b to <2 x i64>
12773 %2 = icmp sge <2 x i64> %0, %1
12774 %3 = bitcast i8 %__u to <8 x i1>
12775 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12776 %4 = and <2 x i1> %2, %extract.i
12777 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12778 %6 = bitcast <16 x i1> %5 to i16
; Masked signed >= compare of <2 x i64> %__a against the <2 x i64> loaded from
; %__b. The <2 x i1> result is ANDed with elements 0 and 1 of %__u viewed as
; <8 x i1>, widened to <16 x i1> with zero lanes (shuffle indices 2 and 3
; select from zeroinitializer) and bitcast to i16. The assertions expect a
; {%k1} write-masked compare in both runs, with the load folded only for the
; VLX run.
12782 define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12783 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem:
12784 ; VLX: # %bb.0: # %entry
12785 ; VLX-NEXT: kmovd %edi, %k1
12786 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
12787 ; VLX-NEXT: kmovd %k0, %eax
12788 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
12791 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem:
12792 ; NoVLX: # %bb.0: # %entry
12793 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12794 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
12795 ; NoVLX-NEXT: kmovw %edi, %k1
12796 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12797 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12798 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12799 ; NoVLX-NEXT: kmovw %k0, %eax
12800 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
12801 ; NoVLX-NEXT: vzeroupper
12804 %0 = bitcast <2 x i64> %__a to <2 x i64>
12805 %load = load <2 x i64>, <2 x i64>* %__b
12806 %1 = bitcast <2 x i64> %load to <2 x i64>
12807 %2 = icmp sge <2 x i64> %0, %1
12808 %3 = bitcast i8 %__u to <8 x i1>
12809 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12810 %4 = and <2 x i1> %2, %extract.i
12811 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12812 %6 = bitcast <16 x i1> %5 to i16
; Signed >= compare of <2 x i64> %__a against the i64 loaded from %__b splatted
; to both lanes. The <2 x i1> result is widened to <16 x i1> with zero lanes
; (shuffle indices 2 and 3 select from the zeroinitializer operand) and bitcast
; to i16. The assertions expect a {1to2} broadcast compare on xmm for the VLX
; run, and a {1to8} zmm compare plus a kshiftlw/kshiftrw $14 pair otherwise.
12817 define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
12818 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
12819 ; VLX: # %bb.0: # %entry
12820 ; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
12821 ; VLX-NEXT: kmovd %k0, %eax
12822 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
12825 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
12826 ; NoVLX: # %bb.0: # %entry
12827 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12828 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
12829 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12830 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12831 ; NoVLX-NEXT: kmovw %k0, %eax
12832 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
12833 ; NoVLX-NEXT: vzeroupper
12836 %0 = bitcast <2 x i64> %__a to <2 x i64>
12837 %load = load i64, i64* %__b
12838 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12839 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12840 %2 = icmp sge <2 x i64> %0, %1
12841 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12842 %4 = bitcast <16 x i1> %3 to i16
; Masked form of the broadcast-from-memory compare. The signed >= result of
; <2 x i64> %__a against the splatted i64 loaded from %__b is ANDed with
; elements 0 and 1 of %__u viewed as <8 x i1>, widened to <16 x i1> with zero
; lanes (shuffle indices 2 and 3 select from zeroinitializer) and bitcast to
; i16. The assertions expect a {%k1} write-masked broadcast compare in both
; runs.
12846 define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
12847 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
12848 ; VLX: # %bb.0: # %entry
12849 ; VLX-NEXT: kmovd %edi, %k1
12850 ; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
12851 ; VLX-NEXT: kmovd %k0, %eax
12852 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
12855 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
12856 ; NoVLX: # %bb.0: # %entry
12857 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12858 ; NoVLX-NEXT: kmovw %edi, %k1
12859 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
12860 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12861 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12862 ; NoVLX-NEXT: kmovw %k0, %eax
12863 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
12864 ; NoVLX-NEXT: vzeroupper
12867 %0 = bitcast <2 x i64> %__a to <2 x i64>
12868 %load = load i64, i64* %__b
12869 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12870 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12871 %2 = icmp sge <2 x i64> %0, %1
12872 %3 = bitcast i8 %__u to <8 x i1>
12873 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12874 %4 = and <2 x i1> %extract.i, %2
12875 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12876 %6 = bitcast <16 x i1> %5 to i16
; Signed >= compare of two <2 x i64> register operands. The <2 x i1> result is
; widened to <32 x i1> with zero lanes (shuffle indices 2 and 3 select from the
; zeroinitializer operand) and bitcast to i32. The assertions expect a single
; xmm vpcmpnltq for the VLX run, and a zmm compare followed by a
; kshiftlw/kshiftrw $14 pair otherwise.
12881 define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12882 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask:
12883 ; VLX: # %bb.0: # %entry
12884 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0
12885 ; VLX-NEXT: kmovd %k0, %eax
12888 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask:
12889 ; NoVLX: # %bb.0: # %entry
12890 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12891 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12892 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12893 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12894 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12895 ; NoVLX-NEXT: kmovw %k0, %eax
12896 ; NoVLX-NEXT: vzeroupper
12899 %0 = bitcast <2 x i64> %__a to <2 x i64>
12900 %1 = bitcast <2 x i64> %__b to <2 x i64>
12901 %2 = icmp sge <2 x i64> %0, %1
12902 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12903 %4 = bitcast <32 x i1> %3 to i32
; Signed >= compare of <2 x i64> %__a against the <2 x i64> loaded from %__b.
; The <2 x i1> result is widened to <32 x i1> with zero lanes (shuffle indices
; 2 and 3 select from the zeroinitializer operand) and bitcast to i32. The
; assertions expect the load folded into an xmm vpcmpnltq for the VLX run, and
; a separate vmovdqa load, zmm compare and kshiftlw/kshiftrw $14 pair otherwise.
12907 define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12908 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem:
12909 ; VLX: # %bb.0: # %entry
12910 ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
12911 ; VLX-NEXT: kmovd %k0, %eax
12914 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem:
12915 ; NoVLX: # %bb.0: # %entry
12916 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12917 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
12918 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12919 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12920 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12921 ; NoVLX-NEXT: kmovw %k0, %eax
12922 ; NoVLX-NEXT: vzeroupper
12925 %0 = bitcast <2 x i64> %__a to <2 x i64>
12926 %load = load <2 x i64>, <2 x i64>* %__b
12927 %1 = bitcast <2 x i64> %load to <2 x i64>
12928 %2 = icmp sge <2 x i64> %0, %1
12929 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12930 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of two <2 x i64> register operands. The <2 x i1>
; result is ANDed with elements 0 and 1 of %__u viewed as <8 x i1>, widened to
; <32 x i1> with zero lanes (shuffle indices 2 and 3 select from
; zeroinitializer) and bitcast to i32. The assertions expect the AND to be
; folded into the compare as a {%k1} write-mask in both runs.
12934 define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12935 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask:
12936 ; VLX: # %bb.0: # %entry
12937 ; VLX-NEXT: kmovd %edi, %k1
12938 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
12939 ; VLX-NEXT: kmovd %k0, %eax
12942 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask:
12943 ; NoVLX: # %bb.0: # %entry
12944 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12945 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12946 ; NoVLX-NEXT: kmovw %edi, %k1
12947 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12948 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12949 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12950 ; NoVLX-NEXT: kmovw %k0, %eax
12951 ; NoVLX-NEXT: vzeroupper
12954 %0 = bitcast <2 x i64> %__a to <2 x i64>
12955 %1 = bitcast <2 x i64> %__b to <2 x i64>
12956 %2 = icmp sge <2 x i64> %0, %1
12957 %3 = bitcast i8 %__u to <8 x i1>
12958 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12959 %4 = and <2 x i1> %2, %extract.i
12960 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12961 %6 = bitcast <32 x i1> %5 to i32
; Masked signed >= compare of <2 x i64> %__a against the <2 x i64> loaded from
; %__b. The <2 x i1> result is ANDed with elements 0 and 1 of %__u viewed as
; <8 x i1>, widened to <32 x i1> with zero lanes (shuffle indices 2 and 3
; select from zeroinitializer) and bitcast to i32. The assertions expect a
; {%k1} write-masked compare in both runs, with the load folded only for the
; VLX run.
12965 define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12966 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem:
12967 ; VLX: # %bb.0: # %entry
12968 ; VLX-NEXT: kmovd %edi, %k1
12969 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
12970 ; VLX-NEXT: kmovd %k0, %eax
12973 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem:
12974 ; NoVLX: # %bb.0: # %entry
12975 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12976 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
12977 ; NoVLX-NEXT: kmovw %edi, %k1
12978 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12979 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12980 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12981 ; NoVLX-NEXT: kmovw %k0, %eax
12982 ; NoVLX-NEXT: vzeroupper
12985 %0 = bitcast <2 x i64> %__a to <2 x i64>
12986 %load = load <2 x i64>, <2 x i64>* %__b
12987 %1 = bitcast <2 x i64> %load to <2 x i64>
12988 %2 = icmp sge <2 x i64> %0, %1
12989 %3 = bitcast i8 %__u to <8 x i1>
12990 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12991 %4 = and <2 x i1> %2, %extract.i
12992 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12993 %6 = bitcast <32 x i1> %5 to i32
; Signed >= compare of <2 x i64> %__a against the i64 loaded from %__b splatted
; to both lanes. The <2 x i1> result is widened to <32 x i1> with zero lanes
; (shuffle indices 2 and 3 select from the zeroinitializer operand) and bitcast
; to i32. The assertions expect a {1to2} broadcast compare on xmm for the VLX
; run, and a {1to8} zmm compare plus a kshiftlw/kshiftrw $14 pair otherwise.
12998 define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
12999 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
13000 ; VLX: # %bb.0: # %entry
13001 ; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
13002 ; VLX-NEXT: kmovd %k0, %eax
13005 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
13006 ; NoVLX: # %bb.0: # %entry
13007 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13008 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
13009 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13010 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13011 ; NoVLX-NEXT: kmovw %k0, %eax
13012 ; NoVLX-NEXT: vzeroupper
13015 %0 = bitcast <2 x i64> %__a to <2 x i64>
13016 %load = load i64, i64* %__b
13017 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13018 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13019 %2 = icmp sge <2 x i64> %0, %1
13020 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13021 %4 = bitcast <32 x i1> %3 to i32
; Masked form of the broadcast-from-memory compare. The signed >= result of
; <2 x i64> %__a against the splatted i64 loaded from %__b is ANDed with
; elements 0 and 1 of %__u viewed as <8 x i1>, widened to <32 x i1> with zero
; lanes (shuffle indices 2 and 3 select from zeroinitializer) and bitcast to
; i32. The assertions expect a {%k1} write-masked broadcast compare in both
; runs.
13025 define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
13026 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
13027 ; VLX: # %bb.0: # %entry
13028 ; VLX-NEXT: kmovd %edi, %k1
13029 ; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
13030 ; VLX-NEXT: kmovd %k0, %eax
13033 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
13034 ; NoVLX: # %bb.0: # %entry
13035 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13036 ; NoVLX-NEXT: kmovw %edi, %k1
13037 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
13038 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13039 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13040 ; NoVLX-NEXT: kmovw %k0, %eax
13041 ; NoVLX-NEXT: vzeroupper
13044 %0 = bitcast <2 x i64> %__a to <2 x i64>
13045 %load = load i64, i64* %__b
13046 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13047 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13048 %2 = icmp sge <2 x i64> %0, %1
13049 %3 = bitcast i8 %__u to <8 x i1>
13050 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13051 %4 = and <2 x i1> %extract.i, %2
13052 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13053 %6 = bitcast <32 x i1> %5 to i32
13058 define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
13059 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
13060 ; VLX: # %bb.0: # %entry
13061 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0
13062 ; VLX-NEXT: kmovq %k0, %rax
13065 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
13066 ; NoVLX: # %bb.0: # %entry
13067 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
13068 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13069 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13070 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13071 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13072 ; NoVLX-NEXT: kmovw %k0, %eax
13073 ; NoVLX-NEXT: vzeroupper
13076 %0 = bitcast <2 x i64> %__a to <2 x i64>
13077 %1 = bitcast <2 x i64> %__b to <2 x i64>
13078 %2 = icmp sge <2 x i64> %0, %1
13079 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13080 %4 = bitcast <64 x i1> %3 to i64
13084 define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
13085 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
13086 ; VLX: # %bb.0: # %entry
13087 ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
13088 ; VLX-NEXT: kmovq %k0, %rax
13091 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
13092 ; NoVLX: # %bb.0: # %entry
13093 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13094 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
13095 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13096 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13097 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13098 ; NoVLX-NEXT: kmovw %k0, %eax
13099 ; NoVLX-NEXT: vzeroupper
13102 %0 = bitcast <2 x i64> %__a to <2 x i64>
13103 %load = load <2 x i64>, <2 x i64>* %__b
13104 %1 = bitcast <2 x i64> %load to <2 x i64>
13105 %2 = icmp sge <2 x i64> %0, %1
13106 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13107 %4 = bitcast <64 x i1> %3 to i64
13111 define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
13112 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
13113 ; VLX: # %bb.0: # %entry
13114 ; VLX-NEXT: kmovd %edi, %k1
13115 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
13116 ; VLX-NEXT: kmovq %k0, %rax
13119 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
13120 ; NoVLX: # %bb.0: # %entry
13121 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
13122 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13123 ; NoVLX-NEXT: kmovw %edi, %k1
13124 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13125 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13126 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13127 ; NoVLX-NEXT: kmovw %k0, %eax
13128 ; NoVLX-NEXT: vzeroupper
13131 %0 = bitcast <2 x i64> %__a to <2 x i64>
13132 %1 = bitcast <2 x i64> %__b to <2 x i64>
13133 %2 = icmp sge <2 x i64> %0, %1
13134 %3 = bitcast i8 %__u to <8 x i1>
13135 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13136 %4 = and <2 x i1> %2, %extract.i
13137 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13138 %6 = bitcast <64 x i1> %5 to i64
13142 define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
13143 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
13144 ; VLX: # %bb.0: # %entry
13145 ; VLX-NEXT: kmovd %edi, %k1
13146 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
13147 ; VLX-NEXT: kmovq %k0, %rax
13150 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
13151 ; NoVLX: # %bb.0: # %entry
13152 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13153 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
13154 ; NoVLX-NEXT: kmovw %edi, %k1
13155 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13156 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13157 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13158 ; NoVLX-NEXT: kmovw %k0, %eax
13159 ; NoVLX-NEXT: vzeroupper
13162 %0 = bitcast <2 x i64> %__a to <2 x i64>
13163 %load = load <2 x i64>, <2 x i64>* %__b
13164 %1 = bitcast <2 x i64> %load to <2 x i64>
13165 %2 = icmp sge <2 x i64> %0, %1
13166 %3 = bitcast i8 %__u to <8 x i1>
13167 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13168 %4 = and <2 x i1> %2, %extract.i
13169 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13170 %6 = bitcast <64 x i1> %5 to i64
13175 define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
13176 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
13177 ; VLX: # %bb.0: # %entry
13178 ; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
13179 ; VLX-NEXT: kmovq %k0, %rax
13182 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
13183 ; NoVLX: # %bb.0: # %entry
13184 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13185 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
13186 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13187 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13188 ; NoVLX-NEXT: kmovw %k0, %eax
13189 ; NoVLX-NEXT: vzeroupper
13192 %0 = bitcast <2 x i64> %__a to <2 x i64>
13193 %load = load i64, i64* %__b
13194 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13195 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13196 %2 = icmp sge <2 x i64> %0, %1
13197 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13198 %4 = bitcast <64 x i1> %3 to i64
13202 define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
13203 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
13204 ; VLX: # %bb.0: # %entry
13205 ; VLX-NEXT: kmovd %edi, %k1
13206 ; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
13207 ; VLX-NEXT: kmovq %k0, %rax
13210 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
13211 ; NoVLX: # %bb.0: # %entry
13212 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13213 ; NoVLX-NEXT: kmovw %edi, %k1
13214 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
13215 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13216 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13217 ; NoVLX-NEXT: kmovw %k0, %eax
13218 ; NoVLX-NEXT: vzeroupper
13221 %0 = bitcast <2 x i64> %__a to <2 x i64>
13222 %load = load i64, i64* %__b
13223 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13224 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13225 %2 = icmp sge <2 x i64> %0, %1
13226 %3 = bitcast i8 %__u to <8 x i1>
13227 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13228 %4 = and <2 x i1> %extract.i, %2
13229 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13230 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of two <4 x i64> register operands. The <4 x i1> result is
; widened to <8 x i1> with zero lanes (shuffle indices 4 through 7 select from
; the zeroinitializer operand) and bitcast to i8. The assertions expect a
; single ymm vpcmpnltq for the VLX run, and a zmm compare followed by a
; kshiftlw/kshiftrw $12 pair otherwise.
13235 define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13236 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask:
13237 ; VLX: # %bb.0: # %entry
13238 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0
13239 ; VLX-NEXT: kmovd %k0, %eax
13240 ; VLX-NEXT: # kill: def $al killed $al killed $eax
13241 ; VLX-NEXT: vzeroupper
13244 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask:
13245 ; NoVLX: # %bb.0: # %entry
13246 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13247 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13248 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13249 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13250 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13251 ; NoVLX-NEXT: kmovw %k0, %eax
13252 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
13253 ; NoVLX-NEXT: vzeroupper
13256 %0 = bitcast <4 x i64> %__a to <4 x i64>
13257 %1 = bitcast <4 x i64> %__b to <4 x i64>
13258 %2 = icmp sge <4 x i64> %0, %1
13259 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13260 %4 = bitcast <8 x i1> %3 to i8
; Signed >= compare of <4 x i64> %__a against the <4 x i64> loaded from %__b.
; The <4 x i1> result is widened to <8 x i1> with zero lanes (shuffle indices
; 4 through 7 select from the zeroinitializer operand) and bitcast to i8. The
; assertions expect the load folded into a ymm vpcmpnltq for the VLX run, and
; a separate vmovdqa load, zmm compare and kshiftlw/kshiftrw $12 pair otherwise.
13264 define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13265 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem:
13266 ; VLX: # %bb.0: # %entry
13267 ; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
13268 ; VLX-NEXT: kmovd %k0, %eax
13269 ; VLX-NEXT: # kill: def $al killed $al killed $eax
13270 ; VLX-NEXT: vzeroupper
13273 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem:
13274 ; NoVLX: # %bb.0: # %entry
13275 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13276 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
13277 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13278 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13279 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13280 ; NoVLX-NEXT: kmovw %k0, %eax
13281 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
13282 ; NoVLX-NEXT: vzeroupper
13285 %0 = bitcast <4 x i64> %__a to <4 x i64>
13286 %load = load <4 x i64>, <4 x i64>* %__b
13287 %1 = bitcast <4 x i64> %load to <4 x i64>
13288 %2 = icmp sge <4 x i64> %0, %1
13289 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13290 %4 = bitcast <8 x i1> %3 to i8
; Masked signed >= compare of two <4 x i64> register operands. The <4 x i1>
; result is ANDed with elements 0 through 3 of %__u viewed as <8 x i1>, widened
; to <8 x i1> with zero lanes (shuffle indices 4 through 7 select from
; zeroinitializer) and bitcast to i8. The assertions expect the AND to be
; folded into the compare as a {%k1} write-mask in both runs.
13294 define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13295 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask:
13296 ; VLX: # %bb.0: # %entry
13297 ; VLX-NEXT: kmovd %edi, %k1
13298 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
13299 ; VLX-NEXT: kmovd %k0, %eax
13300 ; VLX-NEXT: # kill: def $al killed $al killed $eax
13301 ; VLX-NEXT: vzeroupper
13304 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask:
13305 ; NoVLX: # %bb.0: # %entry
13306 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13307 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13308 ; NoVLX-NEXT: kmovw %edi, %k1
13309 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13310 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13311 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13312 ; NoVLX-NEXT: kmovw %k0, %eax
13313 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
13314 ; NoVLX-NEXT: vzeroupper
13317 %0 = bitcast <4 x i64> %__a to <4 x i64>
13318 %1 = bitcast <4 x i64> %__b to <4 x i64>
13319 %2 = icmp sge <4 x i64> %0, %1
13320 %3 = bitcast i8 %__u to <8 x i1>
13321 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13322 %4 = and <4 x i1> %2, %extract.i
13323 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13324 %6 = bitcast <8 x i1> %5 to i8
; Masked signed >= compare of <4 x i64> %__a against the <4 x i64> loaded from
; %__b. The <4 x i1> result is ANDed with elements 0 through 3 of %__u viewed
; as <8 x i1>, widened to <8 x i1> with zero lanes (shuffle indices 4 through 7
; select from zeroinitializer) and bitcast to i8. The assertions expect a
; {%k1} write-masked compare in both runs, with the load folded only for the
; VLX run.
13328 define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13329 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem:
13330 ; VLX: # %bb.0: # %entry
13331 ; VLX-NEXT: kmovd %edi, %k1
13332 ; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
13333 ; VLX-NEXT: kmovd %k0, %eax
13334 ; VLX-NEXT: # kill: def $al killed $al killed $eax
13335 ; VLX-NEXT: vzeroupper
13338 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem:
13339 ; NoVLX: # %bb.0: # %entry
13340 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13341 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
13342 ; NoVLX-NEXT: kmovw %edi, %k1
13343 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13344 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13345 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13346 ; NoVLX-NEXT: kmovw %k0, %eax
13347 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
13348 ; NoVLX-NEXT: vzeroupper
13351 %0 = bitcast <4 x i64> %__a to <4 x i64>
13352 %load = load <4 x i64>, <4 x i64>* %__b
13353 %1 = bitcast <4 x i64> %load to <4 x i64>
13354 %2 = icmp sge <4 x i64> %0, %1
13355 %3 = bitcast i8 %__u to <8 x i1>
13356 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13357 %4 = and <4 x i1> %2, %extract.i
13358 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13359 %6 = bitcast <8 x i1> %5 to i8
; Signed >= compare of <4 x i64> %__a against the i64 loaded from %__b splatted
; to all four lanes. The <4 x i1> result is widened to <8 x i1> with zero lanes
; (shuffle indices 4 through 7 select from the zeroinitializer operand) and
; bitcast to i8. The assertions expect a {1to4} broadcast compare on ymm for
; the VLX run, and a {1to8} zmm compare plus a kshiftlw/kshiftrw $12 pair
; otherwise.
13364 define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
13365 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
13366 ; VLX: # %bb.0: # %entry
13367 ; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0
13368 ; VLX-NEXT: kmovd %k0, %eax
13369 ; VLX-NEXT: # kill: def $al killed $al killed $eax
13370 ; VLX-NEXT: vzeroupper
13373 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
13374 ; NoVLX: # %bb.0: # %entry
13375 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13376 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
13377 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13378 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13379 ; NoVLX-NEXT: kmovw %k0, %eax
13380 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
13381 ; NoVLX-NEXT: vzeroupper
13384 %0 = bitcast <4 x i64> %__a to <4 x i64>
13385 %load = load i64, i64* %__b
13386 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13387 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13388 %2 = icmp sge <4 x i64> %0, %1
13389 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13390 %4 = bitcast <8 x i1> %3 to i8
; Masked signed >= compare of <4 x i64> %__a against a scalar i64 loaded from
; %__b and splatted to all 4 lanes. The low 4 bits of %__u gate the result,
; which is widened with zero lanes to <8 x i1> and bitcast to i8.
; Expected codegen: an embedded-broadcast compare under %k1 ({1to4} on ymm in
; the AVX512VL run, {1to8} on zmm in the AVX512F-only run, which also clears
; mask bits 4-15 with the kshift pair).
13394 define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
13395 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
13396 ; VLX: # %bb.0: # %entry
13397 ; VLX-NEXT: kmovd %edi, %k1
13398 ; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
13399 ; VLX-NEXT: kmovd %k0, %eax
13400 ; VLX-NEXT: # kill: def $al killed $al killed $eax
13401 ; VLX-NEXT: vzeroupper
13404 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
13405 ; NoVLX: # %bb.0: # %entry
13406 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13407 ; NoVLX-NEXT: kmovw %edi, %k1
13408 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
13409 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13410 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13411 ; NoVLX-NEXT: kmovw %k0, %eax
13412 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
13413 ; NoVLX-NEXT: vzeroupper
13416 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat idiom: insert the scalar into lane 0, then shuffle lane 0 into every lane.
13417 %load = load i64, i64* %__b
13418 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13419 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13420 %2 = icmp sge <4 x i64> %0, %1
; Reinterpret the i8 mask as 8 lanes and keep lanes 0-3 to match the 4-lane compare.
13421 %3 = bitcast i8 %__u to <8 x i1>
13422 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13423 %4 = and <4 x i1> %extract.i, %2
; Indices 4-7 select lanes of the zeroinitializer operand, so lanes 4-7 of the result are 0.
13424 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13425 %6 = bitcast <8 x i1> %5 to i8
; Signed >= compare of two <4 x i64> register operands. The <4 x i1> result is
; widened with zero lanes to <16 x i1> and bitcast to i16.
; Expected codegen: a ymm compare in the AVX512VL run; in the AVX512F-only run
; both operands are treated as zmm and the kshiftlw/kshiftrw $12 pair keeps
; only the low 4 bits of the 16-bit mask.
13430 define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13431 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask:
13432 ; VLX: # %bb.0: # %entry
13433 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0
13434 ; VLX-NEXT: kmovd %k0, %eax
13435 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13436 ; VLX-NEXT: vzeroupper
13439 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask:
13440 ; NoVLX: # %bb.0: # %entry
13441 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13442 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13443 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13444 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13445 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13446 ; NoVLX-NEXT: kmovw %k0, %eax
13447 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13448 ; NoVLX-NEXT: vzeroupper
13451 %0 = bitcast <4 x i64> %__a to <4 x i64>
13452 %1 = bitcast <4 x i64> %__b to <4 x i64>
13453 %2 = icmp sge <4 x i64> %0, %1
; Indices 4-7 (repeated) select lanes of the zeroinitializer operand, so every lane above 3 is 0.
13454 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13455 %4 = bitcast <16 x i1> %3 to i16
; Signed >= compare of <4 x i64> %__a against a <4 x i64> loaded from %__b.
; The <4 x i1> result is widened with zero lanes to <16 x i1> and bitcast to i16.
; Expected codegen: the AVX512VL run folds the load into a ymm compare; the
; AVX512F-only run loads into %ymm1, compares on zmm, and uses the
; kshiftlw/kshiftrw $12 pair to keep only the low 4 bits of the 16-bit mask.
13459 define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13460 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem:
13461 ; VLX: # %bb.0: # %entry
13462 ; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
13463 ; VLX-NEXT: kmovd %k0, %eax
13464 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13465 ; VLX-NEXT: vzeroupper
13468 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem:
13469 ; NoVLX: # %bb.0: # %entry
13470 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13471 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
13472 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13473 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13474 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13475 ; NoVLX-NEXT: kmovw %k0, %eax
13476 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13477 ; NoVLX-NEXT: vzeroupper
13480 %0 = bitcast <4 x i64> %__a to <4 x i64>
13481 %load = load <4 x i64>, <4 x i64>* %__b
13482 %1 = bitcast <4 x i64> %load to <4 x i64>
13483 %2 = icmp sge <4 x i64> %0, %1
; Indices 4-7 (repeated) select lanes of the zeroinitializer operand, so every lane above 3 is 0.
13484 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13485 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of two <4 x i64> register operands. The low 4 bits
; of %__u gate the compare result, which is widened with zero lanes to
; <16 x i1> and bitcast to i16.
; Expected codegen: a ymm compare under %k1 in the AVX512VL run; in the
; AVX512F-only run the operands are treated as zmm and the kshiftlw/kshiftrw
; $12 pair keeps only the low 4 bits of the 16-bit mask.
13489 define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13490 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask:
13491 ; VLX: # %bb.0: # %entry
13492 ; VLX-NEXT: kmovd %edi, %k1
13493 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
13494 ; VLX-NEXT: kmovd %k0, %eax
13495 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13496 ; VLX-NEXT: vzeroupper
13499 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask:
13500 ; NoVLX: # %bb.0: # %entry
13501 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13502 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13503 ; NoVLX-NEXT: kmovw %edi, %k1
13504 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13505 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13506 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13507 ; NoVLX-NEXT: kmovw %k0, %eax
13508 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13509 ; NoVLX-NEXT: vzeroupper
13512 %0 = bitcast <4 x i64> %__a to <4 x i64>
13513 %1 = bitcast <4 x i64> %__b to <4 x i64>
13514 %2 = icmp sge <4 x i64> %0, %1
; Reinterpret the i8 mask as 8 lanes and keep lanes 0-3 to match the 4-lane compare.
13515 %3 = bitcast i8 %__u to <8 x i1>
13516 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13517 %4 = and <4 x i1> %2, %extract.i
; Indices 4-7 (repeated) select lanes of the zeroinitializer operand, so every lane above 3 is 0.
13518 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13519 %6 = bitcast <16 x i1> %5 to i16
; Masked signed >= compare of <4 x i64> %__a against a <4 x i64> loaded from
; %__b. The low 4 bits of %__u gate the compare result, which is widened with
; zero lanes to <16 x i1> and bitcast to i16.
; Expected codegen: the AVX512VL run folds the load into a ymm compare under
; %k1; the AVX512F-only run loads into %ymm1, compares on zmm, and uses the
; kshiftlw/kshiftrw $12 pair to keep only the low 4 bits of the 16-bit mask.
13523 define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13524 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem:
13525 ; VLX: # %bb.0: # %entry
13526 ; VLX-NEXT: kmovd %edi, %k1
13527 ; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
13528 ; VLX-NEXT: kmovd %k0, %eax
13529 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13530 ; VLX-NEXT: vzeroupper
13533 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem:
13534 ; NoVLX: # %bb.0: # %entry
13535 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13536 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
13537 ; NoVLX-NEXT: kmovw %edi, %k1
13538 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13539 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13540 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13541 ; NoVLX-NEXT: kmovw %k0, %eax
13542 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13543 ; NoVLX-NEXT: vzeroupper
13546 %0 = bitcast <4 x i64> %__a to <4 x i64>
13547 %load = load <4 x i64>, <4 x i64>* %__b
13548 %1 = bitcast <4 x i64> %load to <4 x i64>
13549 %2 = icmp sge <4 x i64> %0, %1
; Reinterpret the i8 mask as 8 lanes and keep lanes 0-3 to match the 4-lane compare.
13550 %3 = bitcast i8 %__u to <8 x i1>
13551 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13552 %4 = and <4 x i1> %2, %extract.i
; Indices 4-7 (repeated) select lanes of the zeroinitializer operand, so every lane above 3 is 0.
13553 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13554 %6 = bitcast <16 x i1> %5 to i16
; Signed >= compare of <4 x i64> %__a against a scalar i64 loaded from %__b and
; splatted to all 4 lanes. The <4 x i1> result is widened with zero lanes to
; <16 x i1> and bitcast to i16.
; Expected codegen: the scalar load becomes an embedded-broadcast memory
; operand ({1to4} on ymm in the AVX512VL run, {1to8} on zmm in the
; AVX512F-only run, which also clears mask bits 4-15 with the kshift pair).
13559 define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
13560 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
13561 ; VLX: # %bb.0: # %entry
13562 ; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0
13563 ; VLX-NEXT: kmovd %k0, %eax
13564 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13565 ; VLX-NEXT: vzeroupper
13568 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
13569 ; NoVLX: # %bb.0: # %entry
13570 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13571 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
13572 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13573 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13574 ; NoVLX-NEXT: kmovw %k0, %eax
13575 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13576 ; NoVLX-NEXT: vzeroupper
13579 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat idiom: insert the scalar into lane 0, then shuffle lane 0 into every lane.
13580 %load = load i64, i64* %__b
13581 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13582 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13583 %2 = icmp sge <4 x i64> %0, %1
; Indices 4-7 (repeated) select lanes of the zeroinitializer operand, so every lane above 3 is 0.
13584 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13585 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of <4 x i64> %__a against a scalar i64 loaded from
; %__b and splatted to all 4 lanes. The low 4 bits of %__u gate the result,
; which is widened with zero lanes to <16 x i1> and bitcast to i16.
; Expected codegen: an embedded-broadcast compare under %k1 ({1to4} on ymm in
; the AVX512VL run, {1to8} on zmm in the AVX512F-only run, which also clears
; mask bits 4-15 with the kshift pair).
13589 define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
13590 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
13591 ; VLX: # %bb.0: # %entry
13592 ; VLX-NEXT: kmovd %edi, %k1
13593 ; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
13594 ; VLX-NEXT: kmovd %k0, %eax
13595 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13596 ; VLX-NEXT: vzeroupper
13599 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
13600 ; NoVLX: # %bb.0: # %entry
13601 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13602 ; NoVLX-NEXT: kmovw %edi, %k1
13603 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
13604 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13605 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13606 ; NoVLX-NEXT: kmovw %k0, %eax
13607 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13608 ; NoVLX-NEXT: vzeroupper
13611 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat idiom: insert the scalar into lane 0, then shuffle lane 0 into every lane.
13612 %load = load i64, i64* %__b
13613 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13614 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13615 %2 = icmp sge <4 x i64> %0, %1
; Reinterpret the i8 mask as 8 lanes and keep lanes 0-3 to match the 4-lane compare.
13616 %3 = bitcast i8 %__u to <8 x i1>
13617 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13618 %4 = and <4 x i1> %extract.i, %2
; Indices 4-7 (repeated) select lanes of the zeroinitializer operand, so every lane above 3 is 0.
13619 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13620 %6 = bitcast <16 x i1> %5 to i16
; Signed >= compare of two <4 x i64> register operands. The <4 x i1> result is
; widened with zero lanes to <32 x i1> and bitcast to i32.
; Expected codegen: a ymm compare in the AVX512VL run; in the AVX512F-only run
; both operands are treated as zmm and the kshiftlw/kshiftrw $12 pair keeps
; only the low 4 bits of the 16-bit mask before it is moved to %eax.
13625 define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13626 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
13627 ; VLX: # %bb.0: # %entry
13628 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0
13629 ; VLX-NEXT: kmovd %k0, %eax
13630 ; VLX-NEXT: vzeroupper
13633 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
13634 ; NoVLX: # %bb.0: # %entry
13635 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13636 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13637 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13638 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13639 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13640 ; NoVLX-NEXT: kmovw %k0, %eax
13641 ; NoVLX-NEXT: vzeroupper
13644 %0 = bitcast <4 x i64> %__a to <4 x i64>
13645 %1 = bitcast <4 x i64> %__b to <4 x i64>
13646 %2 = icmp sge <4 x i64> %0, %1
; Indices 4-7 (repeated) select lanes of the zeroinitializer operand, so every lane above 3 is 0.
13647 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13648 %4 = bitcast <32 x i1> %3 to i32
; Signed >= compare of <4 x i64> %__a against a <4 x i64> loaded from %__b.
; The <4 x i1> result is widened with zero lanes to <32 x i1> and bitcast to i32.
; Expected codegen: the AVX512VL run folds the load into a ymm compare; the
; AVX512F-only run loads into %ymm1, compares on zmm, and uses the
; kshiftlw/kshiftrw $12 pair to keep only the low 4 bits of the 16-bit mask.
13652 define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13653 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
13654 ; VLX: # %bb.0: # %entry
13655 ; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
13656 ; VLX-NEXT: kmovd %k0, %eax
13657 ; VLX-NEXT: vzeroupper
13660 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
13661 ; NoVLX: # %bb.0: # %entry
13662 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13663 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
13664 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13665 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13666 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13667 ; NoVLX-NEXT: kmovw %k0, %eax
13668 ; NoVLX-NEXT: vzeroupper
13671 %0 = bitcast <4 x i64> %__a to <4 x i64>
13672 %load = load <4 x i64>, <4 x i64>* %__b
13673 %1 = bitcast <4 x i64> %load to <4 x i64>
13674 %2 = icmp sge <4 x i64> %0, %1
; Indices 4-7 (repeated) select lanes of the zeroinitializer operand, so every lane above 3 is 0.
13675 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13676 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of two <4 x i64> register operands. The low 4 bits
; of %__u gate the compare result, which is widened with zero lanes to
; <32 x i1> and bitcast to i32.
; Expected codegen: a ymm compare under %k1 in the AVX512VL run; in the
; AVX512F-only run the operands are treated as zmm and the kshiftlw/kshiftrw
; $12 pair keeps only the low 4 bits of the 16-bit mask.
13680 define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13681 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
13682 ; VLX: # %bb.0: # %entry
13683 ; VLX-NEXT: kmovd %edi, %k1
13684 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
13685 ; VLX-NEXT: kmovd %k0, %eax
13686 ; VLX-NEXT: vzeroupper
13689 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
13690 ; NoVLX: # %bb.0: # %entry
13691 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13692 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13693 ; NoVLX-NEXT: kmovw %edi, %k1
13694 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13695 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13696 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13697 ; NoVLX-NEXT: kmovw %k0, %eax
13698 ; NoVLX-NEXT: vzeroupper
13701 %0 = bitcast <4 x i64> %__a to <4 x i64>
13702 %1 = bitcast <4 x i64> %__b to <4 x i64>
13703 %2 = icmp sge <4 x i64> %0, %1
; Reinterpret the i8 mask as 8 lanes and keep lanes 0-3 to match the 4-lane compare.
13704 %3 = bitcast i8 %__u to <8 x i1>
13705 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13706 %4 = and <4 x i1> %2, %extract.i
; Indices 4-7 (repeated) select lanes of the zeroinitializer operand, so every lane above 3 is 0.
13707 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13708 %6 = bitcast <32 x i1> %5 to i32
; Masked signed >= compare of <4 x i64> %__a against a <4 x i64> loaded from
; %__b. The low 4 bits of %__u gate the compare result, which is widened with
; zero lanes to <32 x i1> and bitcast to i32.
; Expected codegen: the AVX512VL run folds the load into a ymm compare under
; %k1; the AVX512F-only run loads into %ymm1, compares on zmm, and uses the
; kshiftlw/kshiftrw $12 pair to keep only the low 4 bits of the 16-bit mask.
13712 define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13713 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
13714 ; VLX: # %bb.0: # %entry
13715 ; VLX-NEXT: kmovd %edi, %k1
13716 ; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
13717 ; VLX-NEXT: kmovd %k0, %eax
13718 ; VLX-NEXT: vzeroupper
13721 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
13722 ; NoVLX: # %bb.0: # %entry
13723 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13724 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
13725 ; NoVLX-NEXT: kmovw %edi, %k1
13726 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13727 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13728 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13729 ; NoVLX-NEXT: kmovw %k0, %eax
13730 ; NoVLX-NEXT: vzeroupper
13733 %0 = bitcast <4 x i64> %__a to <4 x i64>
13734 %load = load <4 x i64>, <4 x i64>* %__b
13735 %1 = bitcast <4 x i64> %load to <4 x i64>
13736 %2 = icmp sge <4 x i64> %0, %1
; Reinterpret the i8 mask as 8 lanes and keep lanes 0-3 to match the 4-lane compare.
13737 %3 = bitcast i8 %__u to <8 x i1>
13738 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13739 %4 = and <4 x i1> %2, %extract.i
; Indices 4-7 (repeated) select lanes of the zeroinitializer operand, so every lane above 3 is 0.
13740 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13741 %6 = bitcast <32 x i1> %5 to i32
; Signed >= compare of <4 x i64> %__a against a scalar i64 loaded from %__b and
; splatted to all 4 lanes. The <4 x i1> result is widened with zero lanes to
; <32 x i1> and bitcast to i32.
; Expected codegen: the scalar load becomes an embedded-broadcast memory
; operand ({1to4} on ymm in the AVX512VL run, {1to8} on zmm in the
; AVX512F-only run, which also clears mask bits 4-15 with the kshift pair).
13746 define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
13747 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
13748 ; VLX: # %bb.0: # %entry
13749 ; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0
13750 ; VLX-NEXT: kmovd %k0, %eax
13751 ; VLX-NEXT: vzeroupper
13754 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
13755 ; NoVLX: # %bb.0: # %entry
13756 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13757 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
13758 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13759 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13760 ; NoVLX-NEXT: kmovw %k0, %eax
13761 ; NoVLX-NEXT: vzeroupper
13764 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat idiom: insert the scalar into lane 0, then shuffle lane 0 into every lane.
13765 %load = load i64, i64* %__b
13766 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13767 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13768 %2 = icmp sge <4 x i64> %0, %1
; Indices 4-7 (repeated) select lanes of the zeroinitializer operand, so every lane above 3 is 0.
13769 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13770 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of <4 x i64> %__a against a scalar i64 loaded from
; %__b and splatted to all 4 lanes. The low 4 bits of %__u gate the result,
; which is widened with zero lanes to <32 x i1> and bitcast to i32.
; Expected codegen: an embedded-broadcast compare under %k1 ({1to4} on ymm in
; the AVX512VL run, {1to8} on zmm in the AVX512F-only run, which also clears
; mask bits 4-15 with the kshift pair).
13774 define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
13775 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
13776 ; VLX: # %bb.0: # %entry
13777 ; VLX-NEXT: kmovd %edi, %k1
13778 ; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
13779 ; VLX-NEXT: kmovd %k0, %eax
13780 ; VLX-NEXT: vzeroupper
13783 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
13784 ; NoVLX: # %bb.0: # %entry
13785 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13786 ; NoVLX-NEXT: kmovw %edi, %k1
13787 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
13788 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13789 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13790 ; NoVLX-NEXT: kmovw %k0, %eax
13791 ; NoVLX-NEXT: vzeroupper
13794 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat idiom: insert the scalar into lane 0, then shuffle lane 0 into every lane.
13795 %load = load i64, i64* %__b
13796 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13797 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13798 %2 = icmp sge <4 x i64> %0, %1
; Reinterpret the i8 mask as 8 lanes and keep lanes 0-3 to match the 4-lane compare.
13799 %3 = bitcast i8 %__u to <8 x i1>
13800 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13801 %4 = and <4 x i1> %extract.i, %2
; Indices 4-7 (repeated) select lanes of the zeroinitializer operand, so every lane above 3 is 0.
13802 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13803 %6 = bitcast <32 x i1> %5 to i32
; Signed >= compare of two <4 x i64> register operands. The <4 x i1> result is
; widened with zero lanes to <64 x i1> and bitcast to i64.
; Expected codegen: a ymm compare read out with kmovq in the AVX512VL run; in
; the AVX512F-only run both operands are treated as zmm, the kshiftlw/kshiftrw
; $12 pair keeps only the low 4 mask bits, and the mask is read with kmovw.
13808 define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13809 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
13810 ; VLX: # %bb.0: # %entry
13811 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0
13812 ; VLX-NEXT: kmovq %k0, %rax
13813 ; VLX-NEXT: vzeroupper
13816 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
13817 ; NoVLX: # %bb.0: # %entry
13818 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13819 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13820 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13821 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13822 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13823 ; NoVLX-NEXT: kmovw %k0, %eax
13824 ; NoVLX-NEXT: vzeroupper
13827 %0 = bitcast <4 x i64> %__a to <4 x i64>
13828 %1 = bitcast <4 x i64> %__b to <4 x i64>
13829 %2 = icmp sge <4 x i64> %0, %1
; Indices 4-7 (repeated) select lanes of the zeroinitializer operand, so every lane above 3 is 0.
13830 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13831 %4 = bitcast <64 x i1> %3 to i64
; Signed >= compare of <4 x i64> %__a against a <4 x i64> loaded from %__b.
; The <4 x i1> result is widened with zero lanes to <64 x i1> and bitcast to i64.
; Expected codegen: the AVX512VL run folds the load into a ymm compare and
; reads the mask with kmovq; the AVX512F-only run loads into %ymm1, compares
; on zmm, keeps the low 4 mask bits via the kshift pair, and reads with kmovw.
13835 define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13836 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
13837 ; VLX: # %bb.0: # %entry
13838 ; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
13839 ; VLX-NEXT: kmovq %k0, %rax
13840 ; VLX-NEXT: vzeroupper
13843 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
13844 ; NoVLX: # %bb.0: # %entry
13845 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13846 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
13847 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13848 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13849 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13850 ; NoVLX-NEXT: kmovw %k0, %eax
13851 ; NoVLX-NEXT: vzeroupper
13854 %0 = bitcast <4 x i64> %__a to <4 x i64>
13855 %load = load <4 x i64>, <4 x i64>* %__b
13856 %1 = bitcast <4 x i64> %load to <4 x i64>
13857 %2 = icmp sge <4 x i64> %0, %1
; Indices 4-7 (repeated) select lanes of the zeroinitializer operand, so every lane above 3 is 0.
13858 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13859 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of two <4 x i64> register operands. The low 4 bits
; of %__u gate the compare result, which is widened with zero lanes to
; <64 x i1> and bitcast to i64.
; Expected codegen: a ymm compare under %k1 read out with kmovq in the
; AVX512VL run; in the AVX512F-only run the operands are treated as zmm, the
; kshift pair keeps only the low 4 mask bits, and the mask is read with kmovw.
13863 define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13864 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
13865 ; VLX: # %bb.0: # %entry
13866 ; VLX-NEXT: kmovd %edi, %k1
13867 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
13868 ; VLX-NEXT: kmovq %k0, %rax
13869 ; VLX-NEXT: vzeroupper
13872 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
13873 ; NoVLX: # %bb.0: # %entry
13874 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13875 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13876 ; NoVLX-NEXT: kmovw %edi, %k1
13877 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13878 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13879 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13880 ; NoVLX-NEXT: kmovw %k0, %eax
13881 ; NoVLX-NEXT: vzeroupper
13884 %0 = bitcast <4 x i64> %__a to <4 x i64>
13885 %1 = bitcast <4 x i64> %__b to <4 x i64>
13886 %2 = icmp sge <4 x i64> %0, %1
; Reinterpret the i8 mask as 8 lanes and keep lanes 0-3 to match the 4-lane compare.
13887 %3 = bitcast i8 %__u to <8 x i1>
13888 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13889 %4 = and <4 x i1> %2, %extract.i
; Indices 4-7 (repeated) select lanes of the zeroinitializer operand, so every lane above 3 is 0.
13890 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13891 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare of <4 x i64> %__a against a <4 x i64> loaded from
; %__b. The low 4 bits of %__u gate the compare result, which is widened with
; zero lanes to <64 x i1> and bitcast to i64.
; Expected codegen: the AVX512VL run folds the load into a ymm compare under
; %k1 and reads the mask with kmovq; the AVX512F-only run loads into %ymm1,
; compares on zmm, keeps the low 4 mask bits via the kshift pair, and reads
; with kmovw.
13895 define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13896 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
13897 ; VLX: # %bb.0: # %entry
13898 ; VLX-NEXT: kmovd %edi, %k1
13899 ; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
13900 ; VLX-NEXT: kmovq %k0, %rax
13901 ; VLX-NEXT: vzeroupper
13904 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
13905 ; NoVLX: # %bb.0: # %entry
13906 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13907 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
13908 ; NoVLX-NEXT: kmovw %edi, %k1
13909 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13910 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13911 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13912 ; NoVLX-NEXT: kmovw %k0, %eax
13913 ; NoVLX-NEXT: vzeroupper
13916 %0 = bitcast <4 x i64> %__a to <4 x i64>
13917 %load = load <4 x i64>, <4 x i64>* %__b
13918 %1 = bitcast <4 x i64> %load to <4 x i64>
13919 %2 = icmp sge <4 x i64> %0, %1
; Reinterpret the i8 mask as 8 lanes and keep lanes 0-3 to match the 4-lane compare.
13920 %3 = bitcast i8 %__u to <8 x i1>
13921 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13922 %4 = and <4 x i1> %2, %extract.i
; Indices 4-7 (repeated) select lanes of the zeroinitializer operand, so every lane above 3 is 0.
13923 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13924 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of <4 x i64> %__a against a scalar i64 loaded from %__b and
; splatted to all 4 lanes. The <4 x i1> result is widened with zero lanes to
; <64 x i1> and bitcast to i64.
; Expected codegen: the scalar load becomes an embedded-broadcast memory
; operand ({1to4} on ymm with a kmovq read in the AVX512VL run; {1to8} on zmm
; with the kshift pair and a kmovw read in the AVX512F-only run).
13929 define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
13930 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
13931 ; VLX: # %bb.0: # %entry
13932 ; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0
13933 ; VLX-NEXT: kmovq %k0, %rax
13934 ; VLX-NEXT: vzeroupper
13937 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
13938 ; NoVLX: # %bb.0: # %entry
13939 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13940 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
13941 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13942 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13943 ; NoVLX-NEXT: kmovw %k0, %eax
13944 ; NoVLX-NEXT: vzeroupper
13947 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat idiom: insert the scalar into lane 0, then shuffle lane 0 into every lane.
13948 %load = load i64, i64* %__b
13949 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13950 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13951 %2 = icmp sge <4 x i64> %0, %1
; Indices 4-7 (repeated) select lanes of the zeroinitializer operand, so every lane above 3 is 0.
13952 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13953 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of <4 x i64> %__a against a scalar i64 loaded from
; %__b and splatted to all 4 lanes. The low 4 bits of %__u gate the result,
; which is widened with zero lanes to <64 x i1> and bitcast to i64.
; Expected codegen: an embedded-broadcast compare under %k1 ({1to4} on ymm
; with a kmovq read in the AVX512VL run; {1to8} on zmm with the kshift pair
; and a kmovw read in the AVX512F-only run).
13957 define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
13958 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
13959 ; VLX: # %bb.0: # %entry
13960 ; VLX-NEXT: kmovd %edi, %k1
13961 ; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
13962 ; VLX-NEXT: kmovq %k0, %rax
13963 ; VLX-NEXT: vzeroupper
13966 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
13967 ; NoVLX: # %bb.0: # %entry
13968 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13969 ; NoVLX-NEXT: kmovw %edi, %k1
13970 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
13971 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13972 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13973 ; NoVLX-NEXT: kmovw %k0, %eax
13974 ; NoVLX-NEXT: vzeroupper
13977 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat idiom: insert the scalar into lane 0, then shuffle lane 0 into every lane.
13978 %load = load i64, i64* %__b
13979 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13980 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13981 %2 = icmp sge <4 x i64> %0, %1
; Reinterpret the i8 mask as 8 lanes and keep lanes 0-3 to match the 4-lane compare.
13982 %3 = bitcast i8 %__u to <8 x i1>
13983 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13984 %4 = and <4 x i1> %extract.i, %2
; Indices 4-7 (repeated) select lanes of the zeroinitializer operand, so every lane above 3 is 0.
13985 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13986 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of two <8 x i64> register operands. The <8 x i1> result is
; widened with zero lanes to <16 x i1> and bitcast to i16.
; Expected codegen: both runs compare directly on zmm registers; no kshift
; pair appears in the expected output, and only the mask-read instruction
; differs (kmovd vs. kmovw).
13991 define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
13992 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask:
13993 ; VLX: # %bb.0: # %entry
13994 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13995 ; VLX-NEXT: kmovd %k0, %eax
13996 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13997 ; VLX-NEXT: vzeroupper
14000 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask:
14001 ; NoVLX: # %bb.0: # %entry
14002 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14003 ; NoVLX-NEXT: kmovw %k0, %eax
14004 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14005 ; NoVLX-NEXT: vzeroupper
14008 %0 = bitcast <8 x i64> %__a to <8 x i64>
14009 %1 = bitcast <8 x i64> %__b to <8 x i64>
14010 %2 = icmp sge <8 x i64> %0, %1
; Indices 8-15 select lanes of the zeroinitializer operand, so lanes 8-15 of the result are 0.
14011 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14012 %4 = bitcast <16 x i1> %3 to i16
; Signed >= compare of <8 x i64> %__a against an <8 x i64> loaded from %__b.
; The <8 x i1> result is widened with zero lanes to <16 x i1> and bitcast to i16.
; Expected codegen: both runs fold the load into a zmm compare; only the
; mask-read instruction differs (kmovd vs. kmovw).
14016 define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
14017 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem:
14018 ; VLX: # %bb.0: # %entry
14019 ; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
14020 ; VLX-NEXT: kmovd %k0, %eax
14021 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14022 ; VLX-NEXT: vzeroupper
14025 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem:
14026 ; NoVLX: # %bb.0: # %entry
14027 ; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
14028 ; NoVLX-NEXT: kmovw %k0, %eax
14029 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14030 ; NoVLX-NEXT: vzeroupper
14033 %0 = bitcast <8 x i64> %__a to <8 x i64>
14034 %load = load <8 x i64>, <8 x i64>* %__b
14035 %1 = bitcast <8 x i64> %load to <8 x i64>
14036 %2 = icmp sge <8 x i64> %0, %1
; Indices 8-15 select lanes of the zeroinitializer operand, so lanes 8-15 of the result are 0.
14037 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14038 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of two <8 x i64> register operands. All 8 bits of
; %__u gate the compare result, which is widened with zero lanes to <16 x i1>
; and bitcast to i16.
; Expected codegen: both runs use a zmm compare under %k1; only the k-register
; move instructions differ (kmovd vs. kmovw).
14042 define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14043 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask:
14044 ; VLX: # %bb.0: # %entry
14045 ; VLX-NEXT: kmovd %edi, %k1
14046 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14047 ; VLX-NEXT: kmovd %k0, %eax
14048 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14049 ; VLX-NEXT: vzeroupper
14052 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask:
14053 ; NoVLX: # %bb.0: # %entry
14054 ; NoVLX-NEXT: kmovw %edi, %k1
14055 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14056 ; NoVLX-NEXT: kmovw %k0, %eax
14057 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14058 ; NoVLX-NEXT: vzeroupper
14061 %0 = bitcast <8 x i64> %__a to <8 x i64>
14062 %1 = bitcast <8 x i64> %__b to <8 x i64>
14063 %2 = icmp sge <8 x i64> %0, %1
; The i8 mask maps one bit per compare lane, so no lane extraction is needed here.
14064 %3 = bitcast i8 %__u to <8 x i1>
14065 %4 = and <8 x i1> %2, %3
; Indices 8-15 select lanes of the zeroinitializer operand, so lanes 8-15 of the result are 0.
14066 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14067 %6 = bitcast <16 x i1> %5 to i16
; Masked signed >= compare of an <8 x i64> register operand against an
; <8 x i64> loaded from %__b. The result is ANDed with %__u, widened to
; <16 x i1> and bitcast to i16, the declared return type.
; Both run configurations expect the load folded into vpcmpnltq as (%rsi).
14071 define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
14072 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem:
14073 ; VLX: # %bb.0: # %entry
14074 ; VLX-NEXT: kmovd %edi, %k1
14075 ; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14076 ; VLX-NEXT: kmovd %k0, %eax
14077 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14078 ; VLX-NEXT: vzeroupper
14081 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem:
14082 ; NoVLX: # %bb.0: # %entry
14083 ; NoVLX-NEXT: kmovw %edi, %k1
14084 ; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14085 ; NoVLX-NEXT: kmovw %k0, %eax
14086 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14087 ; NoVLX-NEXT: vzeroupper
14090 %0 = bitcast <8 x i64> %__a to <8 x i64>
14091 %load = load <8 x i64>, <8 x i64>* %__b
14092 %1 = bitcast <8 x i64> %load to <8 x i64>
14093 %2 = icmp sge <8 x i64> %0, %1
14094 %3 = bitcast i8 %__u to <8 x i1>
14095 %4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so the upper 8 result lanes are zero.
14096 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14097 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed >= compare of an <8 x i64> register operand against a single
; i64 loaded from %__b and splatted to all 8 lanes. The <8 x i1> result is
; widened to <16 x i1> and bitcast to i16, the declared return type.
; Both run configurations expect the splat folded into the (%rdi){1to8} operand.
14102 define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
14103 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
14104 ; VLX: # %bb.0: # %entry
14105 ; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14106 ; VLX-NEXT: kmovd %k0, %eax
14107 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14108 ; VLX-NEXT: vzeroupper
14111 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
14112 ; NoVLX: # %bb.0: # %entry
14113 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14114 ; NoVLX-NEXT: kmovw %k0, %eax
14115 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14116 ; NoVLX-NEXT: vzeroupper
14119 %0 = bitcast <8 x i64> %__a to <8 x i64>
14120 %load = load i64, i64* %__b
; Insert the scalar into lane 0, then replicate lane 0 to every lane (all shuffle indices are 0).
14121 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14122 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14123 %2 = icmp sge <8 x i64> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so the upper 8 result lanes are zero.
14124 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14125 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of an <8 x i64> register operand against a single
; i64 loaded from %__b and splatted to all 8 lanes. The result is ANDed with
; %__u, widened to <16 x i1> and bitcast to i16, the declared return type.
; Both run configurations expect a (%rsi){1to8} operand with the {%k1} mask.
14129 define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
14130 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
14131 ; VLX: # %bb.0: # %entry
14132 ; VLX-NEXT: kmovd %edi, %k1
14133 ; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14134 ; VLX-NEXT: kmovd %k0, %eax
14135 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14136 ; VLX-NEXT: vzeroupper
14139 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
14140 ; NoVLX: # %bb.0: # %entry
14141 ; NoVLX-NEXT: kmovw %edi, %k1
14142 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14143 ; NoVLX-NEXT: kmovw %k0, %eax
14144 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14145 ; NoVLX-NEXT: vzeroupper
14148 %0 = bitcast <8 x i64> %__a to <8 x i64>
14149 %load = load i64, i64* %__b
; Insert the scalar into lane 0, then replicate lane 0 to every lane (all shuffle indices are 0).
14150 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14151 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14152 %2 = icmp sge <8 x i64> %0, %1
14153 %3 = bitcast i8 %__u to <8 x i1>
14154 %4 = and <8 x i1> %3, %2
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so the upper 8 result lanes are zero.
14155 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14156 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed >= compare of two <8 x i64> register operands.
; The <8 x i1> result is widened to <32 x i1> and bitcast to i32, the declared
; return type. Both run configurations expect one vpcmpnltq and a k-register
; move into %eax.
14161 define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14162 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask:
14163 ; VLX: # %bb.0: # %entry
14164 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14165 ; VLX-NEXT: kmovd %k0, %eax
14166 ; VLX-NEXT: vzeroupper
14169 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask:
14170 ; NoVLX: # %bb.0: # %entry
14171 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14172 ; NoVLX-NEXT: kmovw %k0, %eax
14173 ; NoVLX-NEXT: vzeroupper
14176 %0 = bitcast <8 x i64> %__a to <8 x i64>
14177 %1 = bitcast <8 x i64> %__b to <8 x i64>
14178 %2 = icmp sge <8 x i64> %0, %1
; Every shuffle index past the first 8 is in 8-15, i.e. a lane of the zeroinitializer operand, so result lanes 8-31 are zero.
14179 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14180 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed >= compare of an <8 x i64> register operand against an
; <8 x i64> loaded from %__b. The <8 x i1> result is widened to <32 x i1> and
; bitcast to i32, the declared return type.
; Both run configurations expect the load folded into vpcmpnltq as (%rdi).
14184 define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
14185 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem:
14186 ; VLX: # %bb.0: # %entry
14187 ; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
14188 ; VLX-NEXT: kmovd %k0, %eax
14189 ; VLX-NEXT: vzeroupper
14192 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem:
14193 ; NoVLX: # %bb.0: # %entry
14194 ; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
14195 ; NoVLX-NEXT: kmovw %k0, %eax
14196 ; NoVLX-NEXT: vzeroupper
14199 %0 = bitcast <8 x i64> %__a to <8 x i64>
14200 %load = load <8 x i64>, <8 x i64>* %__b
14201 %1 = bitcast <8 x i64> %load to <8 x i64>
14202 %2 = icmp sge <8 x i64> %0, %1
; Every shuffle index past the first 8 is in 8-15, i.e. a lane of the zeroinitializer operand, so result lanes 8-31 are zero.
14203 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14204 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of two <8 x i64> register operands.
; The <8 x i1> result is ANDed with %__u (bitcast to <8 x i1>), widened to
; <32 x i1> and bitcast to i32, the declared return type.
; Both run configurations expect one vpcmpnltq with the {%k1} mask operand.
14208 define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14209 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask:
14210 ; VLX: # %bb.0: # %entry
14211 ; VLX-NEXT: kmovd %edi, %k1
14212 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14213 ; VLX-NEXT: kmovd %k0, %eax
14214 ; VLX-NEXT: vzeroupper
14217 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask:
14218 ; NoVLX: # %bb.0: # %entry
14219 ; NoVLX-NEXT: kmovw %edi, %k1
14220 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14221 ; NoVLX-NEXT: kmovw %k0, %eax
14222 ; NoVLX-NEXT: vzeroupper
14225 %0 = bitcast <8 x i64> %__a to <8 x i64>
14226 %1 = bitcast <8 x i64> %__b to <8 x i64>
14227 %2 = icmp sge <8 x i64> %0, %1
14228 %3 = bitcast i8 %__u to <8 x i1>
14229 %4 = and <8 x i1> %2, %3
; Every shuffle index past the first 8 is in 8-15, i.e. a lane of the zeroinitializer operand, so result lanes 8-31 are zero.
14230 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14231 %6 = bitcast <32 x i1> %5 to i32
; Masked signed >= compare of an <8 x i64> register operand against an
; <8 x i64> loaded from %__b. The result is ANDed with %__u, widened to
; <32 x i1> and bitcast to i32, the declared return type.
; Both run configurations expect the load folded as (%rsi) with the {%k1} mask.
14235 define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
14236 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem:
14237 ; VLX: # %bb.0: # %entry
14238 ; VLX-NEXT: kmovd %edi, %k1
14239 ; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14240 ; VLX-NEXT: kmovd %k0, %eax
14241 ; VLX-NEXT: vzeroupper
14244 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem:
14245 ; NoVLX: # %bb.0: # %entry
14246 ; NoVLX-NEXT: kmovw %edi, %k1
14247 ; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14248 ; NoVLX-NEXT: kmovw %k0, %eax
14249 ; NoVLX-NEXT: vzeroupper
14252 %0 = bitcast <8 x i64> %__a to <8 x i64>
14253 %load = load <8 x i64>, <8 x i64>* %__b
14254 %1 = bitcast <8 x i64> %load to <8 x i64>
14255 %2 = icmp sge <8 x i64> %0, %1
14256 %3 = bitcast i8 %__u to <8 x i1>
14257 %4 = and <8 x i1> %2, %3
; Every shuffle index past the first 8 is in 8-15, i.e. a lane of the zeroinitializer operand, so result lanes 8-31 are zero.
14258 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14259 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed >= compare of an <8 x i64> register operand against a single
; i64 loaded from %__b and splatted to all 8 lanes. The <8 x i1> result is
; widened to <32 x i1> and bitcast to i32, the declared return type.
; Both run configurations expect the splat folded into the (%rdi){1to8} operand.
14264 define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
14265 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
14266 ; VLX: # %bb.0: # %entry
14267 ; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14268 ; VLX-NEXT: kmovd %k0, %eax
14269 ; VLX-NEXT: vzeroupper
14272 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
14273 ; NoVLX: # %bb.0: # %entry
14274 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14275 ; NoVLX-NEXT: kmovw %k0, %eax
14276 ; NoVLX-NEXT: vzeroupper
14279 %0 = bitcast <8 x i64> %__a to <8 x i64>
14280 %load = load i64, i64* %__b
; Insert the scalar into lane 0, then replicate lane 0 to every lane (all shuffle indices are 0).
14281 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14282 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14283 %2 = icmp sge <8 x i64> %0, %1
; Every shuffle index past the first 8 is in 8-15, i.e. a lane of the zeroinitializer operand, so result lanes 8-31 are zero.
14284 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14285 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of an <8 x i64> register operand against a single
; i64 loaded from %__b and splatted to all 8 lanes. The result is ANDed with
; %__u, widened to <32 x i1> and bitcast to i32, the declared return type.
; Both run configurations expect a (%rsi){1to8} operand with the {%k1} mask.
14289 define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
14290 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
14291 ; VLX: # %bb.0: # %entry
14292 ; VLX-NEXT: kmovd %edi, %k1
14293 ; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14294 ; VLX-NEXT: kmovd %k0, %eax
14295 ; VLX-NEXT: vzeroupper
14298 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
14299 ; NoVLX: # %bb.0: # %entry
14300 ; NoVLX-NEXT: kmovw %edi, %k1
14301 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14302 ; NoVLX-NEXT: kmovw %k0, %eax
14303 ; NoVLX-NEXT: vzeroupper
14306 %0 = bitcast <8 x i64> %__a to <8 x i64>
14307 %load = load i64, i64* %__b
; Insert the scalar into lane 0, then replicate lane 0 to every lane (all shuffle indices are 0).
14308 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14309 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14310 %2 = icmp sge <8 x i64> %0, %1
14311 %3 = bitcast i8 %__u to <8 x i1>
14312 %4 = and <8 x i1> %3, %2
; Every shuffle index past the first 8 is in 8-15, i.e. a lane of the zeroinitializer operand, so result lanes 8-31 are zero.
14313 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14314 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed >= compare of two <8 x i64> register operands.
; The <8 x i1> result is widened to <64 x i1> and bitcast to i64, the declared
; return type. The +avx512bw run expects kmovq into %rax; the +avx512f-only run
; expects kmovw into %eax.
14319 define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14320 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask:
14321 ; VLX: # %bb.0: # %entry
14322 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14323 ; VLX-NEXT: kmovq %k0, %rax
14324 ; VLX-NEXT: vzeroupper
14327 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask:
14328 ; NoVLX: # %bb.0: # %entry
14329 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14330 ; NoVLX-NEXT: kmovw %k0, %eax
14331 ; NoVLX-NEXT: vzeroupper
14334 %0 = bitcast <8 x i64> %__a to <8 x i64>
14335 %1 = bitcast <8 x i64> %__b to <8 x i64>
14336 %2 = icmp sge <8 x i64> %0, %1
; Every shuffle index past the first 8 is in 8-15, i.e. a lane of the zeroinitializer operand, so result lanes 8-63 are zero.
14337 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14338 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed >= compare of an <8 x i64> register operand against an
; <8 x i64> loaded from %__b. The <8 x i1> result is widened to <64 x i1> and
; bitcast to i64, the declared return type.
; Both run configurations expect the load folded into vpcmpnltq as (%rdi).
14342 define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
14343 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem:
14344 ; VLX: # %bb.0: # %entry
14345 ; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
14346 ; VLX-NEXT: kmovq %k0, %rax
14347 ; VLX-NEXT: vzeroupper
14350 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem:
14351 ; NoVLX: # %bb.0: # %entry
14352 ; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
14353 ; NoVLX-NEXT: kmovw %k0, %eax
14354 ; NoVLX-NEXT: vzeroupper
14357 %0 = bitcast <8 x i64> %__a to <8 x i64>
14358 %load = load <8 x i64>, <8 x i64>* %__b
14359 %1 = bitcast <8 x i64> %load to <8 x i64>
14360 %2 = icmp sge <8 x i64> %0, %1
; Every shuffle index past the first 8 is in 8-15, i.e. a lane of the zeroinitializer operand, so result lanes 8-63 are zero.
14361 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14362 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of two <8 x i64> register operands.
; The <8 x i1> result is ANDed with %__u (bitcast to <8 x i1>), widened to
; <64 x i1> and bitcast to i64, the declared return type.
; Both run configurations expect one vpcmpnltq with the {%k1} mask operand.
14366 define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14367 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask:
14368 ; VLX: # %bb.0: # %entry
14369 ; VLX-NEXT: kmovd %edi, %k1
14370 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14371 ; VLX-NEXT: kmovq %k0, %rax
14372 ; VLX-NEXT: vzeroupper
14375 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask:
14376 ; NoVLX: # %bb.0: # %entry
14377 ; NoVLX-NEXT: kmovw %edi, %k1
14378 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14379 ; NoVLX-NEXT: kmovw %k0, %eax
14380 ; NoVLX-NEXT: vzeroupper
14383 %0 = bitcast <8 x i64> %__a to <8 x i64>
14384 %1 = bitcast <8 x i64> %__b to <8 x i64>
14385 %2 = icmp sge <8 x i64> %0, %1
14386 %3 = bitcast i8 %__u to <8 x i1>
14387 %4 = and <8 x i1> %2, %3
; Every shuffle index past the first 8 is in 8-15, i.e. a lane of the zeroinitializer operand, so result lanes 8-63 are zero.
14388 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14389 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare of an <8 x i64> register operand against an
; <8 x i64> loaded from %__b. The result is ANDed with %__u, widened to
; <64 x i1> and bitcast to i64, the declared return type.
; Both run configurations expect the load folded as (%rsi) with the {%k1} mask.
14393 define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
14394 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem:
14395 ; VLX: # %bb.0: # %entry
14396 ; VLX-NEXT: kmovd %edi, %k1
14397 ; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14398 ; VLX-NEXT: kmovq %k0, %rax
14399 ; VLX-NEXT: vzeroupper
14402 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem:
14403 ; NoVLX: # %bb.0: # %entry
14404 ; NoVLX-NEXT: kmovw %edi, %k1
14405 ; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14406 ; NoVLX-NEXT: kmovw %k0, %eax
14407 ; NoVLX-NEXT: vzeroupper
14410 %0 = bitcast <8 x i64> %__a to <8 x i64>
14411 %load = load <8 x i64>, <8 x i64>* %__b
14412 %1 = bitcast <8 x i64> %load to <8 x i64>
14413 %2 = icmp sge <8 x i64> %0, %1
14414 %3 = bitcast i8 %__u to <8 x i1>
14415 %4 = and <8 x i1> %2, %3
; Every shuffle index past the first 8 is in 8-15, i.e. a lane of the zeroinitializer operand, so result lanes 8-63 are zero.
14416 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14417 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed >= compare of an <8 x i64> register operand against a single
; i64 loaded from %__b and splatted to all 8 lanes. The <8 x i1> result is
; widened to <64 x i1> and bitcast to i64, the declared return type.
; Both run configurations expect the splat folded into the (%rdi){1to8} operand.
14422 define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
14423 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
14424 ; VLX: # %bb.0: # %entry
14425 ; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14426 ; VLX-NEXT: kmovq %k0, %rax
14427 ; VLX-NEXT: vzeroupper
14430 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
14431 ; NoVLX: # %bb.0: # %entry
14432 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14433 ; NoVLX-NEXT: kmovw %k0, %eax
14434 ; NoVLX-NEXT: vzeroupper
14437 %0 = bitcast <8 x i64> %__a to <8 x i64>
14438 %load = load i64, i64* %__b
; Insert the scalar into lane 0, then replicate lane 0 to every lane (all shuffle indices are 0).
14439 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14440 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14441 %2 = icmp sge <8 x i64> %0, %1
; Every shuffle index past the first 8 is in 8-15, i.e. a lane of the zeroinitializer operand, so result lanes 8-63 are zero.
14442 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14443 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of an <8 x i64> register operand against a single
; i64 loaded from %__b and splatted to all 8 lanes. The result is ANDed with
; %__u, widened to <64 x i1> and bitcast to i64, the declared return type.
; Both run configurations expect a (%rsi){1to8} operand with the {%k1} mask.
14447 define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
14448 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
14449 ; VLX: # %bb.0: # %entry
14450 ; VLX-NEXT: kmovd %edi, %k1
14451 ; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14452 ; VLX-NEXT: kmovq %k0, %rax
14453 ; VLX-NEXT: vzeroupper
14456 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
14457 ; NoVLX: # %bb.0: # %entry
14458 ; NoVLX-NEXT: kmovw %edi, %k1
14459 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14460 ; NoVLX-NEXT: kmovw %k0, %eax
14461 ; NoVLX-NEXT: vzeroupper
14464 %0 = bitcast <8 x i64> %__a to <8 x i64>
14465 %load = load i64, i64* %__b
; Insert the scalar into lane 0, then replicate lane 0 to every lane (all shuffle indices are 0).
14466 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14467 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14468 %2 = icmp sge <8 x i64> %0, %1
14469 %3 = bitcast i8 %__u to <8 x i1>
14470 %4 = and <8 x i1> %3, %2
; Every shuffle index past the first 8 is in 8-15, i.e. a lane of the zeroinitializer operand, so result lanes 8-63 are zero.
14471 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14472 %6 = bitcast <64 x i1> %5 to i64
; Unmasked unsigned < compare of two <2 x i64> arguments reinterpreted as
; <16 x i8>. The <16 x i1> result is widened to <32 x i1> and bitcast to i32,
; the declared return type.
; The +avx512bw/vl run expects a single vpcmpltub into %k0; the +avx512f-only
; run expects a vpmaxub/vpcmpeqb/vpternlogq sequence followed by
; vpmovsxbd/vptestmd to form the mask.
14477 define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14478 ; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask:
14479 ; VLX: # %bb.0: # %entry
14480 ; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0
14481 ; VLX-NEXT: kmovd %k0, %eax
14484 ; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask:
14485 ; NoVLX: # %bb.0: # %entry
14486 ; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
14487 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14488 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14489 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14490 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14491 ; NoVLX-NEXT: kmovw %k0, %eax
14492 ; NoVLX-NEXT: vzeroupper
14495 %0 = bitcast <2 x i64> %__a to <16 x i8>
14496 %1 = bitcast <2 x i64> %__b to <16 x i8>
14497 %2 = icmp ult <16 x i8> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the upper 16 result lanes are zero.
14498 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14499 %4 = bitcast <32 x i1> %3 to i32
; Unmasked unsigned < compare of a <2 x i64> argument against a <2 x i64>
; loaded from %__b, both reinterpreted as <16 x i8>. The <16 x i1> result is
; widened to <32 x i1> and bitcast to i32, the declared return type.
; The +avx512bw/vl run expects the load folded into vpcmpltub; the
; +avx512f-only run expects it folded into vpmaxub.
14503 define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
14504 ; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem:
14505 ; VLX: # %bb.0: # %entry
14506 ; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0
14507 ; VLX-NEXT: kmovd %k0, %eax
14510 ; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem:
14511 ; NoVLX: # %bb.0: # %entry
14512 ; NoVLX-NEXT: vpmaxub (%rdi), %xmm0, %xmm1
14513 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14514 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14515 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14516 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14517 ; NoVLX-NEXT: kmovw %k0, %eax
14518 ; NoVLX-NEXT: vzeroupper
14521 %0 = bitcast <2 x i64> %__a to <16 x i8>
14522 %load = load <2 x i64>, <2 x i64>* %__b
14523 %1 = bitcast <2 x i64> %load to <16 x i8>
14524 %2 = icmp ult <16 x i8> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the upper 16 result lanes are zero.
14525 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14526 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned < compare of two <2 x i64> arguments reinterpreted as
; <16 x i8>. The <16 x i1> result is ANDed with %__u (bitcast to <16 x i1>),
; widened to <32 x i1> and bitcast to i32, the declared return type.
; The +avx512bw/vl run expects vpcmpltub with the {%k1} mask operand; the
; +avx512f-only run expects the mask applied afterwards with a scalar
; andl %edi, %eax.
14530 define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14531 ; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
14532 ; VLX: # %bb.0: # %entry
14533 ; VLX-NEXT: kmovd %edi, %k1
14534 ; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1}
14535 ; VLX-NEXT: kmovd %k0, %eax
14538 ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
14539 ; NoVLX: # %bb.0: # %entry
14540 ; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
14541 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14542 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14543 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14544 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14545 ; NoVLX-NEXT: kmovw %k0, %eax
14546 ; NoVLX-NEXT: andl %edi, %eax
14547 ; NoVLX-NEXT: vzeroupper
14550 %0 = bitcast <2 x i64> %__a to <16 x i8>
14551 %1 = bitcast <2 x i64> %__b to <16 x i8>
14552 %2 = icmp ult <16 x i8> %0, %1
14553 %3 = bitcast i16 %__u to <16 x i1>
14554 %4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the upper 16 result lanes are zero.
14555 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14556 %6 = bitcast <32 x i1> %5 to i32
; Masked unsigned < compare of a <2 x i64> argument against a <2 x i64> loaded
; from %__b, both reinterpreted as <16 x i8>. The result is ANDed with %__u,
; widened to <32 x i1> and bitcast to i32, the declared return type.
; The +avx512bw/vl run expects vpcmpltub (%rsi) with the {%k1} mask operand; the
; +avx512f-only run expects the load folded into vpmaxub and the mask applied
; with andl %edi, %eax.
14560 define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
14561 ; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
14562 ; VLX: # %bb.0: # %entry
14563 ; VLX-NEXT: kmovd %edi, %k1
14564 ; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1}
14565 ; VLX-NEXT: kmovd %k0, %eax
14568 ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
14569 ; NoVLX: # %bb.0: # %entry
14570 ; NoVLX-NEXT: vpmaxub (%rsi), %xmm0, %xmm1
14571 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14572 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14573 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14574 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14575 ; NoVLX-NEXT: kmovw %k0, %eax
14576 ; NoVLX-NEXT: andl %edi, %eax
14577 ; NoVLX-NEXT: vzeroupper
14580 %0 = bitcast <2 x i64> %__a to <16 x i8>
14581 %load = load <2 x i64>, <2 x i64>* %__b
14582 %1 = bitcast <2 x i64> %load to <16 x i8>
14583 %2 = icmp ult <16 x i8> %0, %1
14584 %3 = bitcast i16 %__u to <16 x i1>
14585 %4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the upper 16 result lanes are zero.
14586 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14587 %6 = bitcast <32 x i1> %5 to i32
; Unmasked unsigned < compare of two <2 x i64> arguments reinterpreted as
; <16 x i8>. The <16 x i1> result is widened to <64 x i1> and bitcast to i64,
; the declared return type.
; The +avx512bw/vl run expects vpcmpltub plus kmovq into %rax; the
; +avx512f-only run expects a vpmaxub/vpcmpeqb/vpternlogq sequence followed by
; vpmovsxbd/vptestmd and kmovw into %eax.
14592 define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14593 ; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
14594 ; VLX: # %bb.0: # %entry
14595 ; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0
14596 ; VLX-NEXT: kmovq %k0, %rax
14599 ; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
14600 ; NoVLX: # %bb.0: # %entry
14601 ; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
14602 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14603 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14604 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14605 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14606 ; NoVLX-NEXT: kmovw %k0, %eax
14607 ; NoVLX-NEXT: vzeroupper
14610 %0 = bitcast <2 x i64> %__a to <16 x i8>
14611 %1 = bitcast <2 x i64> %__b to <16 x i8>
14612 %2 = icmp ult <16 x i8> %0, %1
; Every shuffle index past the first 16 is in 16-31, i.e. a lane of the zeroinitializer operand, so result lanes 16-63 are zero.
14613 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14614 %4 = bitcast <64 x i1> %3 to i64
; Unmasked unsigned < compare of a <2 x i64> argument against a <2 x i64>
; loaded from %__b, both reinterpreted as <16 x i8>. The <16 x i1> result is
; widened to <64 x i1> and bitcast to i64, the declared return type.
; The +avx512bw/vl run expects the load folded into vpcmpltub; the
; +avx512f-only run expects it folded into vpmaxub.
14618 define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
14619 ; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
14620 ; VLX: # %bb.0: # %entry
14621 ; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0
14622 ; VLX-NEXT: kmovq %k0, %rax
14625 ; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
14626 ; NoVLX: # %bb.0: # %entry
14627 ; NoVLX-NEXT: vpmaxub (%rdi), %xmm0, %xmm1
14628 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14629 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14630 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14631 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14632 ; NoVLX-NEXT: kmovw %k0, %eax
14633 ; NoVLX-NEXT: vzeroupper
14636 %0 = bitcast <2 x i64> %__a to <16 x i8>
14637 %load = load <2 x i64>, <2 x i64>* %__b
14638 %1 = bitcast <2 x i64> %load to <16 x i8>
14639 %2 = icmp ult <16 x i8> %0, %1
; Every shuffle index past the first 16 is in 16-31, i.e. a lane of the zeroinitializer operand, so result lanes 16-63 are zero.
14640 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14641 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned < compare of two <2 x i64> arguments reinterpreted as
; <16 x i8>. The <16 x i1> result is ANDed with %__u (bitcast to <16 x i1>),
; widened to <64 x i1> and bitcast to i64, the declared return type.
; The +avx512bw/vl run expects vpcmpltub with the {%k1} mask operand; the
; +avx512f-only run expects the mask applied afterwards with a scalar
; andl %edi, %eax.
14645 define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14646 ; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask:
14647 ; VLX: # %bb.0: # %entry
14648 ; VLX-NEXT: kmovd %edi, %k1
14649 ; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1}
14650 ; VLX-NEXT: kmovq %k0, %rax
14653 ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask:
14654 ; NoVLX: # %bb.0: # %entry
14655 ; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
14656 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14657 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14658 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14659 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14660 ; NoVLX-NEXT: kmovw %k0, %eax
14661 ; NoVLX-NEXT: andl %edi, %eax
14662 ; NoVLX-NEXT: vzeroupper
14665 %0 = bitcast <2 x i64> %__a to <16 x i8>
14666 %1 = bitcast <2 x i64> %__b to <16 x i8>
14667 %2 = icmp ult <16 x i8> %0, %1
14668 %3 = bitcast i16 %__u to <16 x i1>
14669 %4 = and <16 x i1> %2, %3
; Every shuffle index past the first 16 is in 16-31, i.e. a lane of the zeroinitializer operand, so result lanes 16-63 are zero.
14670 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14671 %6 = bitcast <64 x i1> %5 to i64
; Masked unsigned < compare of a <2 x i64> argument against a <2 x i64> loaded
; from %__b, both reinterpreted as <16 x i8>. The result is ANDed with %__u,
; widened to <64 x i1> and bitcast to i64, the declared return type.
; The +avx512bw/vl run expects vpcmpltub (%rsi) with the {%k1} mask operand; the
; +avx512f-only run expects the load folded into vpmaxub and the mask applied
; with andl %edi, %eax.
14675 define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
14676 ; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem:
14677 ; VLX: # %bb.0: # %entry
14678 ; VLX-NEXT: kmovd %edi, %k1
14679 ; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1}
14680 ; VLX-NEXT: kmovq %k0, %rax
14683 ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem:
14684 ; NoVLX: # %bb.0: # %entry
14685 ; NoVLX-NEXT: vpmaxub (%rsi), %xmm0, %xmm1
14686 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14687 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14688 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14689 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14690 ; NoVLX-NEXT: kmovw %k0, %eax
14691 ; NoVLX-NEXT: andl %edi, %eax
14692 ; NoVLX-NEXT: vzeroupper
14695 %0 = bitcast <2 x i64> %__a to <16 x i8>
14696 %load = load <2 x i64>, <2 x i64>* %__b
14697 %1 = bitcast <2 x i64> %load to <16 x i8>
14698 %2 = icmp ult <16 x i8> %0, %1
14699 %3 = bitcast i16 %__u to <16 x i1>
14700 %4 = and <16 x i1> %2, %3
; Every shuffle index past the first 16 is in 16-31, i.e. a lane of the zeroinitializer operand, so result lanes 16-63 are zero.
14701 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14702 %6 = bitcast <64 x i1> %5 to i64
; Unmasked unsigned < compare of two <4 x i64> arguments reinterpreted as
; <32 x i8>. The <32 x i1> result is widened to <64 x i1> and bitcast to i64,
; the declared return type.
; The +avx512bw/vl run expects a single ymm vpcmpltub; the +avx512f-only run
; expects the compare result to be converted to a mask in two 16-byte halves
; (low xmm, then vextracti128 of the high half) that are merged with
; shll $16 / orl.
14707 define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
14708 ; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask:
14709 ; VLX: # %bb.0: # %entry
14710 ; VLX-NEXT: vpcmpltub %ymm1, %ymm0, %k0
14711 ; VLX-NEXT: kmovq %k0, %rax
14712 ; VLX-NEXT: vzeroupper
14715 ; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask:
14716 ; NoVLX: # %bb.0: # %entry
14717 ; NoVLX-NEXT: vpmaxub %ymm1, %ymm0, %ymm1
14718 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
14719 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14720 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
14721 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
14722 ; NoVLX-NEXT: kmovw %k0, %ecx
14723 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
14724 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14725 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14726 ; NoVLX-NEXT: kmovw %k0, %eax
14727 ; NoVLX-NEXT: shll $16, %eax
14728 ; NoVLX-NEXT: orl %ecx, %eax
14729 ; NoVLX-NEXT: vzeroupper
14732 %0 = bitcast <4 x i64> %__a to <32 x i8>
14733 %1 = bitcast <4 x i64> %__b to <32 x i8>
14734 %2 = icmp ult <32 x i8> %0, %1
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so the upper 32 result lanes are zero.
14735 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
14736 %4 = bitcast <64 x i1> %3 to i64
; Unsigned less-than compare of <32 x i8> %__a against a vector loaded from
; %__b. The <32 x i1> result is widened to <64 x i1> by a shufflevector whose
; indices 32-63 select lanes of the zeroinitializer operand, then bitcast to i64.
; Both runs expect the load folded into the first vector instruction as a
; (%rdi) memory operand: vpcmpltub for VLX, vpmaxub for NoVLX. The NoVLX run
; emulates the compare as NOT(max(a, b) == a) and merges two 16-lane halves.
14740 define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
14741 ; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem:
14742 ; VLX: # %bb.0: # %entry
14743 ; VLX-NEXT: vpcmpltub (%rdi), %ymm0, %k0
14744 ; VLX-NEXT: kmovq %k0, %rax
14745 ; VLX-NEXT: vzeroupper
14748 ; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem:
14749 ; NoVLX: # %bb.0: # %entry
14750 ; NoVLX-NEXT: vpmaxub (%rdi), %ymm0, %ymm1
14751 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
14752 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14753 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
14754 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
14755 ; NoVLX-NEXT: kmovw %k0, %ecx
14756 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
14757 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14758 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14759 ; NoVLX-NEXT: kmovw %k0, %eax
14760 ; NoVLX-NEXT: shll $16, %eax
14761 ; NoVLX-NEXT: orl %ecx, %eax
14762 ; NoVLX-NEXT: vzeroupper
14765 %0 = bitcast <4 x i64> %__a to <32 x i8>
14766 %load = load <4 x i64>, <4 x i64>* %__b
14767 %1 = bitcast <4 x i64> %load to <32 x i8>
14768 %2 = icmp ult <32 x i8> %0, %1
14769 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
14770 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned less-than compare of two <32 x i8> vectors: the <32 x i1>
; compare result is ANDed with %__u (bitcast to <32 x i1>), widened to
; <64 x i1> by a shufflevector whose indices 32-63 select lanes of the
; zeroinitializer operand, then bitcast to i64.
; The VLX run expects %edi moved to %k1 and used as the write-mask of
; vpcmpltub. The NoVLX run expects the emulated compare (vpmaxub, vpcmpeqb,
; vpternlogq $15) with the mask applied by andl to each 16-bit half in GPRs.
14774 define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
14775 ; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask:
14776 ; VLX: # %bb.0: # %entry
14777 ; VLX-NEXT: kmovd %edi, %k1
14778 ; VLX-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1}
14779 ; VLX-NEXT: kmovq %k0, %rax
14780 ; VLX-NEXT: vzeroupper
14783 ; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask:
14784 ; NoVLX: # %bb.0: # %entry
14785 ; NoVLX-NEXT: vpmaxub %ymm1, %ymm0, %ymm1
14786 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
14787 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14788 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
14789 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
14790 ; NoVLX-NEXT: kmovw %k0, %eax
14791 ; NoVLX-NEXT: andl %edi, %eax
14792 ; NoVLX-NEXT: shrl $16, %edi
14793 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
14794 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14795 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14796 ; NoVLX-NEXT: kmovw %k0, %ecx
14797 ; NoVLX-NEXT: andl %edi, %ecx
14798 ; NoVLX-NEXT: shll $16, %ecx
14799 ; NoVLX-NEXT: movzwl %ax, %eax
14800 ; NoVLX-NEXT: orl %ecx, %eax
14801 ; NoVLX-NEXT: vzeroupper
14804 %0 = bitcast <4 x i64> %__a to <32 x i8>
14805 %1 = bitcast <4 x i64> %__b to <32 x i8>
14806 %2 = icmp ult <32 x i8> %0, %1
14807 %3 = bitcast i32 %__u to <32 x i1>
14808 %4 = and <32 x i1> %2, %3
14809 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
14810 %6 = bitcast <64 x i1> %5 to i64
; Masked unsigned less-than compare of <32 x i8> %__a against a vector loaded
; from %__b: the <32 x i1> compare result is ANDed with %__u (bitcast to
; <32 x i1>), widened to <64 x i1> by a shufflevector whose indices 32-63
; select lanes of the zeroinitializer operand, then bitcast to i64.
; Both runs expect the load folded as a (%rsi) memory operand. The VLX run
; uses %k1 as the write-mask of vpcmpltub; the NoVLX run applies the mask by
; andl to each 16-bit half of the emulated compare result.
14814 define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
14815 ; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem:
14816 ; VLX: # %bb.0: # %entry
14817 ; VLX-NEXT: kmovd %edi, %k1
14818 ; VLX-NEXT: vpcmpltub (%rsi), %ymm0, %k0 {%k1}
14819 ; VLX-NEXT: kmovq %k0, %rax
14820 ; VLX-NEXT: vzeroupper
14823 ; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem:
14824 ; NoVLX: # %bb.0: # %entry
14825 ; NoVLX-NEXT: vpmaxub (%rsi), %ymm0, %ymm1
14826 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
14827 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14828 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
14829 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
14830 ; NoVLX-NEXT: kmovw %k0, %eax
14831 ; NoVLX-NEXT: andl %edi, %eax
14832 ; NoVLX-NEXT: shrl $16, %edi
14833 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
14834 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14835 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14836 ; NoVLX-NEXT: kmovw %k0, %ecx
14837 ; NoVLX-NEXT: andl %edi, %ecx
14838 ; NoVLX-NEXT: shll $16, %ecx
14839 ; NoVLX-NEXT: movzwl %ax, %eax
14840 ; NoVLX-NEXT: orl %ecx, %eax
14841 ; NoVLX-NEXT: vzeroupper
14844 %0 = bitcast <4 x i64> %__a to <32 x i8>
14845 %load = load <4 x i64>, <4 x i64>* %__b
14846 %1 = bitcast <4 x i64> %load to <32 x i8>
14847 %2 = icmp ult <32 x i8> %0, %1
14848 %3 = bitcast i32 %__u to <32 x i1>
14849 %4 = and <32 x i1> %2, %3
14850 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
14851 %6 = bitcast <64 x i1> %5 to i64
; Unsigned less-than compare of two <8 x i16> vectors (both in registers).
; The <8 x i1> result is widened to <16 x i1> by a shufflevector whose
; indices 8-15 select lanes of the zeroinitializer operand, then bitcast to i16.
; The VLX run expects a single vpcmpltuw into a mask register. The NoVLX run
; expects the compare emulated as NOT(max(a, b) == a) with vpmaxuw, vpcmpeqw
; and vpternlogq $15, then converted to a mask via vpmovsxwq + vptestmq.
14856 define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14857 ; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask:
14858 ; VLX: # %bb.0: # %entry
14859 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
14860 ; VLX-NEXT: kmovd %k0, %eax
14861 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14864 ; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask:
14865 ; NoVLX: # %bb.0: # %entry
14866 ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
14867 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
14868 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14869 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
14870 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
14871 ; NoVLX-NEXT: kmovw %k0, %eax
14872 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14873 ; NoVLX-NEXT: vzeroupper
14876 %0 = bitcast <2 x i64> %__a to <8 x i16>
14877 %1 = bitcast <2 x i64> %__b to <8 x i16>
14878 %2 = icmp ult <8 x i16> %0, %1
14879 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14880 %4 = bitcast <16 x i1> %3 to i16
; Unsigned less-than compare of <8 x i16> %__a against a vector loaded from
; %__b. The <8 x i1> result is widened to <16 x i1> by a shufflevector whose
; indices 8-15 select lanes of the zeroinitializer operand, then bitcast to i16.
; Both runs expect the load folded as a (%rdi) memory operand: into vpcmpltuw
; for VLX, and into vpmaxuw for the emulated NoVLX sequence.
14884 define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
14885 ; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem:
14886 ; VLX: # %bb.0: # %entry
14887 ; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
14888 ; VLX-NEXT: kmovd %k0, %eax
14889 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14892 ; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem:
14893 ; NoVLX: # %bb.0: # %entry
14894 ; NoVLX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm1
14895 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
14896 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14897 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
14898 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
14899 ; NoVLX-NEXT: kmovw %k0, %eax
14900 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14901 ; NoVLX-NEXT: vzeroupper
14904 %0 = bitcast <2 x i64> %__a to <8 x i16>
14905 %load = load <2 x i64>, <2 x i64>* %__b
14906 %1 = bitcast <2 x i64> %load to <8 x i16>
14907 %2 = icmp ult <8 x i16> %0, %1
14908 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14909 %4 = bitcast <16 x i1> %3 to i16
; Masked unsigned less-than compare of two <8 x i16> vectors: the <8 x i1>
; compare result is ANDed with %__u (bitcast to <8 x i1>), widened to
; <16 x i1> by a shufflevector whose indices 8-15 select lanes of the
; zeroinitializer operand, then bitcast to i16.
; The VLX run expects %k1 used as the write-mask of vpcmpltuw. The NoVLX run
; expects the emulated compare (vpmaxuw, vpcmpeqw, vpternlogq $15) with the
; mask applied as the write-mask of the final vptestmq.
14913 define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14914 ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask:
14915 ; VLX: # %bb.0: # %entry
14916 ; VLX-NEXT: kmovd %edi, %k1
14917 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
14918 ; VLX-NEXT: kmovd %k0, %eax
14919 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14922 ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask:
14923 ; NoVLX: # %bb.0: # %entry
14924 ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
14925 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
14926 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14927 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
14928 ; NoVLX-NEXT: kmovw %edi, %k1
14929 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
14930 ; NoVLX-NEXT: kmovw %k0, %eax
14931 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14932 ; NoVLX-NEXT: vzeroupper
14935 %0 = bitcast <2 x i64> %__a to <8 x i16>
14936 %1 = bitcast <2 x i64> %__b to <8 x i16>
14937 %2 = icmp ult <8 x i16> %0, %1
14938 %3 = bitcast i8 %__u to <8 x i1>
14939 %4 = and <8 x i1> %2, %3
14940 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14941 %6 = bitcast <16 x i1> %5 to i16
; Masked unsigned less-than compare of <8 x i16> %__a against a vector loaded
; from %__b: the <8 x i1> compare result is ANDed with %__u (bitcast to
; <8 x i1>), widened to <16 x i1> by a shufflevector whose indices 8-15
; select lanes of the zeroinitializer operand, then bitcast to i16.
; Both runs expect the load folded as a (%rsi) memory operand and the mask
; moved to %k1: write-mask of vpcmpltuw (VLX) or of vptestmq (NoVLX).
14945 define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
14946 ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem:
14947 ; VLX: # %bb.0: # %entry
14948 ; VLX-NEXT: kmovd %edi, %k1
14949 ; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
14950 ; VLX-NEXT: kmovd %k0, %eax
14951 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14954 ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem:
14955 ; NoVLX: # %bb.0: # %entry
14956 ; NoVLX-NEXT: vpmaxuw (%rsi), %xmm0, %xmm1
14957 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
14958 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14959 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
14960 ; NoVLX-NEXT: kmovw %edi, %k1
14961 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
14962 ; NoVLX-NEXT: kmovw %k0, %eax
14963 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14964 ; NoVLX-NEXT: vzeroupper
14967 %0 = bitcast <2 x i64> %__a to <8 x i16>
14968 %load = load <2 x i64>, <2 x i64>* %__b
14969 %1 = bitcast <2 x i64> %load to <8 x i16>
14970 %2 = icmp ult <8 x i16> %0, %1
14971 %3 = bitcast i8 %__u to <8 x i1>
14972 %4 = and <8 x i1> %2, %3
14973 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14974 %6 = bitcast <16 x i1> %5 to i16
; Unsigned less-than compare of two <8 x i16> vectors (both in registers).
; The <8 x i1> result is widened to <32 x i1> by a shufflevector in which
; every index >= 8 selects a lane of the zeroinitializer operand, then
; bitcast to i32.
; The VLX run expects a single vpcmpltuw into a mask register. The NoVLX run
; expects the compare emulated as NOT(max(a, b) == a) with vpmaxuw, vpcmpeqw
; and vpternlogq $15, then converted to a mask via vpmovsxwq + vptestmq.
14979 define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14980 ; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask:
14981 ; VLX: # %bb.0: # %entry
14982 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
14983 ; VLX-NEXT: kmovd %k0, %eax
14986 ; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask:
14987 ; NoVLX: # %bb.0: # %entry
14988 ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
14989 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
14990 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14991 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
14992 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
14993 ; NoVLX-NEXT: kmovw %k0, %eax
14994 ; NoVLX-NEXT: vzeroupper
14997 %0 = bitcast <2 x i64> %__a to <8 x i16>
14998 %1 = bitcast <2 x i64> %__b to <8 x i16>
14999 %2 = icmp ult <8 x i16> %0, %1
15000 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15001 %4 = bitcast <32 x i1> %3 to i32
; Unsigned less-than compare of <8 x i16> %__a against a vector loaded from
; %__b. The <8 x i1> result is widened to <32 x i1> by a shufflevector in
; which every index >= 8 selects a lane of the zeroinitializer operand, then
; bitcast to i32.
; Both runs expect the load folded as a (%rdi) memory operand: into vpcmpltuw
; for VLX, and into vpmaxuw for the emulated NoVLX sequence.
15005 define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15006 ; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem:
15007 ; VLX: # %bb.0: # %entry
15008 ; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
15009 ; VLX-NEXT: kmovd %k0, %eax
15012 ; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem:
15013 ; NoVLX: # %bb.0: # %entry
15014 ; NoVLX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm1
15015 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15016 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15017 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15018 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
15019 ; NoVLX-NEXT: kmovw %k0, %eax
15020 ; NoVLX-NEXT: vzeroupper
15023 %0 = bitcast <2 x i64> %__a to <8 x i16>
15024 %load = load <2 x i64>, <2 x i64>* %__b
15025 %1 = bitcast <2 x i64> %load to <8 x i16>
15026 %2 = icmp ult <8 x i16> %0, %1
15027 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15028 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned less-than compare of two <8 x i16> vectors: the <8 x i1>
; compare result is ANDed with %__u (bitcast to <8 x i1>), widened to
; <32 x i1> by a shufflevector in which every index >= 8 selects a lane of
; the zeroinitializer operand, then bitcast to i32.
; The VLX run expects %k1 used as the write-mask of vpcmpltuw. The NoVLX run
; expects the emulated compare (vpmaxuw, vpcmpeqw, vpternlogq $15) with the
; mask applied as the write-mask of the final vptestmq.
15032 define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15033 ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask:
15034 ; VLX: # %bb.0: # %entry
15035 ; VLX-NEXT: kmovd %edi, %k1
15036 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
15037 ; VLX-NEXT: kmovd %k0, %eax
15040 ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask:
15041 ; NoVLX: # %bb.0: # %entry
15042 ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
15043 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15044 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15045 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15046 ; NoVLX-NEXT: kmovw %edi, %k1
15047 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
15048 ; NoVLX-NEXT: kmovw %k0, %eax
15049 ; NoVLX-NEXT: vzeroupper
15052 %0 = bitcast <2 x i64> %__a to <8 x i16>
15053 %1 = bitcast <2 x i64> %__b to <8 x i16>
15054 %2 = icmp ult <8 x i16> %0, %1
15055 %3 = bitcast i8 %__u to <8 x i1>
15056 %4 = and <8 x i1> %2, %3
15057 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15058 %6 = bitcast <32 x i1> %5 to i32
; Masked unsigned less-than compare of <8 x i16> %__a against a vector loaded
; from %__b: the <8 x i1> compare result is ANDed with %__u (bitcast to
; <8 x i1>), widened to <32 x i1> by a shufflevector in which every index
; >= 8 selects a lane of the zeroinitializer operand, then bitcast to i32.
; Both runs expect the load folded as a (%rsi) memory operand and the mask
; moved to %k1: write-mask of vpcmpltuw (VLX) or of vptestmq (NoVLX).
15062 define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15063 ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem:
15064 ; VLX: # %bb.0: # %entry
15065 ; VLX-NEXT: kmovd %edi, %k1
15066 ; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
15067 ; VLX-NEXT: kmovd %k0, %eax
15070 ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem:
15071 ; NoVLX: # %bb.0: # %entry
15072 ; NoVLX-NEXT: vpmaxuw (%rsi), %xmm0, %xmm1
15073 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15074 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15075 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15076 ; NoVLX-NEXT: kmovw %edi, %k1
15077 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
15078 ; NoVLX-NEXT: kmovw %k0, %eax
15079 ; NoVLX-NEXT: vzeroupper
15082 %0 = bitcast <2 x i64> %__a to <8 x i16>
15083 %load = load <2 x i64>, <2 x i64>* %__b
15084 %1 = bitcast <2 x i64> %load to <8 x i16>
15085 %2 = icmp ult <8 x i16> %0, %1
15086 %3 = bitcast i8 %__u to <8 x i1>
15087 %4 = and <8 x i1> %2, %3
15088 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15089 %6 = bitcast <32 x i1> %5 to i32
; Unsigned less-than compare of two <8 x i16> vectors (both in registers).
; The <8 x i1> result is widened to <64 x i1> by a shufflevector in which
; every index >= 8 selects a lane of the zeroinitializer operand, then
; bitcast to i64.
; The VLX run expects a single vpcmpltuw into a mask register. The NoVLX run
; expects the compare emulated as NOT(max(a, b) == a) with vpmaxuw, vpcmpeqw
; and vpternlogq $15, then converted to a mask via vpmovsxwq + vptestmq.
15094 define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15095 ; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask:
15096 ; VLX: # %bb.0: # %entry
15097 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
15098 ; VLX-NEXT: kmovq %k0, %rax
15101 ; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask:
15102 ; NoVLX: # %bb.0: # %entry
15103 ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
15104 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15105 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15106 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15107 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
15108 ; NoVLX-NEXT: kmovw %k0, %eax
15109 ; NoVLX-NEXT: vzeroupper
15112 %0 = bitcast <2 x i64> %__a to <8 x i16>
15113 %1 = bitcast <2 x i64> %__b to <8 x i16>
15114 %2 = icmp ult <8 x i16> %0, %1
15115 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15116 %4 = bitcast <64 x i1> %3 to i64
; Unsigned less-than compare of <8 x i16> %__a against a vector loaded from
; %__b. The <8 x i1> result is widened to <64 x i1> by a shufflevector in
; which every index >= 8 selects a lane of the zeroinitializer operand, then
; bitcast to i64.
; Both runs expect the load folded as a (%rdi) memory operand: into vpcmpltuw
; for VLX, and into vpmaxuw for the emulated NoVLX sequence.
15120 define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15121 ; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem:
15122 ; VLX: # %bb.0: # %entry
15123 ; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
15124 ; VLX-NEXT: kmovq %k0, %rax
15127 ; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem:
15128 ; NoVLX: # %bb.0: # %entry
15129 ; NoVLX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm1
15130 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15131 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15132 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15133 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
15134 ; NoVLX-NEXT: kmovw %k0, %eax
15135 ; NoVLX-NEXT: vzeroupper
15138 %0 = bitcast <2 x i64> %__a to <8 x i16>
15139 %load = load <2 x i64>, <2 x i64>* %__b
15140 %1 = bitcast <2 x i64> %load to <8 x i16>
15141 %2 = icmp ult <8 x i16> %0, %1
15142 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15143 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned less-than compare of two <8 x i16> vectors: the <8 x i1>
; compare result is ANDed with %__u (bitcast to <8 x i1>), widened to
; <64 x i1> by a shufflevector in which every index >= 8 selects a lane of
; the zeroinitializer operand, then bitcast to i64.
; The VLX run expects %k1 used as the write-mask of vpcmpltuw. The NoVLX run
; expects the emulated compare (vpmaxuw, vpcmpeqw, vpternlogq $15) with the
; mask applied as the write-mask of the final vptestmq.
15147 define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15148 ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask:
15149 ; VLX: # %bb.0: # %entry
15150 ; VLX-NEXT: kmovd %edi, %k1
15151 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
15152 ; VLX-NEXT: kmovq %k0, %rax
15155 ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask:
15156 ; NoVLX: # %bb.0: # %entry
15157 ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
15158 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15159 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15160 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15161 ; NoVLX-NEXT: kmovw %edi, %k1
15162 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
15163 ; NoVLX-NEXT: kmovw %k0, %eax
15164 ; NoVLX-NEXT: vzeroupper
15167 %0 = bitcast <2 x i64> %__a to <8 x i16>
15168 %1 = bitcast <2 x i64> %__b to <8 x i16>
15169 %2 = icmp ult <8 x i16> %0, %1
15170 %3 = bitcast i8 %__u to <8 x i1>
15171 %4 = and <8 x i1> %2, %3
15172 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15173 %6 = bitcast <64 x i1> %5 to i64
; Masked unsigned less-than compare of <8 x i16> %__a against a vector loaded
; from %__b: the <8 x i1> compare result is ANDed with %__u (bitcast to
; <8 x i1>), widened to <64 x i1> by a shufflevector in which every index
; >= 8 selects a lane of the zeroinitializer operand, then bitcast to i64.
; Both runs expect the load folded as a (%rsi) memory operand and the mask
; moved to %k1: write-mask of vpcmpltuw (VLX) or of vptestmq (NoVLX).
15177 define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15178 ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem:
15179 ; VLX: # %bb.0: # %entry
15180 ; VLX-NEXT: kmovd %edi, %k1
15181 ; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
15182 ; VLX-NEXT: kmovq %k0, %rax
15185 ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem:
15186 ; NoVLX: # %bb.0: # %entry
15187 ; NoVLX-NEXT: vpmaxuw (%rsi), %xmm0, %xmm1
15188 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15189 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15190 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15191 ; NoVLX-NEXT: kmovw %edi, %k1
15192 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
15193 ; NoVLX-NEXT: kmovw %k0, %eax
15194 ; NoVLX-NEXT: vzeroupper
15197 %0 = bitcast <2 x i64> %__a to <8 x i16>
15198 %load = load <2 x i64>, <2 x i64>* %__b
15199 %1 = bitcast <2 x i64> %load to <8 x i16>
15200 %2 = icmp ult <8 x i16> %0, %1
15201 %3 = bitcast i8 %__u to <8 x i1>
15202 %4 = and <8 x i1> %2, %3
15203 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15204 %6 = bitcast <64 x i1> %5 to i64
; Unsigned less-than compare of two <16 x i16> vectors (both in registers).
; The <16 x i1> result is widened to <32 x i1> by a shufflevector whose
; indices 16-31 select lanes of the zeroinitializer operand, then bitcast to i32.
; The VLX run expects a single vpcmpltuw into a mask register. The NoVLX run
; expects the compare emulated as NOT(max(a, b) == a) with vpmaxuw, vpcmpeqw
; and vpternlogq $15, then converted to a mask via vpmovsxwd + vptestmd.
15209 define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15210 ; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask:
15211 ; VLX: # %bb.0: # %entry
15212 ; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0
15213 ; VLX-NEXT: kmovd %k0, %eax
15214 ; VLX-NEXT: vzeroupper
15217 ; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask:
15218 ; NoVLX: # %bb.0: # %entry
15219 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
15220 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15221 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15222 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15223 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15224 ; NoVLX-NEXT: kmovw %k0, %eax
15225 ; NoVLX-NEXT: vzeroupper
15228 %0 = bitcast <4 x i64> %__a to <16 x i16>
15229 %1 = bitcast <4 x i64> %__b to <16 x i16>
15230 %2 = icmp ult <16 x i16> %0, %1
15231 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15232 %4 = bitcast <32 x i1> %3 to i32
; Unsigned less-than compare of <16 x i16> %__a against a vector loaded from
; %__b. The <16 x i1> result is widened to <32 x i1> by a shufflevector whose
; indices 16-31 select lanes of the zeroinitializer operand, then bitcast to i32.
; Both runs expect the load folded as a (%rdi) memory operand: into vpcmpltuw
; for VLX, and into vpmaxuw for the emulated NoVLX sequence.
15236 define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
15237 ; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem:
15238 ; VLX: # %bb.0: # %entry
15239 ; VLX-NEXT: vpcmpltuw (%rdi), %ymm0, %k0
15240 ; VLX-NEXT: kmovd %k0, %eax
15241 ; VLX-NEXT: vzeroupper
15244 ; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem:
15245 ; NoVLX: # %bb.0: # %entry
15246 ; NoVLX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm1
15247 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15248 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15249 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15250 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15251 ; NoVLX-NEXT: kmovw %k0, %eax
15252 ; NoVLX-NEXT: vzeroupper
15255 %0 = bitcast <4 x i64> %__a to <16 x i16>
15256 %load = load <4 x i64>, <4 x i64>* %__b
15257 %1 = bitcast <4 x i64> %load to <16 x i16>
15258 %2 = icmp ult <16 x i16> %0, %1
15259 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15260 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned less-than compare of two <16 x i16> vectors: the <16 x i1>
; compare result is ANDed with %__u (bitcast to <16 x i1>), widened to
; <32 x i1> by a shufflevector whose indices 16-31 select lanes of the
; zeroinitializer operand, then bitcast to i32.
; The VLX run expects %k1 used as the write-mask of vpcmpltuw. The NoVLX run
; expects the emulated compare (vpmaxuw, vpcmpeqw, vpternlogq $15) with the
; mask applied in a GPR by andl %edi, %eax after the kmovw.
15264 define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15265 ; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask:
15266 ; VLX: # %bb.0: # %entry
15267 ; VLX-NEXT: kmovd %edi, %k1
15268 ; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1}
15269 ; VLX-NEXT: kmovd %k0, %eax
15270 ; VLX-NEXT: vzeroupper
15273 ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask:
15274 ; NoVLX: # %bb.0: # %entry
15275 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
15276 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15277 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15278 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15279 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15280 ; NoVLX-NEXT: kmovw %k0, %eax
15281 ; NoVLX-NEXT: andl %edi, %eax
15282 ; NoVLX-NEXT: vzeroupper
15285 %0 = bitcast <4 x i64> %__a to <16 x i16>
15286 %1 = bitcast <4 x i64> %__b to <16 x i16>
15287 %2 = icmp ult <16 x i16> %0, %1
15288 %3 = bitcast i16 %__u to <16 x i1>
15289 %4 = and <16 x i1> %2, %3
15290 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15291 %6 = bitcast <32 x i1> %5 to i32
; Masked unsigned less-than compare of <16 x i16> %__a against a vector
; loaded from %__b: the <16 x i1> compare result is ANDed with %__u (bitcast
; to <16 x i1>), widened to <32 x i1> by a shufflevector whose indices 16-31
; select lanes of the zeroinitializer operand, then bitcast to i32.
; Both runs expect the load folded as a (%rsi) memory operand. The VLX run
; uses %k1 as the write-mask of vpcmpltuw; the NoVLX run applies the mask in
; a GPR by andl %edi, %eax after the kmovw.
15295 define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
15296 ; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem:
15297 ; VLX: # %bb.0: # %entry
15298 ; VLX-NEXT: kmovd %edi, %k1
15299 ; VLX-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1}
15300 ; VLX-NEXT: kmovd %k0, %eax
15301 ; VLX-NEXT: vzeroupper
15304 ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem:
15305 ; NoVLX: # %bb.0: # %entry
15306 ; NoVLX-NEXT: vpmaxuw (%rsi), %ymm0, %ymm1
15307 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15308 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15309 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15310 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15311 ; NoVLX-NEXT: kmovw %k0, %eax
15312 ; NoVLX-NEXT: andl %edi, %eax
15313 ; NoVLX-NEXT: vzeroupper
15316 %0 = bitcast <4 x i64> %__a to <16 x i16>
15317 %load = load <4 x i64>, <4 x i64>* %__b
15318 %1 = bitcast <4 x i64> %load to <16 x i16>
15319 %2 = icmp ult <16 x i16> %0, %1
15320 %3 = bitcast i16 %__u to <16 x i1>
15321 %4 = and <16 x i1> %2, %3
15322 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15323 %6 = bitcast <32 x i1> %5 to i32
; Unsigned less-than compare of two <16 x i16> vectors (both in registers).
; The <16 x i1> result is widened to <64 x i1> by a shufflevector in which
; every index >= 16 selects a lane of the zeroinitializer operand, then
; bitcast to i64.
; The VLX run expects a single vpcmpltuw into a mask register. The NoVLX run
; expects the compare emulated as NOT(max(a, b) == a) with vpmaxuw, vpcmpeqw
; and vpternlogq $15, then converted to a mask via vpmovsxwd + vptestmd.
15328 define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15329 ; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask:
15330 ; VLX: # %bb.0: # %entry
15331 ; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0
15332 ; VLX-NEXT: kmovq %k0, %rax
15333 ; VLX-NEXT: vzeroupper
15336 ; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask:
15337 ; NoVLX: # %bb.0: # %entry
15338 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
15339 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15340 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15341 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15342 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15343 ; NoVLX-NEXT: kmovw %k0, %eax
15344 ; NoVLX-NEXT: vzeroupper
15347 %0 = bitcast <4 x i64> %__a to <16 x i16>
15348 %1 = bitcast <4 x i64> %__b to <16 x i16>
15349 %2 = icmp ult <16 x i16> %0, %1
15350 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15351 %4 = bitcast <64 x i1> %3 to i64
; Unsigned less-than compare of <16 x i16> %__a against a vector loaded from
; %__b. The <16 x i1> result is widened to <64 x i1> by a shufflevector in
; which every index >= 16 selects a lane of the zeroinitializer operand, then
; bitcast to i64.
; Both runs expect the load folded as a (%rdi) memory operand: into vpcmpltuw
; for VLX, and into vpmaxuw for the emulated NoVLX sequence.
15355 define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
15356 ; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem:
15357 ; VLX: # %bb.0: # %entry
15358 ; VLX-NEXT: vpcmpltuw (%rdi), %ymm0, %k0
15359 ; VLX-NEXT: kmovq %k0, %rax
15360 ; VLX-NEXT: vzeroupper
15363 ; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem:
15364 ; NoVLX: # %bb.0: # %entry
15365 ; NoVLX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm1
15366 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15367 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15368 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15369 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15370 ; NoVLX-NEXT: kmovw %k0, %eax
15371 ; NoVLX-NEXT: vzeroupper
15374 %0 = bitcast <4 x i64> %__a to <16 x i16>
15375 %load = load <4 x i64>, <4 x i64>* %__b
15376 %1 = bitcast <4 x i64> %load to <16 x i16>
15377 %2 = icmp ult <16 x i16> %0, %1
15378 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15379 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned less-than compare of two <16 x i16> vectors: the <16 x i1>
; compare result is ANDed with %__u (bitcast to <16 x i1>), widened to
; <64 x i1> by a shufflevector in which every index >= 16 selects a lane of
; the zeroinitializer operand, then bitcast to i64.
; The VLX run expects %k1 used as the write-mask of vpcmpltuw. The NoVLX run
; expects the emulated compare (vpmaxuw, vpcmpeqw, vpternlogq $15) with the
; mask applied in a GPR by andl %edi, %eax after the kmovw.
15383 define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15384 ; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask:
15385 ; VLX: # %bb.0: # %entry
15386 ; VLX-NEXT: kmovd %edi, %k1
15387 ; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1}
15388 ; VLX-NEXT: kmovq %k0, %rax
15389 ; VLX-NEXT: vzeroupper
15392 ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask:
15393 ; NoVLX: # %bb.0: # %entry
15394 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
15395 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15396 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15397 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15398 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15399 ; NoVLX-NEXT: kmovw %k0, %eax
15400 ; NoVLX-NEXT: andl %edi, %eax
15401 ; NoVLX-NEXT: vzeroupper
15404 %0 = bitcast <4 x i64> %__a to <16 x i16>
15405 %1 = bitcast <4 x i64> %__b to <16 x i16>
15406 %2 = icmp ult <16 x i16> %0, %1
15407 %3 = bitcast i16 %__u to <16 x i1>
15408 %4 = and <16 x i1> %2, %3
15409 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15410 %6 = bitcast <64 x i1> %5 to i64
; Memory-operand variant of the masked 16 x i16 unsigned less-than test: the
; right-hand operand is loaded from %__b as <4 x i64> before being bitcast to
; <16 x i16>. The compare result is ANDed with %__u, padded to <64 x i1> and
; bitcast to i64, the declared return type.
15414 define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
15415 ; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem:
15416 ; VLX: # %bb.0: # %entry
15417 ; VLX-NEXT: kmovd %edi, %k1
15418 ; VLX-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1}
15419 ; VLX-NEXT: kmovq %k0, %rax
15420 ; VLX-NEXT: vzeroupper
15423 ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem:
15424 ; NoVLX: # %bb.0: # %entry
15425 ; NoVLX-NEXT: vpmaxuw (%rsi), %ymm0, %ymm1
15426 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15427 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15428 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15429 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15430 ; NoVLX-NEXT: kmovw %k0, %eax
15431 ; NoVLX-NEXT: andl %edi, %eax
15432 ; NoVLX-NEXT: vzeroupper
15435 %0 = bitcast <4 x i64> %__a to <16 x i16>
15436 %load = load <4 x i64>, <4 x i64>* %__b
15437 %1 = bitcast <4 x i64> %load to <16 x i16>
15438 %2 = icmp ult <16 x i16> %0, %1
15439 %3 = bitcast i16 %__u to <16 x i1>
15440 %4 = and <16 x i1> %2, %3
; Shuffle indices 16 and above select lanes of the zeroinitializer operand, so result lanes 16-63 are zero.
15441 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15442 %6 = bitcast <64 x i1> %5 to i64
; Unmasked unsigned less-than compare on 32 x i16 lanes, widened to a 64-bit mask.
; %__a and %__b arrive as <8 x i64> and are bitcast to <32 x i16>; the <32 x i1>
; compare result is padded to <64 x i1> and bitcast to i64, the declared return type.
15447 define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
15448 ; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
15449 ; VLX: # %bb.0: # %entry
15450 ; VLX-NEXT: vpcmpltuw %zmm1, %zmm0, %k0
15451 ; VLX-NEXT: kmovq %k0, %rax
15452 ; VLX-NEXT: vzeroupper
15455 ; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
15456 ; NoVLX: # %bb.0: # %entry
15457 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
15458 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3
15459 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
15460 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15461 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15462 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15463 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15464 ; NoVLX-NEXT: kmovw %k0, %ecx
15465 ; NoVLX-NEXT: vpmaxuw %ymm3, %ymm2, %ymm0
15466 ; NoVLX-NEXT: vpcmpeqw %ymm0, %ymm2, %ymm0
15467 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15468 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15469 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15470 ; NoVLX-NEXT: kmovw %k0, %eax
15471 ; NoVLX-NEXT: shll $16, %eax
15472 ; NoVLX-NEXT: orl %ecx, %eax
15473 ; NoVLX-NEXT: vzeroupper
15476 %0 = bitcast <8 x i64> %__a to <32 x i16>
15477 %1 = bitcast <8 x i64> %__b to <32 x i16>
15478 %2 = icmp ult <32 x i16> %0, %1
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so result lanes 32-63 are zero.
15479 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15480 %4 = bitcast <64 x i1> %3 to i64
; Memory-operand variant of the unmasked 32 x i16 unsigned less-than test: the
; right-hand operand is loaded from %__b as <8 x i64> before being bitcast to
; <32 x i16>. The <32 x i1> compare result is padded to <64 x i1> and bitcast to
; i64, the declared return type.
15484 define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
15485 ; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
15486 ; VLX: # %bb.0: # %entry
15487 ; VLX-NEXT: vpcmpltuw (%rdi), %zmm0, %k0
15488 ; VLX-NEXT: kmovq %k0, %rax
15489 ; VLX-NEXT: vzeroupper
15492 ; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
15493 ; NoVLX: # %bb.0: # %entry
15494 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1
15495 ; NoVLX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm2
15496 ; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
15497 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15498 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15499 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15500 ; NoVLX-NEXT: kmovw %k0, %ecx
15501 ; NoVLX-NEXT: vpmaxuw 32(%rdi), %ymm1, %ymm0
15502 ; NoVLX-NEXT: vpcmpeqw %ymm0, %ymm1, %ymm0
15503 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15504 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15505 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15506 ; NoVLX-NEXT: kmovw %k0, %eax
15507 ; NoVLX-NEXT: shll $16, %eax
15508 ; NoVLX-NEXT: orl %ecx, %eax
15509 ; NoVLX-NEXT: vzeroupper
15512 %0 = bitcast <8 x i64> %__a to <32 x i16>
15513 %load = load <8 x i64>, <8 x i64>* %__b
15514 %1 = bitcast <8 x i64> %load to <32 x i16>
15515 %2 = icmp ult <32 x i16> %0, %1
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so result lanes 32-63 are zero.
15516 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15517 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned less-than compare on 32 x i16 lanes, widened to a 64-bit mask.
; %__a and %__b arrive as <8 x i64> and are bitcast to <32 x i16>; the compare
; result is ANDed with %__u (bitcast to <32 x i1>), padded to <64 x i1> and
; bitcast to i64, the declared return type.
15521 define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
15522 ; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
15523 ; VLX: # %bb.0: # %entry
15524 ; VLX-NEXT: kmovd %edi, %k1
15525 ; VLX-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
15526 ; VLX-NEXT: kmovq %k0, %rax
15527 ; VLX-NEXT: vzeroupper
15530 ; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
15531 ; NoVLX: # %bb.0: # %entry
15532 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2
15533 ; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
15534 ; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
15535 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
15536 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
15537 ; NoVLX-NEXT: kmovw %k0, %eax
15538 ; NoVLX-NEXT: andl %edi, %eax
15539 ; NoVLX-NEXT: shrl $16, %edi
15540 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
15541 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
15542 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
15543 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15544 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15545 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15546 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15547 ; NoVLX-NEXT: kmovw %k0, %ecx
15548 ; NoVLX-NEXT: andl %edi, %ecx
15549 ; NoVLX-NEXT: shll $16, %ecx
15550 ; NoVLX-NEXT: movzwl %ax, %eax
15551 ; NoVLX-NEXT: orl %ecx, %eax
15552 ; NoVLX-NEXT: vzeroupper
15555 %0 = bitcast <8 x i64> %__a to <32 x i16>
15556 %1 = bitcast <8 x i64> %__b to <32 x i16>
15557 %2 = icmp ult <32 x i16> %0, %1
15558 %3 = bitcast i32 %__u to <32 x i1>
15559 %4 = and <32 x i1> %2, %3
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so result lanes 32-63 are zero.
15560 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15561 %6 = bitcast <64 x i1> %5 to i64
; Memory-operand variant of the masked 32 x i16 unsigned less-than test: the
; right-hand operand is loaded from %__b as <8 x i64> before being bitcast to
; <32 x i16>. The compare result is ANDed with %__u (bitcast to <32 x i1>),
; padded to <64 x i1> and bitcast to i64, the declared return type.
15565 define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
15566 ; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
15567 ; VLX: # %bb.0: # %entry
15568 ; VLX-NEXT: kmovd %edi, %k1
15569 ; VLX-NEXT: vpcmpltuw (%rsi), %zmm0, %k0 {%k1}
15570 ; VLX-NEXT: kmovq %k0, %rax
15571 ; VLX-NEXT: vzeroupper
15574 ; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
15575 ; NoVLX: # %bb.0: # %entry
15576 ; NoVLX-NEXT: vpmaxuw (%rsi), %ymm0, %ymm1
15577 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm1
15578 ; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
15579 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
15580 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
15581 ; NoVLX-NEXT: kmovw %k0, %eax
15582 ; NoVLX-NEXT: andl %edi, %eax
15583 ; NoVLX-NEXT: shrl $16, %edi
15584 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
15585 ; NoVLX-NEXT: vpmaxuw 32(%rsi), %ymm0, %ymm1
15586 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15587 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15588 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15589 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15590 ; NoVLX-NEXT: kmovw %k0, %ecx
15591 ; NoVLX-NEXT: andl %edi, %ecx
15592 ; NoVLX-NEXT: shll $16, %ecx
15593 ; NoVLX-NEXT: movzwl %ax, %eax
15594 ; NoVLX-NEXT: orl %ecx, %eax
15595 ; NoVLX-NEXT: vzeroupper
15598 %0 = bitcast <8 x i64> %__a to <32 x i16>
15599 %load = load <8 x i64>, <8 x i64>* %__b
15600 %1 = bitcast <8 x i64> %load to <32 x i16>
15601 %2 = icmp ult <32 x i16> %0, %1
15602 %3 = bitcast i32 %__u to <32 x i1>
15603 %4 = and <32 x i1> %2, %3
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so result lanes 32-63 are zero.
15604 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15605 %6 = bitcast <64 x i1> %5 to i64
; Unmasked unsigned less-than compare on 4 x i32 lanes, widened to an 8-bit mask.
; %__a and %__b arrive as <2 x i64> and are bitcast to <4 x i32>; the <4 x i1>
; compare result is padded to <8 x i1> and bitcast to i8, the declared return type.
15610 define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15611 ; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask:
15612 ; VLX: # %bb.0: # %entry
15613 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
15614 ; VLX-NEXT: kmovd %k0, %eax
15615 ; VLX-NEXT: # kill: def $al killed $al killed $eax
15618 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask:
15619 ; NoVLX: # %bb.0: # %entry
15620 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
15621 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15622 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
15623 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15624 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15625 ; NoVLX-NEXT: kmovw %k0, %eax
15626 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
15627 ; NoVLX-NEXT: vzeroupper
15630 %0 = bitcast <2 x i64> %__a to <4 x i32>
15631 %1 = bitcast <2 x i64> %__b to <4 x i32>
15632 %2 = icmp ult <4 x i32> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-7 are zero.
15633 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15634 %4 = bitcast <8 x i1> %3 to i8
; Memory-operand variant of the unmasked 4 x i32 unsigned less-than test: the
; right-hand operand is loaded from %__b as <2 x i64> before being bitcast to
; <4 x i32>. The <4 x i1> result is padded to <8 x i1> and bitcast to i8, the
; declared return type.
15638 define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15639 ; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem:
15640 ; VLX: # %bb.0: # %entry
15641 ; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
15642 ; VLX-NEXT: kmovd %k0, %eax
15643 ; VLX-NEXT: # kill: def $al killed $al killed $eax
15646 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem:
15647 ; NoVLX: # %bb.0: # %entry
15648 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15649 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
15650 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
15651 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15652 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15653 ; NoVLX-NEXT: kmovw %k0, %eax
15654 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
15655 ; NoVLX-NEXT: vzeroupper
15658 %0 = bitcast <2 x i64> %__a to <4 x i32>
15659 %load = load <2 x i64>, <2 x i64>* %__b
15660 %1 = bitcast <2 x i64> %load to <4 x i32>
15661 %2 = icmp ult <4 x i32> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-7 are zero.
15662 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15663 %4 = bitcast <8 x i1> %3 to i8
; Masked unsigned less-than compare on 4 x i32 lanes, widened to an 8-bit mask.
; %__a and %__b arrive as <2 x i64> and are bitcast to <4 x i32>. The i8 mask
; %__u is bitcast to <8 x i1> and its low four lanes are ANDed with the compare
; result, which is then padded to <8 x i1> and bitcast to i8, the declared
; return type.
15667 define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15668 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask:
15669 ; VLX: # %bb.0: # %entry
15670 ; VLX-NEXT: kmovd %edi, %k1
15671 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
15672 ; VLX-NEXT: kmovd %k0, %eax
15673 ; VLX-NEXT: # kill: def $al killed $al killed $eax
15676 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask:
15677 ; NoVLX: # %bb.0: # %entry
15678 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
15679 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15680 ; NoVLX-NEXT: kmovw %edi, %k1
15681 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
15682 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15683 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15684 ; NoVLX-NEXT: kmovw %k0, %eax
15685 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
15686 ; NoVLX-NEXT: vzeroupper
15689 %0 = bitcast <2 x i64> %__a to <4 x i32>
15690 %1 = bitcast <2 x i64> %__b to <4 x i32>
15691 %2 = icmp ult <4 x i32> %0, %1
15692 %3 = bitcast i8 %__u to <8 x i1>
; Keep only mask lanes 0-3, matching the four compare lanes.
15693 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15694 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-7 are zero.
15695 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15696 %6 = bitcast <8 x i1> %5 to i8
; Memory-operand variant of the masked 4 x i32 unsigned less-than test: the
; right-hand operand is loaded from %__b as <2 x i64> before being bitcast to
; <4 x i32>. The low four lanes of %__u (bitcast to <8 x i1>) are ANDed with the
; compare result, which is padded to <8 x i1> and bitcast to i8, the declared
; return type.
15700 define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15701 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem:
15702 ; VLX: # %bb.0: # %entry
15703 ; VLX-NEXT: kmovd %edi, %k1
15704 ; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
15705 ; VLX-NEXT: kmovd %k0, %eax
15706 ; VLX-NEXT: # kill: def $al killed $al killed $eax
15709 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem:
15710 ; NoVLX: # %bb.0: # %entry
15711 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15712 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
15713 ; NoVLX-NEXT: kmovw %edi, %k1
15714 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
15715 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15716 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15717 ; NoVLX-NEXT: kmovw %k0, %eax
15718 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
15719 ; NoVLX-NEXT: vzeroupper
15722 %0 = bitcast <2 x i64> %__a to <4 x i32>
15723 %load = load <2 x i64>, <2 x i64>* %__b
15724 %1 = bitcast <2 x i64> %load to <4 x i32>
15725 %2 = icmp ult <4 x i32> %0, %1
15726 %3 = bitcast i8 %__u to <8 x i1>
; Keep only mask lanes 0-3, matching the four compare lanes.
15727 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15728 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-7 are zero.
15729 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15730 %6 = bitcast <8 x i1> %5 to i8
; Broadcast-operand variant of the unmasked 4 x i32 unsigned less-than test: a
; single i32 is loaded from %__b and splatted to all four lanes to form the
; right-hand operand. The <4 x i1> result is padded to <8 x i1> and bitcast to
; i8, the declared return type.
15735 define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
15736 ; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b:
15737 ; VLX: # %bb.0: # %entry
15738 ; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
15739 ; VLX-NEXT: kmovd %k0, %eax
15740 ; VLX-NEXT: # kill: def $al killed $al killed $eax
15743 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b:
15744 ; NoVLX: # %bb.0: # %entry
15745 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15746 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
15747 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15748 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15749 ; NoVLX-NEXT: kmovw %k0, %eax
15750 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
15751 ; NoVLX-NEXT: vzeroupper
15754 %0 = bitcast <2 x i64> %__a to <4 x i32>
15755 %load = load i32, i32* %__b
; Splat: insert the scalar into lane 0, then replicate lane 0 into all four lanes.
15756 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
15757 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
15758 %2 = icmp ult <4 x i32> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-7 are zero.
15759 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15760 %4 = bitcast <8 x i1> %3 to i8
; Broadcast-operand variant of the masked 4 x i32 unsigned less-than test: a
; single i32 is loaded from %__b and splatted to all four lanes to form the
; right-hand operand. The low four lanes of %__u (bitcast to <8 x i1>) are ANDed
; with the compare result, which is padded to <8 x i1> and bitcast to i8, the
; declared return type.
15764 define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
15765 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b:
15766 ; VLX: # %bb.0: # %entry
15767 ; VLX-NEXT: kmovd %edi, %k1
15768 ; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
15769 ; VLX-NEXT: kmovd %k0, %eax
15770 ; VLX-NEXT: # kill: def $al killed $al killed $eax
15773 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b:
15774 ; NoVLX: # %bb.0: # %entry
15775 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15776 ; NoVLX-NEXT: kmovw %edi, %k1
15777 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
15778 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15779 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15780 ; NoVLX-NEXT: kmovw %k0, %eax
15781 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
15782 ; NoVLX-NEXT: vzeroupper
15785 %0 = bitcast <2 x i64> %__a to <4 x i32>
15786 %load = load i32, i32* %__b
; Splat: insert the scalar into lane 0, then replicate lane 0 into all four lanes.
15787 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
15788 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
15789 %2 = icmp ult <4 x i32> %0, %1
15790 %3 = bitcast i8 %__u to <8 x i1>
; Keep only mask lanes 0-3, matching the four compare lanes.
15791 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15792 %4 = and <4 x i1> %extract.i, %2
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-7 are zero.
15793 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15794 %6 = bitcast <8 x i1> %5 to i8
; Unmasked unsigned less-than compare on 4 x i32 lanes, widened to a 16-bit mask.
; %__a and %__b arrive as <2 x i64> and are bitcast to <4 x i32>; the <4 x i1>
; compare result is padded to <16 x i1> and bitcast to i16, the declared return type.
15799 define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15800 ; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask:
15801 ; VLX: # %bb.0: # %entry
15802 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
15803 ; VLX-NEXT: kmovd %k0, %eax
15804 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
15807 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask:
15808 ; NoVLX: # %bb.0: # %entry
15809 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
15810 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15811 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
15812 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15813 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15814 ; NoVLX-NEXT: kmovw %k0, %eax
15815 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
15816 ; NoVLX-NEXT: vzeroupper
15819 %0 = bitcast <2 x i64> %__a to <4 x i32>
15820 %1 = bitcast <2 x i64> %__b to <4 x i32>
15821 %2 = icmp ult <4 x i32> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-15 are zero.
15822 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
15823 %4 = bitcast <16 x i1> %3 to i16
; Memory-operand variant of the unmasked 4 x i32 unsigned less-than test with a
; 16-bit result: the right-hand operand is loaded from %__b as <2 x i64> before
; being bitcast to <4 x i32>. The <4 x i1> result is padded to <16 x i1> and
; bitcast to i16, the declared return type.
15827 define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15828 ; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem:
15829 ; VLX: # %bb.0: # %entry
15830 ; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
15831 ; VLX-NEXT: kmovd %k0, %eax
15832 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
15835 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem:
15836 ; NoVLX: # %bb.0: # %entry
15837 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15838 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
15839 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
15840 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15841 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15842 ; NoVLX-NEXT: kmovw %k0, %eax
15843 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
15844 ; NoVLX-NEXT: vzeroupper
15847 %0 = bitcast <2 x i64> %__a to <4 x i32>
15848 %load = load <2 x i64>, <2 x i64>* %__b
15849 %1 = bitcast <2 x i64> %load to <4 x i32>
15850 %2 = icmp ult <4 x i32> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-15 are zero.
15851 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
15852 %4 = bitcast <16 x i1> %3 to i16
; Masked unsigned less-than compare on 4 x i32 lanes, widened to a 16-bit mask.
; %__a and %__b arrive as <2 x i64> and are bitcast to <4 x i32>. The i8 mask
; %__u is bitcast to <8 x i1> and its low four lanes are ANDed with the compare
; result, which is then padded to <16 x i1> and bitcast to i16, the declared
; return type.
15856 define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15857 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask:
15858 ; VLX: # %bb.0: # %entry
15859 ; VLX-NEXT: kmovd %edi, %k1
15860 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
15861 ; VLX-NEXT: kmovd %k0, %eax
15862 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
15865 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask:
15866 ; NoVLX: # %bb.0: # %entry
15867 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
15868 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15869 ; NoVLX-NEXT: kmovw %edi, %k1
15870 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
15871 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15872 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15873 ; NoVLX-NEXT: kmovw %k0, %eax
15874 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
15875 ; NoVLX-NEXT: vzeroupper
15878 %0 = bitcast <2 x i64> %__a to <4 x i32>
15879 %1 = bitcast <2 x i64> %__b to <4 x i32>
15880 %2 = icmp ult <4 x i32> %0, %1
15881 %3 = bitcast i8 %__u to <8 x i1>
; Keep only mask lanes 0-3, matching the four compare lanes.
15882 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15883 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-15 are zero.
15884 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
15885 %6 = bitcast <16 x i1> %5 to i16
; Memory-operand variant of the masked 4 x i32 unsigned less-than test with a
; 16-bit result: the right-hand operand is loaded from %__b as <2 x i64> before
; being bitcast to <4 x i32>. The low four lanes of %__u (bitcast to <8 x i1>)
; are ANDed with the compare result, which is padded to <16 x i1> and bitcast to
; i16, the declared return type.
15889 define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15890 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem:
15891 ; VLX: # %bb.0: # %entry
15892 ; VLX-NEXT: kmovd %edi, %k1
15893 ; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
15894 ; VLX-NEXT: kmovd %k0, %eax
15895 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
15898 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem:
15899 ; NoVLX: # %bb.0: # %entry
15900 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15901 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
15902 ; NoVLX-NEXT: kmovw %edi, %k1
15903 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
15904 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15905 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15906 ; NoVLX-NEXT: kmovw %k0, %eax
15907 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
15908 ; NoVLX-NEXT: vzeroupper
15911 %0 = bitcast <2 x i64> %__a to <4 x i32>
15912 %load = load <2 x i64>, <2 x i64>* %__b
15913 %1 = bitcast <2 x i64> %load to <4 x i32>
15914 %2 = icmp ult <4 x i32> %0, %1
15915 %3 = bitcast i8 %__u to <8 x i1>
; Keep only mask lanes 0-3, matching the four compare lanes.
15916 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15917 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-15 are zero.
15918 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
15919 %6 = bitcast <16 x i1> %5 to i16
; Broadcast-operand variant of the unmasked 4 x i32 unsigned less-than test with
; a 16-bit result: a single i32 is loaded from %__b and splatted to all four
; lanes to form the right-hand operand. The <4 x i1> result is padded to
; <16 x i1> and bitcast to i16, the declared return type.
15924 define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
15925 ; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b:
15926 ; VLX: # %bb.0: # %entry
15927 ; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
15928 ; VLX-NEXT: kmovd %k0, %eax
15929 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
15932 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b:
15933 ; NoVLX: # %bb.0: # %entry
15934 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15935 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
15936 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15937 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15938 ; NoVLX-NEXT: kmovw %k0, %eax
15939 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
15940 ; NoVLX-NEXT: vzeroupper
15943 %0 = bitcast <2 x i64> %__a to <4 x i32>
15944 %load = load i32, i32* %__b
; Splat: insert the scalar into lane 0, then replicate lane 0 into all four lanes.
15945 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
15946 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
15947 %2 = icmp ult <4 x i32> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-15 are zero.
15948 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
15949 %4 = bitcast <16 x i1> %3 to i16
; Broadcast-operand variant of the masked 4 x i32 unsigned less-than test with a
; 16-bit result: a single i32 is loaded from %__b and splatted to all four lanes
; to form the right-hand operand. The low four lanes of %__u (bitcast to
; <8 x i1>) are ANDed with the compare result, which is padded to <16 x i1> and
; bitcast to i16, the declared return type.
15953 define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
15954 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b:
15955 ; VLX: # %bb.0: # %entry
15956 ; VLX-NEXT: kmovd %edi, %k1
15957 ; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
15958 ; VLX-NEXT: kmovd %k0, %eax
15959 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
15962 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b:
15963 ; NoVLX: # %bb.0: # %entry
15964 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15965 ; NoVLX-NEXT: kmovw %edi, %k1
15966 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
15967 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15968 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15969 ; NoVLX-NEXT: kmovw %k0, %eax
15970 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
15971 ; NoVLX-NEXT: vzeroupper
15974 %0 = bitcast <2 x i64> %__a to <4 x i32>
15975 %load = load i32, i32* %__b
; Splat: insert the scalar into lane 0, then replicate lane 0 into all four lanes.
15976 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
15977 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
15978 %2 = icmp ult <4 x i32> %0, %1
15979 %3 = bitcast i8 %__u to <8 x i1>
; Keep only mask lanes 0-3, matching the four compare lanes.
15980 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15981 %4 = and <4 x i1> %extract.i, %2
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-15 are zero.
15982 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
15983 %6 = bitcast <16 x i1> %5 to i16
; Unmasked unsigned less-than compare on 4 x i32 lanes, widened to a 32-bit mask.
; %__a and %__b arrive as <2 x i64> and are bitcast to <4 x i32>; the <4 x i1>
; compare result is padded to <32 x i1> and bitcast to i32, the declared return type.
15988 define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15989 ; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask:
15990 ; VLX: # %bb.0: # %entry
15991 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
15992 ; VLX-NEXT: kmovd %k0, %eax
15995 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask:
15996 ; NoVLX: # %bb.0: # %entry
15997 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
15998 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15999 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16000 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16001 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16002 ; NoVLX-NEXT: kmovw %k0, %eax
16003 ; NoVLX-NEXT: vzeroupper
16006 %0 = bitcast <2 x i64> %__a to <4 x i32>
16007 %1 = bitcast <2 x i64> %__b to <4 x i32>
16008 %2 = icmp ult <4 x i32> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-31 are zero.
16009 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16010 %4 = bitcast <32 x i1> %3 to i32
; Memory-operand variant of the unmasked 4 x i32 unsigned less-than test with a
; 32-bit result: the right-hand operand is loaded from %__b as <2 x i64> before
; being bitcast to <4 x i32>. The <4 x i1> result is padded to <32 x i1> and
; bitcast to i32, the declared return type.
16014 define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
16015 ; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem:
16016 ; VLX: # %bb.0: # %entry
16017 ; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
16018 ; VLX-NEXT: kmovd %k0, %eax
16021 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem:
16022 ; NoVLX: # %bb.0: # %entry
16023 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16024 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
16025 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16026 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16027 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16028 ; NoVLX-NEXT: kmovw %k0, %eax
16029 ; NoVLX-NEXT: vzeroupper
16032 %0 = bitcast <2 x i64> %__a to <4 x i32>
16033 %load = load <2 x i64>, <2 x i64>* %__b
16034 %1 = bitcast <2 x i64> %load to <4 x i32>
16035 %2 = icmp ult <4 x i32> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-31 are zero.
16036 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16037 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned less-than compare on 4 x i32 lanes, widened to a 32-bit mask.
; %__a and %__b arrive as <2 x i64> and are bitcast to <4 x i32>. The i8 mask
; %__u is bitcast to <8 x i1> and its low four lanes are ANDed with the compare
; result, which is then padded to <32 x i1> and bitcast to i32, the declared
; return type.
16041 define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16042 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask:
16043 ; VLX: # %bb.0: # %entry
16044 ; VLX-NEXT: kmovd %edi, %k1
16045 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
16046 ; VLX-NEXT: kmovd %k0, %eax
16049 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask:
16050 ; NoVLX: # %bb.0: # %entry
16051 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
16052 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16053 ; NoVLX-NEXT: kmovw %edi, %k1
16054 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16055 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16056 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16057 ; NoVLX-NEXT: kmovw %k0, %eax
16058 ; NoVLX-NEXT: vzeroupper
16061 %0 = bitcast <2 x i64> %__a to <4 x i32>
16062 %1 = bitcast <2 x i64> %__b to <4 x i32>
16063 %2 = icmp ult <4 x i32> %0, %1
16064 %3 = bitcast i8 %__u to <8 x i1>
; Keep only mask lanes 0-3, matching the four compare lanes.
16065 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16066 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-31 are zero.
16067 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16068 %6 = bitcast <32 x i1> %5 to i32
; Memory-operand variant of the masked 4 x i32 unsigned less-than test with a
; 32-bit result: the right-hand operand is loaded from %__b as <2 x i64> before
; being bitcast to <4 x i32>. The low four lanes of %__u (bitcast to <8 x i1>)
; are ANDed with the compare result, which is padded to <32 x i1> and bitcast to
; i32, the declared return type.
16072 define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
16073 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem:
16074 ; VLX: # %bb.0: # %entry
16075 ; VLX-NEXT: kmovd %edi, %k1
16076 ; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
16077 ; VLX-NEXT: kmovd %k0, %eax
16080 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem:
16081 ; NoVLX: # %bb.0: # %entry
16082 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16083 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
16084 ; NoVLX-NEXT: kmovw %edi, %k1
16085 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16086 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16087 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16088 ; NoVLX-NEXT: kmovw %k0, %eax
16089 ; NoVLX-NEXT: vzeroupper
16092 %0 = bitcast <2 x i64> %__a to <4 x i32>
16093 %load = load <2 x i64>, <2 x i64>* %__b
16094 %1 = bitcast <2 x i64> %load to <4 x i32>
16095 %2 = icmp ult <4 x i32> %0, %1
16096 %3 = bitcast i8 %__u to <8 x i1>
; Keep only mask lanes 0-3, matching the four compare lanes.
16097 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16098 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-31 are zero.
16099 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16100 %6 = bitcast <32 x i1> %5 to i32
; Broadcast-operand variant of the unmasked 4 x i32 unsigned less-than test with
; a 32-bit result: a single i32 is loaded from %__b and splatted to all four
; lanes to form the right-hand operand. The <4 x i1> result is padded to
; <32 x i1> and bitcast to i32, the declared return type.
16105 define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
16106 ; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b:
16107 ; VLX: # %bb.0: # %entry
16108 ; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
16109 ; VLX-NEXT: kmovd %k0, %eax
16112 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b:
16113 ; NoVLX: # %bb.0: # %entry
16114 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16115 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
16116 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16117 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16118 ; NoVLX-NEXT: kmovw %k0, %eax
16119 ; NoVLX-NEXT: vzeroupper
16122 %0 = bitcast <2 x i64> %__a to <4 x i32>
16123 %load = load i32, i32* %__b
; Splat: insert the scalar into lane 0, then replicate lane 0 into all four lanes.
16124 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16125 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16126 %2 = icmp ult <4 x i32> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result lanes 4-31 are zero.
16127 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16128 %4 = bitcast <32 x i1> %3 to i32
; Masked form of the broadcast compare: the four i32 lanes of %__a are
; compared (unsigned less-than) against a scalar loaded from %__b and splatted,
; then ANDed with the low 4 bits of %__u. The result is widened to 32 bits with
; zeros and returned as i32.
; With AVX512VL the expected code is one vpcmpltud with a {1to4} operand
; under write-mask {%k1}. Without it the compare runs on zmm with {1to16},
; and kshiftlw/kshiftrw by 12 clear mask bits 4-15.
16132 define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
16133 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b:
16134 ; VLX: # %bb.0: # %entry
16135 ; VLX-NEXT: kmovd %edi, %k1
16136 ; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
16137 ; VLX-NEXT: kmovd %k0, %eax
16140 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b:
16141 ; NoVLX: # %bb.0: # %entry
16142 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16143 ; NoVLX-NEXT: kmovw %edi, %k1
16144 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
16145 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16146 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16147 ; NoVLX-NEXT: kmovw %k0, %eax
16148 ; NoVLX-NEXT: vzeroupper
16151 %0 = bitcast <2 x i64> %__a to <4 x i32>
16152 %load = load i32, i32* %__b
16153 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
; Splat lane 0 (the loaded scalar) across all four lanes.
16154 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16155 %2 = icmp ult <4 x i32> %0, %1
16156 %3 = bitcast i8 %__u to <8 x i1>
; Only the low 4 bits of the 8-bit mask take part in the AND.
16157 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16158 %4 = and <4 x i1> %extract.i, %2
; Indices 0-3 take the masked compare bits; indices 4-7 select from
; zeroinitializer, so every lane above the compare width is 0.
16159 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16160 %6 = bitcast <32 x i1> %5 to i32
; Unsigned less-than compare of the four i32 lanes of %__a and %__b. The
; 4-bit result is widened to 64 bits with zeros and returned as i64.
; With AVX512VL the expected code is a single xmm vpcmpltud followed by
; kmovq. Without it both operands are widened to zmm, and kshiftlw/kshiftrw
; by 12 clear mask bits 4-15 before the kmovw.
16165 define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16166 ; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask:
16167 ; VLX: # %bb.0: # %entry
16168 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
16169 ; VLX-NEXT: kmovq %k0, %rax
16172 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask:
16173 ; NoVLX: # %bb.0: # %entry
16174 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
16175 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16176 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16177 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16178 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16179 ; NoVLX-NEXT: kmovw %k0, %eax
16180 ; NoVLX-NEXT: vzeroupper
16183 %0 = bitcast <2 x i64> %__a to <4 x i32>
16184 %1 = bitcast <2 x i64> %__b to <4 x i32>
16185 %2 = icmp ult <4 x i32> %0, %1
; Indices 0-3 take the compare bits; indices 4-7 select from zeroinitializer,
; so every lane above the compare width is 0.
16186 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16187 %4 = bitcast <64 x i1> %3 to i64
; Unsigned less-than compare of the four i32 lanes of %__a against a
; <2 x i64> vector loaded from %__b. The 4-bit result is widened to 64 bits
; with zeros and returned as i64.
; With AVX512VL the load is expected to fold into vpcmpltud as a memory
; operand. Without it the load is a separate vmovdqa, the compare runs on
; zmm, and kshiftlw/kshiftrw by 12 clear mask bits 4-15.
16191 define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
16192 ; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem:
16193 ; VLX: # %bb.0: # %entry
16194 ; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
16195 ; VLX-NEXT: kmovq %k0, %rax
16198 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem:
16199 ; NoVLX: # %bb.0: # %entry
16200 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16201 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
16202 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16203 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16204 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16205 ; NoVLX-NEXT: kmovw %k0, %eax
16206 ; NoVLX-NEXT: vzeroupper
16209 %0 = bitcast <2 x i64> %__a to <4 x i32>
16210 %load = load <2 x i64>, <2 x i64>* %__b
16211 %1 = bitcast <2 x i64> %load to <4 x i32>
16212 %2 = icmp ult <4 x i32> %0, %1
; Indices 0-3 take the compare bits; indices 4-7 select from zeroinitializer,
; so every lane above the compare width is 0.
16213 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16214 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned less-than compare of the four i32 lanes of %__a and %__b,
; ANDed with the low 4 bits of %__u. The result is widened to 64 bits with
; zeros and returned as i64.
; With AVX512VL the expected code is one xmm vpcmpltud under write-mask
; {%k1}, then kmovq. Without it the operands are widened to zmm, and
; kshiftlw/kshiftrw by 12 clear mask bits 4-15.
16218 define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16219 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask:
16220 ; VLX: # %bb.0: # %entry
16221 ; VLX-NEXT: kmovd %edi, %k1
16222 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
16223 ; VLX-NEXT: kmovq %k0, %rax
16226 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask:
16227 ; NoVLX: # %bb.0: # %entry
16228 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
16229 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16230 ; NoVLX-NEXT: kmovw %edi, %k1
16231 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16232 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16233 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16234 ; NoVLX-NEXT: kmovw %k0, %eax
16235 ; NoVLX-NEXT: vzeroupper
16238 %0 = bitcast <2 x i64> %__a to <4 x i32>
16239 %1 = bitcast <2 x i64> %__b to <4 x i32>
16240 %2 = icmp ult <4 x i32> %0, %1
16241 %3 = bitcast i8 %__u to <8 x i1>
; Only the low 4 bits of the 8-bit mask take part in the AND.
16242 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16243 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 take the masked compare bits; indices 4-7 select from
; zeroinitializer, so every lane above the compare width is 0.
16244 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16245 %6 = bitcast <64 x i1> %5 to i64
; Masked unsigned less-than compare of the four i32 lanes of %__a against a
; <2 x i64> vector loaded from %__b, ANDed with the low 4 bits of %__u. The
; result is widened to 64 bits with zeros and returned as i64.
; With AVX512VL the load is expected to fold into vpcmpltud under
; write-mask {%k1}. Without it the load is a separate vmovdqa, the compare
; runs on zmm, and kshiftlw/kshiftrw by 12 clear mask bits 4-15.
16249 define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
16250 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem:
16251 ; VLX: # %bb.0: # %entry
16252 ; VLX-NEXT: kmovd %edi, %k1
16253 ; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
16254 ; VLX-NEXT: kmovq %k0, %rax
16257 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem:
16258 ; NoVLX: # %bb.0: # %entry
16259 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16260 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
16261 ; NoVLX-NEXT: kmovw %edi, %k1
16262 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16263 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16264 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16265 ; NoVLX-NEXT: kmovw %k0, %eax
16266 ; NoVLX-NEXT: vzeroupper
16269 %0 = bitcast <2 x i64> %__a to <4 x i32>
16270 %load = load <2 x i64>, <2 x i64>* %__b
16271 %1 = bitcast <2 x i64> %load to <4 x i32>
16272 %2 = icmp ult <4 x i32> %0, %1
16273 %3 = bitcast i8 %__u to <8 x i1>
; Only the low 4 bits of the 8-bit mask take part in the AND.
16274 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16275 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 take the masked compare bits; indices 4-7 select from
; zeroinitializer, so every lane above the compare width is 0.
16276 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16277 %6 = bitcast <64 x i1> %5 to i64
; Unsigned less-than compare of the four i32 lanes of %__a against a scalar
; loaded from %__b and splatted to every lane. The 4-bit result is widened to
; 64 bits with zeros and returned as i64.
; With AVX512VL the expected code is a single vpcmpltud using a {1to4}
; broadcast operand, then kmovq. Without it the compare runs on zmm with
; {1to16}, and kshiftlw/kshiftrw by 12 clear mask bits 4-15.
16282 define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
16283 ; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b:
16284 ; VLX: # %bb.0: # %entry
16285 ; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
16286 ; VLX-NEXT: kmovq %k0, %rax
16289 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b:
16290 ; NoVLX: # %bb.0: # %entry
16291 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16292 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
16293 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16294 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16295 ; NoVLX-NEXT: kmovw %k0, %eax
16296 ; NoVLX-NEXT: vzeroupper
16299 %0 = bitcast <2 x i64> %__a to <4 x i32>
16300 %load = load i32, i32* %__b
16301 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
; Splat lane 0 (the loaded scalar) across all four lanes.
16302 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16303 %2 = icmp ult <4 x i32> %0, %1
; Indices 0-3 take the compare bits; indices 4-7 select from zeroinitializer,
; so every lane above the compare width is 0.
16304 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16305 %4 = bitcast <64 x i1> %3 to i64
; Masked form of the broadcast compare: the four i32 lanes of %__a are
; compared (unsigned less-than) against a scalar loaded from %__b and splatted,
; then ANDed with the low 4 bits of %__u. The result is widened to 64 bits with
; zeros and returned as i64.
; With AVX512VL the expected code is one vpcmpltud with a {1to4} operand
; under write-mask {%k1}, then kmovq. Without it the compare runs on zmm with
; {1to16}, and kshiftlw/kshiftrw by 12 clear mask bits 4-15.
16309 define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
16310 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b:
16311 ; VLX: # %bb.0: # %entry
16312 ; VLX-NEXT: kmovd %edi, %k1
16313 ; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
16314 ; VLX-NEXT: kmovq %k0, %rax
16317 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b:
16318 ; NoVLX: # %bb.0: # %entry
16319 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16320 ; NoVLX-NEXT: kmovw %edi, %k1
16321 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
16322 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16323 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16324 ; NoVLX-NEXT: kmovw %k0, %eax
16325 ; NoVLX-NEXT: vzeroupper
16328 %0 = bitcast <2 x i64> %__a to <4 x i32>
16329 %load = load i32, i32* %__b
16330 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
; Splat lane 0 (the loaded scalar) across all four lanes.
16331 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16332 %2 = icmp ult <4 x i32> %0, %1
16333 %3 = bitcast i8 %__u to <8 x i1>
; Only the low 4 bits of the 8-bit mask take part in the AND.
16334 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16335 %4 = and <4 x i1> %extract.i, %2
; Indices 0-3 take the masked compare bits; indices 4-7 select from
; zeroinitializer, so every lane above the compare width is 0.
16336 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16337 %6 = bitcast <64 x i1> %5 to i64
; Unsigned less-than compare of the eight i32 lanes of %__a and %__b. The
; 8-bit result is widened to 16 bits with zeros and returned as i16.
; With AVX512VL the expected code is a single ymm vpcmpltud. Without it both
; operands are widened to zmm, and kshiftlw/kshiftrw by 8 clear mask bits
; 8-15.
16342 define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16343 ; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask:
16344 ; VLX: # %bb.0: # %entry
16345 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0
16346 ; VLX-NEXT: kmovd %k0, %eax
16347 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16348 ; VLX-NEXT: vzeroupper
16351 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask:
16352 ; NoVLX: # %bb.0: # %entry
16353 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
16354 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16355 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16356 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16357 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16358 ; NoVLX-NEXT: kmovw %k0, %eax
16359 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16360 ; NoVLX-NEXT: vzeroupper
16363 %0 = bitcast <4 x i64> %__a to <8 x i32>
16364 %1 = bitcast <4 x i64> %__b to <8 x i32>
16365 %2 = icmp ult <8 x i32> %0, %1
; Indices 0-7 take the compare bits; indices 8-15 select from zeroinitializer,
; so every lane above the compare width is 0.
16366 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16367 %4 = bitcast <16 x i1> %3 to i16
; Unsigned less-than compare of the eight i32 lanes of %__a against a
; <4 x i64> vector loaded from %__b. The 8-bit result is widened to 16 bits
; with zeros and returned as i16.
; With AVX512VL the load is expected to fold into vpcmpltud as a memory
; operand. Without it the load is a separate vmovdqa, the compare runs on
; zmm, and kshiftlw/kshiftrw by 8 clear mask bits 8-15.
16371 define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
16372 ; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem:
16373 ; VLX: # %bb.0: # %entry
16374 ; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0
16375 ; VLX-NEXT: kmovd %k0, %eax
16376 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16377 ; VLX-NEXT: vzeroupper
16380 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem:
16381 ; NoVLX: # %bb.0: # %entry
16382 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16383 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
16384 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16385 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16386 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16387 ; NoVLX-NEXT: kmovw %k0, %eax
16388 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16389 ; NoVLX-NEXT: vzeroupper
16392 %0 = bitcast <4 x i64> %__a to <8 x i32>
16393 %load = load <4 x i64>, <4 x i64>* %__b
16394 %1 = bitcast <4 x i64> %load to <8 x i32>
16395 %2 = icmp ult <8 x i32> %0, %1
; Indices 0-7 take the compare bits; indices 8-15 select from zeroinitializer,
; so every lane above the compare width is 0.
16396 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16397 %4 = bitcast <16 x i1> %3 to i16
; Masked unsigned less-than compare of the eight i32 lanes of %__a and %__b,
; ANDed with all 8 bits of %__u. The result is widened to 16 bits with zeros
; and returned as i16.
; With AVX512VL the expected code is one ymm vpcmpltud under write-mask
; {%k1}. Without it the operands are widened to zmm, and kshiftlw/kshiftrw
; by 8 clear mask bits 8-15.
16401 define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16402 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask:
16403 ; VLX: # %bb.0: # %entry
16404 ; VLX-NEXT: kmovd %edi, %k1
16405 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
16406 ; VLX-NEXT: kmovd %k0, %eax
16407 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16408 ; VLX-NEXT: vzeroupper
16411 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask:
16412 ; NoVLX: # %bb.0: # %entry
16413 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
16414 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16415 ; NoVLX-NEXT: kmovw %edi, %k1
16416 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16417 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16418 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16419 ; NoVLX-NEXT: kmovw %k0, %eax
16420 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16421 ; NoVLX-NEXT: vzeroupper
16424 %0 = bitcast <4 x i64> %__a to <8 x i32>
16425 %1 = bitcast <4 x i64> %__b to <8 x i32>
16426 %2 = icmp ult <8 x i32> %0, %1
16427 %3 = bitcast i8 %__u to <8 x i1>
16428 %4 = and <8 x i1> %2, %3
; Indices 0-7 take the masked compare bits; indices 8-15 select from
; zeroinitializer, so every lane above the compare width is 0.
16429 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16430 %6 = bitcast <16 x i1> %5 to i16
; Masked unsigned less-than compare of the eight i32 lanes of %__a against a
; <4 x i64> vector loaded from %__b, ANDed with all 8 bits of %__u. The result
; is widened to 16 bits with zeros and returned as i16.
; With AVX512VL the load is expected to fold into vpcmpltud under
; write-mask {%k1}. Without it the load is a separate vmovdqa, the compare
; runs on zmm, and kshiftlw/kshiftrw by 8 clear mask bits 8-15.
16434 define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
16435 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem:
16436 ; VLX: # %bb.0: # %entry
16437 ; VLX-NEXT: kmovd %edi, %k1
16438 ; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
16439 ; VLX-NEXT: kmovd %k0, %eax
16440 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16441 ; VLX-NEXT: vzeroupper
16444 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem:
16445 ; NoVLX: # %bb.0: # %entry
16446 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16447 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
16448 ; NoVLX-NEXT: kmovw %edi, %k1
16449 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16450 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16451 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16452 ; NoVLX-NEXT: kmovw %k0, %eax
16453 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16454 ; NoVLX-NEXT: vzeroupper
16457 %0 = bitcast <4 x i64> %__a to <8 x i32>
16458 %load = load <4 x i64>, <4 x i64>* %__b
16459 %1 = bitcast <4 x i64> %load to <8 x i32>
16460 %2 = icmp ult <8 x i32> %0, %1
16461 %3 = bitcast i8 %__u to <8 x i1>
16462 %4 = and <8 x i1> %2, %3
; Indices 0-7 take the masked compare bits; indices 8-15 select from
; zeroinitializer, so every lane above the compare width is 0.
16463 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16464 %6 = bitcast <16 x i1> %5 to i16
; Unsigned less-than compare of the eight i32 lanes of %__a against a scalar
; loaded from %__b and splatted to every lane. The 8-bit result is widened to
; 16 bits with zeros and returned as i16.
; With AVX512VL the expected code is a single vpcmpltud using a {1to8}
; broadcast operand. Without it the compare runs on zmm with {1to16}, and
; kshiftlw/kshiftrw by 8 clear mask bits 8-15.
16469 define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
16470 ; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b:
16471 ; VLX: # %bb.0: # %entry
16472 ; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
16473 ; VLX-NEXT: kmovd %k0, %eax
16474 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16475 ; VLX-NEXT: vzeroupper
16478 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b:
16479 ; NoVLX: # %bb.0: # %entry
16480 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16481 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
16482 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16483 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16484 ; NoVLX-NEXT: kmovw %k0, %eax
16485 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16486 ; NoVLX-NEXT: vzeroupper
16489 %0 = bitcast <4 x i64> %__a to <8 x i32>
16490 %load = load i32, i32* %__b
16491 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
; Splat lane 0 (the loaded scalar) across all eight lanes.
16492 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16493 %2 = icmp ult <8 x i32> %0, %1
; Indices 0-7 take the compare bits; indices 8-15 select from zeroinitializer,
; so every lane above the compare width is 0.
16494 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16495 %4 = bitcast <16 x i1> %3 to i16
; Masked form of the broadcast compare: the eight i32 lanes of %__a are
; compared (unsigned less-than) against a scalar loaded from %__b and splatted,
; then ANDed with all 8 bits of %__u. The result is widened to 16 bits with
; zeros and returned as i16.
; With AVX512VL the expected code is one vpcmpltud with a {1to8} operand
; under write-mask {%k1}. Without it the compare runs on zmm with {1to16},
; and kshiftlw/kshiftrw by 8 clear mask bits 8-15.
16499 define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
16500 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b:
16501 ; VLX: # %bb.0: # %entry
16502 ; VLX-NEXT: kmovd %edi, %k1
16503 ; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
16504 ; VLX-NEXT: kmovd %k0, %eax
16505 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16506 ; VLX-NEXT: vzeroupper
16509 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b:
16510 ; NoVLX: # %bb.0: # %entry
16511 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16512 ; NoVLX-NEXT: kmovw %edi, %k1
16513 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
16514 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16515 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16516 ; NoVLX-NEXT: kmovw %k0, %eax
16517 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16518 ; NoVLX-NEXT: vzeroupper
16521 %0 = bitcast <4 x i64> %__a to <8 x i32>
16522 %load = load i32, i32* %__b
16523 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
; Splat lane 0 (the loaded scalar) across all eight lanes.
16524 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16525 %2 = icmp ult <8 x i32> %0, %1
16526 %3 = bitcast i8 %__u to <8 x i1>
16527 %4 = and <8 x i1> %3, %2
; Indices 0-7 take the masked compare bits; indices 8-15 select from
; zeroinitializer, so every lane above the compare width is 0.
16528 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16529 %6 = bitcast <16 x i1> %5 to i16
; Unsigned less-than compare of the eight i32 lanes of %__a and %__b. The
; 8-bit result is widened to 32 bits with zeros and returned as i32.
; With AVX512VL the expected code is a single ymm vpcmpltud. Without it both
; operands are widened to zmm, and kshiftlw/kshiftrw by 8 clear mask bits
; 8-15.
16534 define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16535 ; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask:
16536 ; VLX: # %bb.0: # %entry
16537 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0
16538 ; VLX-NEXT: kmovd %k0, %eax
16539 ; VLX-NEXT: vzeroupper
16542 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask:
16543 ; NoVLX: # %bb.0: # %entry
16544 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
16545 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16546 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16547 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16548 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16549 ; NoVLX-NEXT: kmovw %k0, %eax
16550 ; NoVLX-NEXT: vzeroupper
16553 %0 = bitcast <4 x i64> %__a to <8 x i32>
16554 %1 = bitcast <4 x i64> %__b to <8 x i32>
16555 %2 = icmp ult <8 x i32> %0, %1
; Indices 0-7 take the compare bits; indices 8-15 select from zeroinitializer,
; so every lane above the compare width is 0.
16556 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16557 %4 = bitcast <32 x i1> %3 to i32
; Unsigned less-than compare of the eight i32 lanes of %__a against a
; <4 x i64> vector loaded from %__b. The 8-bit result is widened to 32 bits
; with zeros and returned as i32.
; With AVX512VL the load is expected to fold into vpcmpltud as a memory
; operand. Without it the load is a separate vmovdqa, the compare runs on
; zmm, and kshiftlw/kshiftrw by 8 clear mask bits 8-15.
16561 define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
16562 ; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem:
16563 ; VLX: # %bb.0: # %entry
16564 ; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0
16565 ; VLX-NEXT: kmovd %k0, %eax
16566 ; VLX-NEXT: vzeroupper
16569 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem:
16570 ; NoVLX: # %bb.0: # %entry
16571 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16572 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
16573 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16574 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16575 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16576 ; NoVLX-NEXT: kmovw %k0, %eax
16577 ; NoVLX-NEXT: vzeroupper
16580 %0 = bitcast <4 x i64> %__a to <8 x i32>
16581 %load = load <4 x i64>, <4 x i64>* %__b
16582 %1 = bitcast <4 x i64> %load to <8 x i32>
16583 %2 = icmp ult <8 x i32> %0, %1
; Indices 0-7 take the compare bits; indices 8-15 select from zeroinitializer,
; so every lane above the compare width is 0.
16584 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16585 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned less-than compare of the eight i32 lanes of %__a and %__b,
; ANDed with all 8 bits of %__u. The result is widened to 32 bits with zeros
; and returned as i32.
; With AVX512VL the expected code is one ymm vpcmpltud under write-mask
; {%k1}. Without it the operands are widened to zmm, and kshiftlw/kshiftrw
; by 8 clear mask bits 8-15.
16589 define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16590 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask:
16591 ; VLX: # %bb.0: # %entry
16592 ; VLX-NEXT: kmovd %edi, %k1
16593 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
16594 ; VLX-NEXT: kmovd %k0, %eax
16595 ; VLX-NEXT: vzeroupper
16598 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask:
16599 ; NoVLX: # %bb.0: # %entry
16600 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
16601 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16602 ; NoVLX-NEXT: kmovw %edi, %k1
16603 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16604 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16605 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16606 ; NoVLX-NEXT: kmovw %k0, %eax
16607 ; NoVLX-NEXT: vzeroupper
16610 %0 = bitcast <4 x i64> %__a to <8 x i32>
16611 %1 = bitcast <4 x i64> %__b to <8 x i32>
16612 %2 = icmp ult <8 x i32> %0, %1
16613 %3 = bitcast i8 %__u to <8 x i1>
16614 %4 = and <8 x i1> %2, %3
; Indices 0-7 take the masked compare bits; indices 8-15 select from
; zeroinitializer, so every lane above the compare width is 0.
16615 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16616 %6 = bitcast <32 x i1> %5 to i32
; Masked unsigned less-than compare of the eight i32 lanes of %__a against a
; <4 x i64> vector loaded from %__b, ANDed with all 8 bits of %__u. The result
; is widened to 32 bits with zeros and returned as i32.
; With AVX512VL the load is expected to fold into vpcmpltud under
; write-mask {%k1}. Without it the load is a separate vmovdqa, the compare
; runs on zmm, and kshiftlw/kshiftrw by 8 clear mask bits 8-15.
16620 define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
16621 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem:
16622 ; VLX: # %bb.0: # %entry
16623 ; VLX-NEXT: kmovd %edi, %k1
16624 ; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
16625 ; VLX-NEXT: kmovd %k0, %eax
16626 ; VLX-NEXT: vzeroupper
16629 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem:
16630 ; NoVLX: # %bb.0: # %entry
16631 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16632 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
16633 ; NoVLX-NEXT: kmovw %edi, %k1
16634 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16635 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16636 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16637 ; NoVLX-NEXT: kmovw %k0, %eax
16638 ; NoVLX-NEXT: vzeroupper
16641 %0 = bitcast <4 x i64> %__a to <8 x i32>
16642 %load = load <4 x i64>, <4 x i64>* %__b
16643 %1 = bitcast <4 x i64> %load to <8 x i32>
16644 %2 = icmp ult <8 x i32> %0, %1
16645 %3 = bitcast i8 %__u to <8 x i1>
16646 %4 = and <8 x i1> %2, %3
; Indices 0-7 take the masked compare bits; indices 8-15 select from
; zeroinitializer, so every lane above the compare width is 0.
16647 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16648 %6 = bitcast <32 x i1> %5 to i32
; Unsigned less-than compare of the eight i32 lanes of %__a against a scalar
; loaded from %__b and splatted to every lane. The 8-bit result is widened to
; 32 bits with zeros and returned as i32.
; With AVX512VL the expected code is a single vpcmpltud using a {1to8}
; broadcast operand. Without it the compare runs on zmm with {1to16}, and
; kshiftlw/kshiftrw by 8 clear mask bits 8-15.
16653 define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
16654 ; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b:
16655 ; VLX: # %bb.0: # %entry
16656 ; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
16657 ; VLX-NEXT: kmovd %k0, %eax
16658 ; VLX-NEXT: vzeroupper
16661 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b:
16662 ; NoVLX: # %bb.0: # %entry
16663 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16664 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
16665 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16666 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16667 ; NoVLX-NEXT: kmovw %k0, %eax
16668 ; NoVLX-NEXT: vzeroupper
16671 %0 = bitcast <4 x i64> %__a to <8 x i32>
16672 %load = load i32, i32* %__b
16673 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
; Splat lane 0 (the loaded scalar) across all eight lanes.
16674 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16675 %2 = icmp ult <8 x i32> %0, %1
; Indices 0-7 take the compare bits; indices 8-15 select from zeroinitializer,
; so every lane above the compare width is 0.
16676 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16677 %4 = bitcast <32 x i1> %3 to i32
; Masked form of the broadcast compare: the eight i32 lanes of %__a are
; compared (unsigned less-than) against a scalar loaded from %__b and splatted,
; then ANDed with all 8 bits of %__u. The result is widened to 32 bits with
; zeros and returned as i32.
; With AVX512VL the expected code is one vpcmpltud with a {1to8} operand
; under write-mask {%k1}. Without it the compare runs on zmm with {1to16},
; and kshiftlw/kshiftrw by 8 clear mask bits 8-15.
16681 define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
16682 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b:
16683 ; VLX: # %bb.0: # %entry
16684 ; VLX-NEXT: kmovd %edi, %k1
16685 ; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
16686 ; VLX-NEXT: kmovd %k0, %eax
16687 ; VLX-NEXT: vzeroupper
16690 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b:
16691 ; NoVLX: # %bb.0: # %entry
16692 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16693 ; NoVLX-NEXT: kmovw %edi, %k1
16694 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
16695 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16696 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16697 ; NoVLX-NEXT: kmovw %k0, %eax
16698 ; NoVLX-NEXT: vzeroupper
16701 %0 = bitcast <4 x i64> %__a to <8 x i32>
16702 %load = load i32, i32* %__b
16703 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
; Splat lane 0 (the loaded scalar) across all eight lanes.
16704 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16705 %2 = icmp ult <8 x i32> %0, %1
16706 %3 = bitcast i8 %__u to <8 x i1>
16707 %4 = and <8 x i1> %3, %2
; Indices 0-7 take the masked compare bits; indices 8-15 select from
; zeroinitializer, so every lane above the compare width is 0.
16708 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16709 %6 = bitcast <32 x i1> %5 to i32
; Unsigned less-than compare of the eight i32 lanes of %__a and %__b. The
; 8-bit result is widened to 64 bits with zeros and returned as i64.
; With AVX512VL the expected code is a single ymm vpcmpltud followed by
; kmovq. Without it both operands are widened to zmm, and kshiftlw/kshiftrw
; by 8 clear mask bits 8-15 before the kmovw.
16714 define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16715 ; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask:
16716 ; VLX: # %bb.0: # %entry
16717 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0
16718 ; VLX-NEXT: kmovq %k0, %rax
16719 ; VLX-NEXT: vzeroupper
16722 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask:
16723 ; NoVLX: # %bb.0: # %entry
16724 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
16725 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16726 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16727 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16728 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16729 ; NoVLX-NEXT: kmovw %k0, %eax
16730 ; NoVLX-NEXT: vzeroupper
16733 %0 = bitcast <4 x i64> %__a to <8 x i32>
16734 %1 = bitcast <4 x i64> %__b to <8 x i32>
16735 %2 = icmp ult <8 x i32> %0, %1
; Indices 0-7 take the compare bits; indices 8-15 select from zeroinitializer,
; so every lane above the compare width is 0.
16736 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16737 %4 = bitcast <64 x i1> %3 to i64
; Unsigned less-than compare of the eight i32 lanes of %__a against a
; <4 x i64> vector loaded from %__b. The 8-bit result is widened to 64 bits
; with zeros and returned as i64.
; With AVX512VL the load is expected to fold into vpcmpltud as a memory
; operand, then kmovq. Without it the load is a separate vmovdqa, the compare
; runs on zmm, and kshiftlw/kshiftrw by 8 clear mask bits 8-15.
16741 define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
16742 ; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem:
16743 ; VLX: # %bb.0: # %entry
16744 ; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0
16745 ; VLX-NEXT: kmovq %k0, %rax
16746 ; VLX-NEXT: vzeroupper
16749 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem:
16750 ; NoVLX: # %bb.0: # %entry
16751 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16752 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
16753 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16754 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16755 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16756 ; NoVLX-NEXT: kmovw %k0, %eax
16757 ; NoVLX-NEXT: vzeroupper
16760 %0 = bitcast <4 x i64> %__a to <8 x i32>
16761 %load = load <4 x i64>, <4 x i64>* %__b
16762 %1 = bitcast <4 x i64> %load to <8 x i32>
16763 %2 = icmp ult <8 x i32> %0, %1
; Indices 0-7 take the compare bits; indices 8-15 select from zeroinitializer,
; so every lane above the compare width is 0.
16764 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16765 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned less-than compare of the eight i32 lanes of %__a and %__b,
; ANDed with all 8 bits of %__u. The result is widened to 64 bits with zeros
; and returned as i64.
; With AVX512VL the expected code is one ymm vpcmpltud under write-mask
; {%k1}, then kmovq. Without it the operands are widened to zmm, and
; kshiftlw/kshiftrw by 8 clear mask bits 8-15.
16769 define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16770 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask:
16771 ; VLX: # %bb.0: # %entry
16772 ; VLX-NEXT: kmovd %edi, %k1
16773 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
16774 ; VLX-NEXT: kmovq %k0, %rax
16775 ; VLX-NEXT: vzeroupper
16778 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask:
16779 ; NoVLX: # %bb.0: # %entry
16780 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
16781 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16782 ; NoVLX-NEXT: kmovw %edi, %k1
16783 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16784 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16785 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16786 ; NoVLX-NEXT: kmovw %k0, %eax
16787 ; NoVLX-NEXT: vzeroupper
16790 %0 = bitcast <4 x i64> %__a to <8 x i32>
16791 %1 = bitcast <4 x i64> %__b to <8 x i32>
16792 %2 = icmp ult <8 x i32> %0, %1
16793 %3 = bitcast i8 %__u to <8 x i1>
16794 %4 = and <8 x i1> %2, %3
; Indices 0-7 take the masked compare bits; indices 8-15 select from
; zeroinitializer, so every lane above the compare width is 0.
16795 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16796 %6 = bitcast <64 x i1> %5 to i64
; Memory-operand variant of the masked <8 x i32> unsigned less-than compare:
; the right-hand side is a full <4 x i64> vector loaded from %__b. The compare
; result is ANDed with the i8 mask %__u, widened to <64 x i1> (shuffle indices
; 8-15 select zeroinitializer lanes, so bits 8-63 are zero) and bitcast to i64.
; The +avx512vl run expects the load folded into vpcmpltud; the avx512f-only
; run expects a separate vmovdqa followed by a zmm compare.
16800 define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
16801 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem:
16802 ; VLX: # %bb.0: # %entry
16803 ; VLX-NEXT: kmovd %edi, %k1
16804 ; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
16805 ; VLX-NEXT: kmovq %k0, %rax
16806 ; VLX-NEXT: vzeroupper
16809 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem:
16810 ; NoVLX: # %bb.0: # %entry
16811 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16812 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
16813 ; NoVLX-NEXT: kmovw %edi, %k1
16814 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16815 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16816 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16817 ; NoVLX-NEXT: kmovw %k0, %eax
16818 ; NoVLX-NEXT: vzeroupper
16821 %0 = bitcast <4 x i64> %__a to <8 x i32>
16822 %load = load <4 x i64>, <4 x i64>* %__b
16823 %1 = bitcast <4 x i64> %load to <8 x i32>
16824 %2 = icmp ult <8 x i32> %0, %1
16825 %3 = bitcast i8 %__u to <8 x i1>
16826 %4 = and <8 x i1> %2, %3
16827 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16828 %6 = bitcast <64 x i1> %5 to i64
; Broadcast-operand variant of the unmasked <8 x i32> unsigned less-than
; compare: a single i32 is loaded from %__b, inserted into lane 0 and splatted
; to all 8 lanes with an all-zero shuffle mask before the icmp. The <8 x i1>
; result is widened to <64 x i1> (indices 8-15 select zeroinitializer lanes,
; so bits 8-63 are zero) and bitcast to i64. Both runs expect the load folded
; as an embedded broadcast ({1to8} on ymm, {1to16} on zmm).
16833 define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
16834 ; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b:
16835 ; VLX: # %bb.0: # %entry
16836 ; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
16837 ; VLX-NEXT: kmovq %k0, %rax
16838 ; VLX-NEXT: vzeroupper
16841 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b:
16842 ; NoVLX: # %bb.0: # %entry
16843 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16844 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
16845 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16846 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16847 ; NoVLX-NEXT: kmovw %k0, %eax
16848 ; NoVLX-NEXT: vzeroupper
16851 %0 = bitcast <4 x i64> %__a to <8 x i32>
16852 %load = load i32, i32* %__b
16853 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16854 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16855 %2 = icmp ult <8 x i32> %0, %1
16856 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16857 %4 = bitcast <64 x i1> %3 to i64
; Masked broadcast-operand variant of the <8 x i32> unsigned less-than compare:
; one i32 loaded from %__b is splatted to 8 lanes, compared against %__a, and
; the result is ANDed with the i8 mask %__u. Note the AND operands are written
; mask-first here (%3, %2), unlike the register and full-vector-load variants.
; The <8 x i1> value is widened to <64 x i1> (indices 8-15 select
; zeroinitializer lanes, so bits 8-63 are zero) and bitcast to i64.
16861 define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
16862 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b:
16863 ; VLX: # %bb.0: # %entry
16864 ; VLX-NEXT: kmovd %edi, %k1
16865 ; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
16866 ; VLX-NEXT: kmovq %k0, %rax
16867 ; VLX-NEXT: vzeroupper
16870 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b:
16871 ; NoVLX: # %bb.0: # %entry
16872 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16873 ; NoVLX-NEXT: kmovw %edi, %k1
16874 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
16875 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16876 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16877 ; NoVLX-NEXT: kmovw %k0, %eax
16878 ; NoVLX-NEXT: vzeroupper
16881 %0 = bitcast <4 x i64> %__a to <8 x i32>
16882 %load = load i32, i32* %__b
16883 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16884 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16885 %2 = icmp ult <8 x i32> %0, %1
16886 %3 = bitcast i8 %__u to <8 x i1>
16887 %4 = and <8 x i1> %3, %2
16888 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16889 %6 = bitcast <64 x i1> %5 to i64
; Unmasked unsigned less-than compare of %__a and %__b viewed as <16 x i32>.
; The <16 x i1> result is widened to <32 x i1>: shuffle indices 0-15 select the
; compare bits and indices 16-31 select lanes of the zeroinitializer operand,
; so bits 16-31 are zero. The widened mask is bitcast to i32. Both runs expect
; a single zmm vpcmpltud; only the k-register move differs (kmovd vs kmovw).
16894 define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
16895 ; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask:
16896 ; VLX: # %bb.0: # %entry
16897 ; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16898 ; VLX-NEXT: kmovd %k0, %eax
16899 ; VLX-NEXT: vzeroupper
16902 ; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask:
16903 ; NoVLX: # %bb.0: # %entry
16904 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16905 ; NoVLX-NEXT: kmovw %k0, %eax
16906 ; NoVLX-NEXT: vzeroupper
16909 %0 = bitcast <8 x i64> %__a to <16 x i32>
16910 %1 = bitcast <8 x i64> %__b to <16 x i32>
16911 %2 = icmp ult <16 x i32> %0, %1
16912 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
16913 %4 = bitcast <32 x i1> %3 to i32
; Memory-operand variant of the unmasked <16 x i32> unsigned less-than compare:
; the right-hand side is a full <8 x i64> vector loaded from %__b. The
; <16 x i1> result is widened to <32 x i1> (indices 16-31 select
; zeroinitializer lanes, so bits 16-31 are zero) and bitcast to i32. Both runs
; expect the load folded into vpcmpltud.
16917 define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
16918 ; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem:
16919 ; VLX: # %bb.0: # %entry
16920 ; VLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
16921 ; VLX-NEXT: kmovd %k0, %eax
16922 ; VLX-NEXT: vzeroupper
16925 ; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem:
16926 ; NoVLX: # %bb.0: # %entry
16927 ; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
16928 ; NoVLX-NEXT: kmovw %k0, %eax
16929 ; NoVLX-NEXT: vzeroupper
16932 %0 = bitcast <8 x i64> %__a to <16 x i32>
16933 %load = load <8 x i64>, <8 x i64>* %__b
16934 %1 = bitcast <8 x i64> %load to <16 x i32>
16935 %2 = icmp ult <16 x i32> %0, %1
16936 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
16937 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned less-than compare of %__a and %__b viewed as <16 x i32>.
; The <16 x i1> result is ANDed with the i16 mask %__u, widened to <32 x i1>
; (indices 16-31 select zeroinitializer lanes, so bits 16-31 are zero) and
; bitcast to i32. The +avx512vl run expects the mask applied through {%k1};
; the avx512f-only run expects an unmasked compare followed by a scalar
; andl with %edi.
16941 define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
16942 ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask:
16943 ; VLX: # %bb.0: # %entry
16944 ; VLX-NEXT: kmovd %edi, %k1
16945 ; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16946 ; VLX-NEXT: kmovd %k0, %eax
16947 ; VLX-NEXT: vzeroupper
16950 ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask:
16951 ; NoVLX: # %bb.0: # %entry
16952 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16953 ; NoVLX-NEXT: kmovw %k0, %eax
16954 ; NoVLX-NEXT: andl %edi, %eax
16955 ; NoVLX-NEXT: vzeroupper
16958 %0 = bitcast <8 x i64> %__a to <16 x i32>
16959 %1 = bitcast <8 x i64> %__b to <16 x i32>
16960 %2 = icmp ult <16 x i32> %0, %1
16961 %3 = bitcast i16 %__u to <16 x i1>
16962 %4 = and <16 x i1> %2, %3
16963 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
16964 %6 = bitcast <32 x i1> %5 to i32
; Masked memory-operand variant of the <16 x i32> unsigned less-than compare:
; the right-hand side is a full <8 x i64> vector loaded from %__b. The
; <16 x i1> result is ANDed with the i16 mask %__u, widened to <32 x i1>
; (indices 16-31 select zeroinitializer lanes, so bits 16-31 are zero) and
; bitcast to i32. The avx512f-only run expects the mask applied with a scalar
; andl after the compare rather than through a k-register.
16968 define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
16969 ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem:
16970 ; VLX: # %bb.0: # %entry
16971 ; VLX-NEXT: kmovd %edi, %k1
16972 ; VLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
16973 ; VLX-NEXT: kmovd %k0, %eax
16974 ; VLX-NEXT: vzeroupper
16977 ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem:
16978 ; NoVLX: # %bb.0: # %entry
16979 ; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0
16980 ; NoVLX-NEXT: kmovw %k0, %eax
16981 ; NoVLX-NEXT: andl %edi, %eax
16982 ; NoVLX-NEXT: vzeroupper
16985 %0 = bitcast <8 x i64> %__a to <16 x i32>
16986 %load = load <8 x i64>, <8 x i64>* %__b
16987 %1 = bitcast <8 x i64> %load to <16 x i32>
16988 %2 = icmp ult <16 x i32> %0, %1
16989 %3 = bitcast i16 %__u to <16 x i1>
16990 %4 = and <16 x i1> %2, %3
16991 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
16992 %6 = bitcast <32 x i1> %5 to i32
; Broadcast-operand variant of the unmasked <16 x i32> unsigned less-than
; compare: one i32 loaded from %__b is inserted into lane 0 and splatted to all
; 16 lanes with an all-zero shuffle mask. The <16 x i1> result is widened to
; <32 x i1> (indices 16-31 select zeroinitializer lanes, so bits 16-31 are
; zero) and bitcast to i32. Both runs expect a {1to16} embedded broadcast.
16997 define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
16998 ; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b:
16999 ; VLX: # %bb.0: # %entry
17000 ; VLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
17001 ; VLX-NEXT: kmovd %k0, %eax
17002 ; VLX-NEXT: vzeroupper
17005 ; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b:
17006 ; NoVLX: # %bb.0: # %entry
17007 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
17008 ; NoVLX-NEXT: kmovw %k0, %eax
17009 ; NoVLX-NEXT: vzeroupper
17012 %0 = bitcast <8 x i64> %__a to <16 x i32>
17013 %load = load i32, i32* %__b
17014 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
17015 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17016 %2 = icmp ult <16 x i32> %0, %1
17017 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17018 %4 = bitcast <32 x i1> %3 to i32
; Masked broadcast-operand variant of the <16 x i32> unsigned less-than
; compare: one i32 loaded from %__b is splatted to 16 lanes, compared against
; %__a, and the result is ANDed with the i16 mask %__u (mask written as the
; first AND operand here). The <16 x i1> value is widened to <32 x i1>
; (indices 16-31 select zeroinitializer lanes, so bits 16-31 are zero) and
; bitcast to i32.
17022 define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
17023 ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b:
17024 ; VLX: # %bb.0: # %entry
17025 ; VLX-NEXT: kmovd %edi, %k1
17026 ; VLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
17027 ; VLX-NEXT: kmovd %k0, %eax
17028 ; VLX-NEXT: vzeroupper
17031 ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b:
17032 ; NoVLX: # %bb.0: # %entry
17033 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0
17034 ; NoVLX-NEXT: kmovw %k0, %eax
17035 ; NoVLX-NEXT: andl %edi, %eax
17036 ; NoVLX-NEXT: vzeroupper
17039 %0 = bitcast <8 x i64> %__a to <16 x i32>
17040 %load = load i32, i32* %__b
17041 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
17042 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17043 %2 = icmp ult <16 x i32> %0, %1
17044 %3 = bitcast i16 %__u to <16 x i1>
17045 %4 = and <16 x i1> %3, %2
17046 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17047 %6 = bitcast <32 x i1> %5 to i32
; Unmasked unsigned less-than compare of %__a and %__b viewed as <16 x i32>,
; with the <16 x i1> result widened to <64 x i1>. Shuffle indices 0-15 select
; the compare bits; every remaining index is in the range 16-31, which selects
; lanes of the zeroinitializer operand, so bits 16-63 are zero. The widened
; mask is bitcast to i64.
17052 define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
17053 ; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask:
17054 ; VLX: # %bb.0: # %entry
17055 ; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
17056 ; VLX-NEXT: kmovq %k0, %rax
17057 ; VLX-NEXT: vzeroupper
17060 ; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask:
17061 ; NoVLX: # %bb.0: # %entry
17062 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
17063 ; NoVLX-NEXT: kmovw %k0, %eax
17064 ; NoVLX-NEXT: vzeroupper
17067 %0 = bitcast <8 x i64> %__a to <16 x i32>
17068 %1 = bitcast <8 x i64> %__b to <16 x i32>
17069 %2 = icmp ult <16 x i32> %0, %1
17070 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17071 %4 = bitcast <64 x i1> %3 to i64
; Memory-operand variant of the unmasked <16 x i32> unsigned less-than compare
; with a 64-bit result: the right-hand side is a full <8 x i64> vector loaded
; from %__b. The <16 x i1> result is widened to <64 x i1> (all indices past the
; first 16 select zeroinitializer lanes, so bits 16-63 are zero) and bitcast
; to i64. Both runs expect the load folded into vpcmpltud.
17075 define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
17076 ; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem:
17077 ; VLX: # %bb.0: # %entry
17078 ; VLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
17079 ; VLX-NEXT: kmovq %k0, %rax
17080 ; VLX-NEXT: vzeroupper
17083 ; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem:
17084 ; NoVLX: # %bb.0: # %entry
17085 ; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
17086 ; NoVLX-NEXT: kmovw %k0, %eax
17087 ; NoVLX-NEXT: vzeroupper
17090 %0 = bitcast <8 x i64> %__a to <16 x i32>
17091 %load = load <8 x i64>, <8 x i64>* %__b
17092 %1 = bitcast <8 x i64> %load to <16 x i32>
17093 %2 = icmp ult <16 x i32> %0, %1
17094 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17095 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned less-than compare of %__a and %__b viewed as <16 x i32>,
; producing a 64-bit result. The <16 x i1> compare result is ANDed with the
; i16 mask %__u, widened to <64 x i1> (all indices past the first 16 select
; zeroinitializer lanes, so bits 16-63 are zero) and bitcast to i64. The
; avx512f-only run expects an unmasked compare followed by a scalar andl.
17099 define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
17100 ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask:
17101 ; VLX: # %bb.0: # %entry
17102 ; VLX-NEXT: kmovd %edi, %k1
17103 ; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
17104 ; VLX-NEXT: kmovq %k0, %rax
17105 ; VLX-NEXT: vzeroupper
17108 ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask:
17109 ; NoVLX: # %bb.0: # %entry
17110 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
17111 ; NoVLX-NEXT: kmovw %k0, %eax
17112 ; NoVLX-NEXT: andl %edi, %eax
17113 ; NoVLX-NEXT: vzeroupper
17116 %0 = bitcast <8 x i64> %__a to <16 x i32>
17117 %1 = bitcast <8 x i64> %__b to <16 x i32>
17118 %2 = icmp ult <16 x i32> %0, %1
17119 %3 = bitcast i16 %__u to <16 x i1>
17120 %4 = and <16 x i1> %2, %3
17121 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17122 %6 = bitcast <64 x i1> %5 to i64
; Masked memory-operand variant of the <16 x i32> unsigned less-than compare
; with a 64-bit result: the right-hand side is a full <8 x i64> vector loaded
; from %__b. The <16 x i1> result is ANDed with the i16 mask %__u, widened to
; <64 x i1> (all indices past the first 16 select zeroinitializer lanes, so
; bits 16-63 are zero) and bitcast to i64.
17126 define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
17127 ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem:
17128 ; VLX: # %bb.0: # %entry
17129 ; VLX-NEXT: kmovd %edi, %k1
17130 ; VLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
17131 ; VLX-NEXT: kmovq %k0, %rax
17132 ; VLX-NEXT: vzeroupper
17135 ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem:
17136 ; NoVLX: # %bb.0: # %entry
17137 ; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0
17138 ; NoVLX-NEXT: kmovw %k0, %eax
17139 ; NoVLX-NEXT: andl %edi, %eax
17140 ; NoVLX-NEXT: vzeroupper
17143 %0 = bitcast <8 x i64> %__a to <16 x i32>
17144 %load = load <8 x i64>, <8 x i64>* %__b
17145 %1 = bitcast <8 x i64> %load to <16 x i32>
17146 %2 = icmp ult <16 x i32> %0, %1
17147 %3 = bitcast i16 %__u to <16 x i1>
17148 %4 = and <16 x i1> %2, %3
17149 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17150 %6 = bitcast <64 x i1> %5 to i64
; Broadcast-operand variant of the unmasked <16 x i32> unsigned less-than
; compare with a 64-bit result: one i32 loaded from %__b is inserted into
; lane 0 and splatted to all 16 lanes. The <16 x i1> result is widened to
; <64 x i1> (all indices past the first 16 select zeroinitializer lanes, so
; bits 16-63 are zero) and bitcast to i64.
17155 define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
17156 ; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b:
17157 ; VLX: # %bb.0: # %entry
17158 ; VLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
17159 ; VLX-NEXT: kmovq %k0, %rax
17160 ; VLX-NEXT: vzeroupper
17163 ; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b:
17164 ; NoVLX: # %bb.0: # %entry
17165 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
17166 ; NoVLX-NEXT: kmovw %k0, %eax
17167 ; NoVLX-NEXT: vzeroupper
17170 %0 = bitcast <8 x i64> %__a to <16 x i32>
17171 %load = load i32, i32* %__b
17172 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
17173 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17174 %2 = icmp ult <16 x i32> %0, %1
17175 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17176 %4 = bitcast <64 x i1> %3 to i64
; Masked broadcast-operand variant of the <16 x i32> unsigned less-than
; compare with a 64-bit result: one i32 loaded from %__b is splatted to 16
; lanes, compared against %__a, and the result is ANDed with the i16 mask
; %__u (mask written as the first AND operand here). The <16 x i1> value is
; widened to <64 x i1> (all indices past the first 16 select zeroinitializer
; lanes, so bits 16-63 are zero) and bitcast to i64.
17180 define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
17181 ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b:
17182 ; VLX: # %bb.0: # %entry
17183 ; VLX-NEXT: kmovd %edi, %k1
17184 ; VLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
17185 ; VLX-NEXT: kmovq %k0, %rax
17186 ; VLX-NEXT: vzeroupper
17189 ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b:
17190 ; NoVLX: # %bb.0: # %entry
17191 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0
17192 ; NoVLX-NEXT: kmovw %k0, %eax
17193 ; NoVLX-NEXT: andl %edi, %eax
17194 ; NoVLX-NEXT: vzeroupper
17197 %0 = bitcast <8 x i64> %__a to <16 x i32>
17198 %load = load i32, i32* %__b
17199 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
17200 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17201 %2 = icmp ult <16 x i32> %0, %1
17202 %3 = bitcast i16 %__u to <16 x i1>
17203 %4 = and <16 x i1> %3, %2
17204 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17205 %6 = bitcast <64 x i1> %5 to i64
; Unmasked unsigned less-than compare of two <2 x i64> vectors (the leading
; bitcasts are identity casts). The <2 x i1> result is widened to <4 x i1>:
; shuffle indices 0-1 select the compare bits and indices 2-3 select lanes of
; the zeroinitializer operand, so bits 2-3 are zero. The widened mask is
; bitcast to i4. The avx512f-only run is expected to compare on zmm and keep
; only the low 2 k-register bits with a kshiftlw/kshiftrw $14 pair.
17210 define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17211 ; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask:
17212 ; VLX: # %bb.0: # %entry
17213 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
17214 ; VLX-NEXT: kmovb %k0, %eax
17217 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask:
17218 ; NoVLX: # %bb.0: # %entry
17219 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17220 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17221 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17222 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17223 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17224 ; NoVLX-NEXT: kmovw %k0, %eax
17225 ; NoVLX-NEXT: vzeroupper
17228 %0 = bitcast <2 x i64> %__a to <2 x i64>
17229 %1 = bitcast <2 x i64> %__b to <2 x i64>
17230 %2 = icmp ult <2 x i64> %0, %1
17231 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17232 %4 = bitcast <4 x i1> %3 to i4
; Memory-operand variant of the unmasked <2 x i64> unsigned less-than compare:
; the right-hand side is a full <2 x i64> vector loaded from %__b. The
; <2 x i1> result is widened to <4 x i1> (indices 2-3 select zeroinitializer
; lanes, so bits 2-3 are zero) and bitcast to i4. The +avx512vl run expects
; the load folded into vpcmpltuq; the avx512f-only run expects a separate
; vmovdqa followed by a zmm compare.
17236 define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17237 ; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem:
17238 ; VLX: # %bb.0: # %entry
17239 ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
17240 ; VLX-NEXT: kmovb %k0, %eax
17243 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem:
17244 ; NoVLX: # %bb.0: # %entry
17245 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17246 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
17247 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17248 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17249 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17250 ; NoVLX-NEXT: kmovw %k0, %eax
17251 ; NoVLX-NEXT: vzeroupper
17254 %0 = bitcast <2 x i64> %__a to <2 x i64>
17255 %load = load <2 x i64>, <2 x i64>* %__b
17256 %1 = bitcast <2 x i64> %load to <2 x i64>
17257 %2 = icmp ult <2 x i64> %0, %1
17258 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17259 %4 = bitcast <4 x i1> %3 to i4
; Masked unsigned less-than compare of two <2 x i64> vectors. The i8 mask
; %__u is bitcast to <8 x i1> and %extract.i keeps only its elements 0 and 1,
; which are ANDed with the <2 x i1> compare result. The masked value is
; widened to <4 x i1> (indices 2-3 select zeroinitializer lanes, so bits 2-3
; are zero) and bitcast to i4.
17263 define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17264 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask:
17265 ; VLX: # %bb.0: # %entry
17266 ; VLX-NEXT: kmovd %edi, %k1
17267 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
17268 ; VLX-NEXT: kmovb %k0, %eax
17271 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask:
17272 ; NoVLX: # %bb.0: # %entry
17273 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17274 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17275 ; NoVLX-NEXT: kmovw %edi, %k1
17276 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17277 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17278 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17279 ; NoVLX-NEXT: kmovw %k0, %eax
17280 ; NoVLX-NEXT: vzeroupper
17283 %0 = bitcast <2 x i64> %__a to <2 x i64>
17284 %1 = bitcast <2 x i64> %__b to <2 x i64>
17285 %2 = icmp ult <2 x i64> %0, %1
17286 %3 = bitcast i8 %__u to <8 x i1>
17287 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17288 %4 = and <2 x i1> %2, %extract.i
17289 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17290 %6 = bitcast <4 x i1> %5 to i4
; Masked memory-operand variant of the <2 x i64> unsigned less-than compare:
; the right-hand side is a full <2 x i64> vector loaded from %__b. Elements 0
; and 1 of the i8 mask %__u (extracted via %extract.i) are ANDed with the
; <2 x i1> compare result, which is then widened to <4 x i1> (indices 2-3
; select zeroinitializer lanes, so bits 2-3 are zero) and bitcast to i4.
17294 define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17295 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem:
17296 ; VLX: # %bb.0: # %entry
17297 ; VLX-NEXT: kmovd %edi, %k1
17298 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
17299 ; VLX-NEXT: kmovb %k0, %eax
17302 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem:
17303 ; NoVLX: # %bb.0: # %entry
17304 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17305 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
17306 ; NoVLX-NEXT: kmovw %edi, %k1
17307 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17308 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17309 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17310 ; NoVLX-NEXT: kmovw %k0, %eax
17311 ; NoVLX-NEXT: vzeroupper
17314 %0 = bitcast <2 x i64> %__a to <2 x i64>
17315 %load = load <2 x i64>, <2 x i64>* %__b
17316 %1 = bitcast <2 x i64> %load to <2 x i64>
17317 %2 = icmp ult <2 x i64> %0, %1
17318 %3 = bitcast i8 %__u to <8 x i1>
17319 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17320 %4 = and <2 x i1> %2, %extract.i
17321 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17322 %6 = bitcast <4 x i1> %5 to i4
; Broadcast-operand variant of the unmasked <2 x i64> unsigned less-than
; compare: one i64 loaded from %__b is inserted into lane 0 and splatted to
; both lanes with an all-zero shuffle mask. The <2 x i1> result is widened to
; <4 x i1> (indices 2-3 select zeroinitializer lanes, so bits 2-3 are zero)
; and bitcast to i4. Both runs expect the load folded as an embedded broadcast
; ({1to2} on xmm, {1to8} on zmm).
17327 define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
17328 ; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b:
17329 ; VLX: # %bb.0: # %entry
17330 ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
17331 ; VLX-NEXT: kmovb %k0, %eax
17334 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b:
17335 ; NoVLX: # %bb.0: # %entry
17336 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17337 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
17338 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17339 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17340 ; NoVLX-NEXT: kmovw %k0, %eax
17341 ; NoVLX-NEXT: vzeroupper
17344 %0 = bitcast <2 x i64> %__a to <2 x i64>
17345 %load = load i64, i64* %__b
17346 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17347 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17348 %2 = icmp ult <2 x i64> %0, %1
17349 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17350 %4 = bitcast <4 x i1> %3 to i4
; Masked broadcast-operand variant of the <2 x i64> unsigned less-than
; compare: one i64 loaded from %__b is splatted to both lanes and compared
; against %__a. Elements 0 and 1 of the i8 mask %__u (extracted via
; %extract.i) are ANDed with the compare result, mask operand first. The
; <2 x i1> value is widened to <4 x i1> (indices 2-3 select zeroinitializer
; lanes, so bits 2-3 are zero) and bitcast to i4.
17354 define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
17355 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b:
17356 ; VLX: # %bb.0: # %entry
17357 ; VLX-NEXT: kmovd %edi, %k1
17358 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
17359 ; VLX-NEXT: kmovb %k0, %eax
17362 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b:
17363 ; NoVLX: # %bb.0: # %entry
17364 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17365 ; NoVLX-NEXT: kmovw %edi, %k1
17366 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
17367 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17368 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17369 ; NoVLX-NEXT: kmovw %k0, %eax
17370 ; NoVLX-NEXT: vzeroupper
17373 %0 = bitcast <2 x i64> %__a to <2 x i64>
17374 %load = load i64, i64* %__b
17375 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17376 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17377 %2 = icmp ult <2 x i64> %0, %1
17378 %3 = bitcast i8 %__u to <8 x i1>
17379 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17380 %4 = and <2 x i1> %extract.i, %2
17381 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17382 %6 = bitcast <4 x i1> %5 to i4
; Unmasked unsigned less-than compare of two <2 x i64> vectors, with the
; <2 x i1> result widened to <8 x i1>. Shuffle indices 0-1 select the compare
; bits; every other index is 2 or 3, which selects a lane of the
; zeroinitializer operand, so bits 2-7 are zero. The widened mask is bitcast
; to i8.
17387 define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17388 ; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask:
17389 ; VLX: # %bb.0: # %entry
17390 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
17391 ; VLX-NEXT: kmovd %k0, %eax
17392 ; VLX-NEXT: # kill: def $al killed $al killed $eax
17395 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask:
17396 ; NoVLX: # %bb.0: # %entry
17397 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17398 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17399 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17400 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17401 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17402 ; NoVLX-NEXT: kmovw %k0, %eax
17403 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
17404 ; NoVLX-NEXT: vzeroupper
17407 %0 = bitcast <2 x i64> %__a to <2 x i64>
17408 %1 = bitcast <2 x i64> %__b to <2 x i64>
17409 %2 = icmp ult <2 x i64> %0, %1
17410 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17411 %4 = bitcast <8 x i1> %3 to i8
; Memory-operand variant of the unmasked <2 x i64> unsigned less-than compare
; with an 8-bit result: the right-hand side is a full <2 x i64> vector loaded
; from %__b. The <2 x i1> result is widened to <8 x i1> (all indices past the
; first two select zeroinitializer lanes, so bits 2-7 are zero) and bitcast
; to i8.
17415 define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17416 ; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem:
17417 ; VLX: # %bb.0: # %entry
17418 ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
17419 ; VLX-NEXT: kmovd %k0, %eax
17420 ; VLX-NEXT: # kill: def $al killed $al killed $eax
17423 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem:
17424 ; NoVLX: # %bb.0: # %entry
17425 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17426 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
17427 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17428 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17429 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17430 ; NoVLX-NEXT: kmovw %k0, %eax
17431 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
17432 ; NoVLX-NEXT: vzeroupper
17435 %0 = bitcast <2 x i64> %__a to <2 x i64>
17436 %load = load <2 x i64>, <2 x i64>* %__b
17437 %1 = bitcast <2 x i64> %load to <2 x i64>
17438 %2 = icmp ult <2 x i64> %0, %1
17439 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17440 %4 = bitcast <8 x i1> %3 to i8
; Masked unsigned less-than compare of two <2 x i64> vectors with an 8-bit
; result. Elements 0 and 1 of the i8 mask %__u (extracted via %extract.i) are
; ANDed with the <2 x i1> compare result, which is then widened to <8 x i1>
; (all indices past the first two select zeroinitializer lanes, so bits 2-7
; are zero) and bitcast to i8.
17444 define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17445 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask:
17446 ; VLX: # %bb.0: # %entry
17447 ; VLX-NEXT: kmovd %edi, %k1
17448 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
17449 ; VLX-NEXT: kmovd %k0, %eax
17450 ; VLX-NEXT: # kill: def $al killed $al killed $eax
17453 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask:
17454 ; NoVLX: # %bb.0: # %entry
17455 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17456 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17457 ; NoVLX-NEXT: kmovw %edi, %k1
17458 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17459 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17460 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17461 ; NoVLX-NEXT: kmovw %k0, %eax
17462 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
17463 ; NoVLX-NEXT: vzeroupper
17466 %0 = bitcast <2 x i64> %__a to <2 x i64>
17467 %1 = bitcast <2 x i64> %__b to <2 x i64>
17468 %2 = icmp ult <2 x i64> %0, %1
17469 %3 = bitcast i8 %__u to <8 x i1>
17470 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17471 %4 = and <2 x i1> %2, %extract.i
17472 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17473 %6 = bitcast <8 x i1> %5 to i8
; Masked memory-operand variant of the <2 x i64> unsigned less-than compare
; with an 8-bit result: the right-hand side is a full <2 x i64> vector loaded
; from %__b. Elements 0 and 1 of the i8 mask %__u (extracted via %extract.i)
; are ANDed with the <2 x i1> compare result, which is then widened to
; <8 x i1> (all indices past the first two select zeroinitializer lanes, so
; bits 2-7 are zero) and bitcast to i8.
17477 define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17478 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem:
17479 ; VLX: # %bb.0: # %entry
17480 ; VLX-NEXT: kmovd %edi, %k1
17481 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
17482 ; VLX-NEXT: kmovd %k0, %eax
17483 ; VLX-NEXT: # kill: def $al killed $al killed $eax
17486 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem:
17487 ; NoVLX: # %bb.0: # %entry
17488 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17489 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
17490 ; NoVLX-NEXT: kmovw %edi, %k1
17491 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17492 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17493 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17494 ; NoVLX-NEXT: kmovw %k0, %eax
17495 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
17496 ; NoVLX-NEXT: vzeroupper
17499 %0 = bitcast <2 x i64> %__a to <2 x i64>
17500 %load = load <2 x i64>, <2 x i64>* %__b
17501 %1 = bitcast <2 x i64> %load to <2 x i64>
17502 %2 = icmp ult <2 x i64> %0, %1
17503 %3 = bitcast i8 %__u to <8 x i1>
17504 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17505 %4 = and <2 x i1> %2, %extract.i
17506 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17507 %6 = bitcast <8 x i1> %5 to i8
; Unsigned less-than (icmp ult) of %__a against a scalar i64 loaded from %__b
; and splatted to both lanes. The <2 x i1> result is widened to 8 lanes with
; zeros and bitcast to i8 (the declared return type).
; VLX checks expect an xmm vpcmpltuq with a {1to2} broadcast memory operand;
; NoVLX checks expect a zmm compare with {1to8} followed by a
; kshiftlw/kshiftrw $14 pair that leaves only the low 2 mask bits.
17512 define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
17513 ; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b:
17514 ; VLX: # %bb.0: # %entry
17515 ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
17516 ; VLX-NEXT: kmovd %k0, %eax
17517 ; VLX-NEXT: # kill: def $al killed $al killed $eax
17520 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b:
17521 ; NoVLX: # %bb.0: # %entry
17522 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17523 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
17524 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17525 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17526 ; NoVLX-NEXT: kmovw %k0, %eax
17527 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
17528 ; NoVLX-NEXT: vzeroupper
; Same-type bitcast: a no-op on the value.
17531 %0 = bitcast <2 x i64> %__a to <2 x i64>
; Splat idiom: insert the loaded scalar into lane 0, then shuffle with mask <0, 0>.
17532 %load = load i64, i64* %__b
17533 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17534 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17535 %2 = icmp ult <2 x i64> %0, %1
; Shuffle indices 2 and 3 select zeroinitializer lanes, so result lanes 2-7 are zero.
17536 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17537 %4 = bitcast <8 x i1> %3 to i8
; Masked unsigned less-than (icmp ult) of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes. The <2 x i1> result is ANDed with lanes 0-1
; of the i8 mask %__u, widened to 8 lanes with zeros and bitcast to i8 (the
; declared return type).
; VLX checks expect an xmm vpcmpltuq with a {1to2} broadcast operand under
; {%k1}; NoVLX checks expect a zmm compare with {1to8} under {%k1} followed by
; a kshiftlw/kshiftrw $14 pair that leaves only the low 2 mask bits.
17541 define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
17542 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b:
17543 ; VLX: # %bb.0: # %entry
17544 ; VLX-NEXT: kmovd %edi, %k1
17545 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
17546 ; VLX-NEXT: kmovd %k0, %eax
17547 ; VLX-NEXT: # kill: def $al killed $al killed $eax
17550 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b:
17551 ; NoVLX: # %bb.0: # %entry
17552 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17553 ; NoVLX-NEXT: kmovw %edi, %k1
17554 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
17555 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17556 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17557 ; NoVLX-NEXT: kmovw %k0, %eax
17558 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
17559 ; NoVLX-NEXT: vzeroupper
; Same-type bitcast: a no-op on the value.
17562 %0 = bitcast <2 x i64> %__a to <2 x i64>
; Splat idiom: insert the loaded scalar into lane 0, then shuffle with mask <0, 0>.
17563 %load = load i64, i64* %__b
17564 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17565 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17566 %2 = icmp ult <2 x i64> %0, %1
; View the i8 mask as 8 i1 lanes and keep lanes 0 and 1.
17567 %3 = bitcast i8 %__u to <8 x i1>
17568 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
; Here the mask is the first 'and' operand and the compare result the second.
17569 %4 = and <2 x i1> %extract.i, %2
; Shuffle indices 2 and 3 select zeroinitializer lanes, so result lanes 2-7 are zero.
17570 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17571 %6 = bitcast <8 x i1> %5 to i8
; Unsigned less-than (icmp ult) of two <2 x i64> register arguments. The
; <2 x i1> result is widened to 16 lanes with zeros and bitcast to i16 (the
; declared return type).
; VLX checks expect a direct xmm vpcmpltuq into %k0; NoVLX checks expect both
; operands treated as zmm and a kshiftlw/kshiftrw $14 pair that leaves only the
; low 2 bits of the 16-bit mask.
17576 define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17577 ; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask:
17578 ; VLX: # %bb.0: # %entry
17579 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
17580 ; VLX-NEXT: kmovd %k0, %eax
17581 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
17584 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask:
17585 ; NoVLX: # %bb.0: # %entry
17586 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17587 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17588 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17589 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17590 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17591 ; NoVLX-NEXT: kmovw %k0, %eax
17592 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
17593 ; NoVLX-NEXT: vzeroupper
; Same-type bitcasts below are no-ops on the value.
17596 %0 = bitcast <2 x i64> %__a to <2 x i64>
17597 %1 = bitcast <2 x i64> %__b to <2 x i64>
17598 %2 = icmp ult <2 x i64> %0, %1
; Shuffle indices 2 and 3 select zeroinitializer lanes, so result lanes 2-15 are zero.
17599 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17600 %4 = bitcast <16 x i1> %3 to i16
; Unsigned less-than (icmp ult) of %__a against a <2 x i64> loaded from %__b.
; The <2 x i1> result is widened to 16 lanes with zeros and bitcast to i16 (the
; declared return type).
; VLX checks expect the load folded into an xmm vpcmpltuq; NoVLX checks expect
; a separate vmovdqa, a zmm compare, and a kshiftlw/kshiftrw $14 pair that
; leaves only the low 2 bits of the 16-bit mask.
17604 define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17605 ; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem:
17606 ; VLX: # %bb.0: # %entry
17607 ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
17608 ; VLX-NEXT: kmovd %k0, %eax
17609 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
17612 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem:
17613 ; NoVLX: # %bb.0: # %entry
17614 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17615 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
17616 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17617 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17618 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17619 ; NoVLX-NEXT: kmovw %k0, %eax
17620 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
17621 ; NoVLX-NEXT: vzeroupper
; Same-type bitcasts below are no-ops on the value.
17624 %0 = bitcast <2 x i64> %__a to <2 x i64>
17625 %load = load <2 x i64>, <2 x i64>* %__b
17626 %1 = bitcast <2 x i64> %load to <2 x i64>
17627 %2 = icmp ult <2 x i64> %0, %1
; Shuffle indices 2 and 3 select zeroinitializer lanes, so result lanes 2-15 are zero.
17628 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17629 %4 = bitcast <16 x i1> %3 to i16
; Masked unsigned less-than (icmp ult) of two <2 x i64> register arguments.
; The <2 x i1> result is ANDed with lanes 0-1 of the i8 mask %__u, widened to
; 16 lanes with zeros and bitcast to i16 (the declared return type).
; VLX checks expect an xmm vpcmpltuq under {%k1}; NoVLX checks expect a zmm
; compare under {%k1} and a kshiftlw/kshiftrw $14 pair that leaves only the low
; 2 bits of the 16-bit mask.
17633 define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17634 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask:
17635 ; VLX: # %bb.0: # %entry
17636 ; VLX-NEXT: kmovd %edi, %k1
17637 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
17638 ; VLX-NEXT: kmovd %k0, %eax
17639 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
17642 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask:
17643 ; NoVLX: # %bb.0: # %entry
17644 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17645 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17646 ; NoVLX-NEXT: kmovw %edi, %k1
17647 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17648 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17649 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17650 ; NoVLX-NEXT: kmovw %k0, %eax
17651 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
17652 ; NoVLX-NEXT: vzeroupper
; Same-type bitcasts below are no-ops on the value.
17655 %0 = bitcast <2 x i64> %__a to <2 x i64>
17656 %1 = bitcast <2 x i64> %__b to <2 x i64>
17657 %2 = icmp ult <2 x i64> %0, %1
; View the i8 mask as 8 i1 lanes and keep lanes 0 and 1.
17658 %3 = bitcast i8 %__u to <8 x i1>
17659 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17660 %4 = and <2 x i1> %2, %extract.i
; Shuffle indices 2 and 3 select zeroinitializer lanes, so result lanes 2-15 are zero.
17661 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17662 %6 = bitcast <16 x i1> %5 to i16
; Masked unsigned less-than (icmp ult) of %__a against a <2 x i64> loaded from
; %__b. The <2 x i1> result is ANDed with lanes 0-1 of the i8 mask %__u,
; widened to 16 lanes with zeros and bitcast to i16 (the declared return type).
; VLX checks expect the load folded into an xmm vpcmpltuq under {%k1}; NoVLX
; checks expect a separate vmovdqa, a zmm compare under {%k1}, and a
; kshiftlw/kshiftrw $14 pair that leaves only the low 2 mask bits.
17666 define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17667 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem:
17668 ; VLX: # %bb.0: # %entry
17669 ; VLX-NEXT: kmovd %edi, %k1
17670 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
17671 ; VLX-NEXT: kmovd %k0, %eax
17672 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
17675 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem:
17676 ; NoVLX: # %bb.0: # %entry
17677 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17678 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
17679 ; NoVLX-NEXT: kmovw %edi, %k1
17680 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17681 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17682 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17683 ; NoVLX-NEXT: kmovw %k0, %eax
17684 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
17685 ; NoVLX-NEXT: vzeroupper
; Same-type bitcasts below are no-ops on the value.
17688 %0 = bitcast <2 x i64> %__a to <2 x i64>
17689 %load = load <2 x i64>, <2 x i64>* %__b
17690 %1 = bitcast <2 x i64> %load to <2 x i64>
17691 %2 = icmp ult <2 x i64> %0, %1
; View the i8 mask as 8 i1 lanes and keep lanes 0 and 1.
17692 %3 = bitcast i8 %__u to <8 x i1>
17693 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17694 %4 = and <2 x i1> %2, %extract.i
; Shuffle indices 2 and 3 select zeroinitializer lanes, so result lanes 2-15 are zero.
17695 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17696 %6 = bitcast <16 x i1> %5 to i16
; Unsigned less-than (icmp ult) of %__a against a scalar i64 loaded from %__b
; and splatted to both lanes. The <2 x i1> result is widened to 16 lanes with
; zeros and bitcast to i16 (the declared return type).
; VLX checks expect an xmm vpcmpltuq with a {1to2} broadcast memory operand;
; NoVLX checks expect a zmm compare with {1to8} followed by a
; kshiftlw/kshiftrw $14 pair that leaves only the low 2 mask bits.
17701 define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
17702 ; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b:
17703 ; VLX: # %bb.0: # %entry
17704 ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
17705 ; VLX-NEXT: kmovd %k0, %eax
17706 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
17709 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b:
17710 ; NoVLX: # %bb.0: # %entry
17711 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17712 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
17713 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17714 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17715 ; NoVLX-NEXT: kmovw %k0, %eax
17716 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
17717 ; NoVLX-NEXT: vzeroupper
; Same-type bitcast: a no-op on the value.
17720 %0 = bitcast <2 x i64> %__a to <2 x i64>
; Splat idiom: insert the loaded scalar into lane 0, then shuffle with mask <0, 0>.
17721 %load = load i64, i64* %__b
17722 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17723 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17724 %2 = icmp ult <2 x i64> %0, %1
; Shuffle indices 2 and 3 select zeroinitializer lanes, so result lanes 2-15 are zero.
17725 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17726 %4 = bitcast <16 x i1> %3 to i16
; Masked unsigned less-than (icmp ult) of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes. The <2 x i1> result is ANDed with lanes 0-1
; of the i8 mask %__u, widened to 16 lanes with zeros and bitcast to i16 (the
; declared return type).
; VLX checks expect an xmm vpcmpltuq with a {1to2} broadcast operand under
; {%k1}; NoVLX checks expect a zmm compare with {1to8} under {%k1} followed by
; a kshiftlw/kshiftrw $14 pair that leaves only the low 2 mask bits.
17730 define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
17731 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b:
17732 ; VLX: # %bb.0: # %entry
17733 ; VLX-NEXT: kmovd %edi, %k1
17734 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
17735 ; VLX-NEXT: kmovd %k0, %eax
17736 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
17739 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b:
17740 ; NoVLX: # %bb.0: # %entry
17741 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17742 ; NoVLX-NEXT: kmovw %edi, %k1
17743 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
17744 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17745 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17746 ; NoVLX-NEXT: kmovw %k0, %eax
17747 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
17748 ; NoVLX-NEXT: vzeroupper
; Same-type bitcast: a no-op on the value.
17751 %0 = bitcast <2 x i64> %__a to <2 x i64>
; Splat idiom: insert the loaded scalar into lane 0, then shuffle with mask <0, 0>.
17752 %load = load i64, i64* %__b
17753 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17754 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17755 %2 = icmp ult <2 x i64> %0, %1
; View the i8 mask as 8 i1 lanes and keep lanes 0 and 1.
17756 %3 = bitcast i8 %__u to <8 x i1>
17757 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
; Here the mask is the first 'and' operand and the compare result the second.
17758 %4 = and <2 x i1> %extract.i, %2
; Shuffle indices 2 and 3 select zeroinitializer lanes, so result lanes 2-15 are zero.
17759 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17760 %6 = bitcast <16 x i1> %5 to i16
; Unsigned less-than (icmp ult) of two <2 x i64> register arguments. The
; <2 x i1> result is widened to 32 lanes with zeros and bitcast to i32 (the
; declared return type).
; VLX checks expect a direct xmm vpcmpltuq into %k0 read out with kmovd; NoVLX
; checks expect both operands treated as zmm and a kshiftlw/kshiftrw $14 pair
; that leaves only the low 2 bits of the 16-bit mask before kmovw.
17765 define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17766 ; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask:
17767 ; VLX: # %bb.0: # %entry
17768 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
17769 ; VLX-NEXT: kmovd %k0, %eax
17772 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask:
17773 ; NoVLX: # %bb.0: # %entry
17774 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17775 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17776 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17777 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17778 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17779 ; NoVLX-NEXT: kmovw %k0, %eax
17780 ; NoVLX-NEXT: vzeroupper
; Same-type bitcasts below are no-ops on the value.
17783 %0 = bitcast <2 x i64> %__a to <2 x i64>
17784 %1 = bitcast <2 x i64> %__b to <2 x i64>
17785 %2 = icmp ult <2 x i64> %0, %1
; Shuffle indices 2 and 3 select zeroinitializer lanes, so result lanes 2-31 are zero.
17786 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17787 %4 = bitcast <32 x i1> %3 to i32
; Unsigned less-than (icmp ult) of %__a against a <2 x i64> loaded from %__b.
; The <2 x i1> result is widened to 32 lanes with zeros and bitcast to i32 (the
; declared return type).
; VLX checks expect the load folded into an xmm vpcmpltuq; NoVLX checks expect
; a separate vmovdqa, a zmm compare, and a kshiftlw/kshiftrw $14 pair that
; leaves only the low 2 bits of the 16-bit mask.
17791 define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17792 ; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem:
17793 ; VLX: # %bb.0: # %entry
17794 ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
17795 ; VLX-NEXT: kmovd %k0, %eax
17798 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem:
17799 ; NoVLX: # %bb.0: # %entry
17800 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17801 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
17802 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17803 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17804 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17805 ; NoVLX-NEXT: kmovw %k0, %eax
17806 ; NoVLX-NEXT: vzeroupper
; Same-type bitcasts below are no-ops on the value.
17809 %0 = bitcast <2 x i64> %__a to <2 x i64>
17810 %load = load <2 x i64>, <2 x i64>* %__b
17811 %1 = bitcast <2 x i64> %load to <2 x i64>
17812 %2 = icmp ult <2 x i64> %0, %1
; Shuffle indices 2 and 3 select zeroinitializer lanes, so result lanes 2-31 are zero.
17813 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17814 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned less-than (icmp ult) of two <2 x i64> register arguments.
; The <2 x i1> result is ANDed with lanes 0-1 of the i8 mask %__u, widened to
; 32 lanes with zeros and bitcast to i32 (the declared return type).
; VLX checks expect an xmm vpcmpltuq under {%k1}; NoVLX checks expect a zmm
; compare under {%k1} and a kshiftlw/kshiftrw $14 pair that leaves only the low
; 2 bits of the 16-bit mask.
17818 define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17819 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask:
17820 ; VLX: # %bb.0: # %entry
17821 ; VLX-NEXT: kmovd %edi, %k1
17822 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
17823 ; VLX-NEXT: kmovd %k0, %eax
17826 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask:
17827 ; NoVLX: # %bb.0: # %entry
17828 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17829 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17830 ; NoVLX-NEXT: kmovw %edi, %k1
17831 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17832 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17833 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17834 ; NoVLX-NEXT: kmovw %k0, %eax
17835 ; NoVLX-NEXT: vzeroupper
; Same-type bitcasts below are no-ops on the value.
17838 %0 = bitcast <2 x i64> %__a to <2 x i64>
17839 %1 = bitcast <2 x i64> %__b to <2 x i64>
17840 %2 = icmp ult <2 x i64> %0, %1
; View the i8 mask as 8 i1 lanes and keep lanes 0 and 1.
17841 %3 = bitcast i8 %__u to <8 x i1>
17842 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17843 %4 = and <2 x i1> %2, %extract.i
; Shuffle indices 2 and 3 select zeroinitializer lanes, so result lanes 2-31 are zero.
17844 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17845 %6 = bitcast <32 x i1> %5 to i32
; Masked unsigned less-than (icmp ult) of %__a against a <2 x i64> loaded from
; %__b. The <2 x i1> result is ANDed with lanes 0-1 of the i8 mask %__u,
; widened to 32 lanes with zeros and bitcast to i32 (the declared return type).
; VLX checks expect the load folded into an xmm vpcmpltuq under {%k1}; NoVLX
; checks expect a separate vmovdqa, a zmm compare under {%k1}, and a
; kshiftlw/kshiftrw $14 pair that leaves only the low 2 mask bits.
17849 define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17850 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem:
17851 ; VLX: # %bb.0: # %entry
17852 ; VLX-NEXT: kmovd %edi, %k1
17853 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
17854 ; VLX-NEXT: kmovd %k0, %eax
17857 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem:
17858 ; NoVLX: # %bb.0: # %entry
17859 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17860 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
17861 ; NoVLX-NEXT: kmovw %edi, %k1
17862 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17863 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17864 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17865 ; NoVLX-NEXT: kmovw %k0, %eax
17866 ; NoVLX-NEXT: vzeroupper
; Same-type bitcasts below are no-ops on the value.
17869 %0 = bitcast <2 x i64> %__a to <2 x i64>
17870 %load = load <2 x i64>, <2 x i64>* %__b
17871 %1 = bitcast <2 x i64> %load to <2 x i64>
17872 %2 = icmp ult <2 x i64> %0, %1
; View the i8 mask as 8 i1 lanes and keep lanes 0 and 1.
17873 %3 = bitcast i8 %__u to <8 x i1>
17874 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17875 %4 = and <2 x i1> %2, %extract.i
; Shuffle indices 2 and 3 select zeroinitializer lanes, so result lanes 2-31 are zero.
17876 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17877 %6 = bitcast <32 x i1> %5 to i32
; Unsigned less-than (icmp ult) of %__a against a scalar i64 loaded from %__b
; and splatted to both lanes. The <2 x i1> result is widened to 32 lanes with
; zeros and bitcast to i32 (the declared return type).
; VLX checks expect an xmm vpcmpltuq with a {1to2} broadcast memory operand;
; NoVLX checks expect a zmm compare with {1to8} followed by a
; kshiftlw/kshiftrw $14 pair that leaves only the low 2 mask bits.
17882 define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
17883 ; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b:
17884 ; VLX: # %bb.0: # %entry
17885 ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
17886 ; VLX-NEXT: kmovd %k0, %eax
17889 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b:
17890 ; NoVLX: # %bb.0: # %entry
17891 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17892 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
17893 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17894 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17895 ; NoVLX-NEXT: kmovw %k0, %eax
17896 ; NoVLX-NEXT: vzeroupper
; Same-type bitcast: a no-op on the value.
17899 %0 = bitcast <2 x i64> %__a to <2 x i64>
; Splat idiom: insert the loaded scalar into lane 0, then shuffle with mask <0, 0>.
17900 %load = load i64, i64* %__b
17901 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17902 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17903 %2 = icmp ult <2 x i64> %0, %1
; Shuffle indices 2 and 3 select zeroinitializer lanes, so result lanes 2-31 are zero.
17904 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17905 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned less-than (icmp ult) of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes. The <2 x i1> result is ANDed with lanes 0-1
; of the i8 mask %__u, widened to 32 lanes with zeros and bitcast to i32 (the
; declared return type).
; VLX checks expect an xmm vpcmpltuq with a {1to2} broadcast operand under
; {%k1}; NoVLX checks expect a zmm compare with {1to8} under {%k1} followed by
; a kshiftlw/kshiftrw $14 pair that leaves only the low 2 mask bits.
17909 define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
17910 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b:
17911 ; VLX: # %bb.0: # %entry
17912 ; VLX-NEXT: kmovd %edi, %k1
17913 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
17914 ; VLX-NEXT: kmovd %k0, %eax
17917 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b:
17918 ; NoVLX: # %bb.0: # %entry
17919 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17920 ; NoVLX-NEXT: kmovw %edi, %k1
17921 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
17922 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17923 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17924 ; NoVLX-NEXT: kmovw %k0, %eax
17925 ; NoVLX-NEXT: vzeroupper
; Same-type bitcast: a no-op on the value.
17928 %0 = bitcast <2 x i64> %__a to <2 x i64>
; Splat idiom: insert the loaded scalar into lane 0, then shuffle with mask <0, 0>.
17929 %load = load i64, i64* %__b
17930 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17931 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17932 %2 = icmp ult <2 x i64> %0, %1
; View the i8 mask as 8 i1 lanes and keep lanes 0 and 1.
17933 %3 = bitcast i8 %__u to <8 x i1>
17934 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
; Here the mask is the first 'and' operand and the compare result the second.
17935 %4 = and <2 x i1> %extract.i, %2
; Shuffle indices 2 and 3 select zeroinitializer lanes, so result lanes 2-31 are zero.
17936 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17937 %6 = bitcast <32 x i1> %5 to i32
17942 define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17943 ; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask:
17944 ; VLX: # %bb.0: # %entry
17945 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
17946 ; VLX-NEXT: kmovq %k0, %rax
17949 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask:
17950 ; NoVLX: # %bb.0: # %entry
17951 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17952 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17953 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17954 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17955 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17956 ; NoVLX-NEXT: kmovw %k0, %eax
17957 ; NoVLX-NEXT: vzeroupper
17960 %0 = bitcast <2 x i64> %__a to <2 x i64>
17961 %1 = bitcast <2 x i64> %__b to <2 x i64>
17962 %2 = icmp ult <2 x i64> %0, %1
17963 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17964 %4 = bitcast <64 x i1> %3 to i64
17968 define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17969 ; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem:
17970 ; VLX: # %bb.0: # %entry
17971 ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
17972 ; VLX-NEXT: kmovq %k0, %rax
17975 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem:
17976 ; NoVLX: # %bb.0: # %entry
17977 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17978 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
17979 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17980 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17981 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17982 ; NoVLX-NEXT: kmovw %k0, %eax
17983 ; NoVLX-NEXT: vzeroupper
17986 %0 = bitcast <2 x i64> %__a to <2 x i64>
17987 %load = load <2 x i64>, <2 x i64>* %__b
17988 %1 = bitcast <2 x i64> %load to <2 x i64>
17989 %2 = icmp ult <2 x i64> %0, %1
17990 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17991 %4 = bitcast <64 x i1> %3 to i64
17995 define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17996 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask:
17997 ; VLX: # %bb.0: # %entry
17998 ; VLX-NEXT: kmovd %edi, %k1
17999 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
18000 ; VLX-NEXT: kmovq %k0, %rax
18003 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask:
18004 ; NoVLX: # %bb.0: # %entry
18005 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
18006 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18007 ; NoVLX-NEXT: kmovw %edi, %k1
18008 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18009 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18010 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18011 ; NoVLX-NEXT: kmovw %k0, %eax
18012 ; NoVLX-NEXT: vzeroupper
18015 %0 = bitcast <2 x i64> %__a to <2 x i64>
18016 %1 = bitcast <2 x i64> %__b to <2 x i64>
18017 %2 = icmp ult <2 x i64> %0, %1
18018 %3 = bitcast i8 %__u to <8 x i1>
18019 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18020 %4 = and <2 x i1> %2, %extract.i
18021 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18022 %6 = bitcast <64 x i1> %5 to i64
18026 define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
18027 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem:
18028 ; VLX: # %bb.0: # %entry
18029 ; VLX-NEXT: kmovd %edi, %k1
18030 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
18031 ; VLX-NEXT: kmovq %k0, %rax
18034 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem:
18035 ; NoVLX: # %bb.0: # %entry
18036 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18037 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
18038 ; NoVLX-NEXT: kmovw %edi, %k1
18039 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18040 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18041 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18042 ; NoVLX-NEXT: kmovw %k0, %eax
18043 ; NoVLX-NEXT: vzeroupper
18046 %0 = bitcast <2 x i64> %__a to <2 x i64>
18047 %load = load <2 x i64>, <2 x i64>* %__b
18048 %1 = bitcast <2 x i64> %load to <2 x i64>
18049 %2 = icmp ult <2 x i64> %0, %1
18050 %3 = bitcast i8 %__u to <8 x i1>
18051 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18052 %4 = and <2 x i1> %2, %extract.i
18053 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18054 %6 = bitcast <64 x i1> %5 to i64
18059 define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
18060 ; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b:
18061 ; VLX: # %bb.0: # %entry
18062 ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
18063 ; VLX-NEXT: kmovq %k0, %rax
18066 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b:
18067 ; NoVLX: # %bb.0: # %entry
18068 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18069 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
18070 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18071 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18072 ; NoVLX-NEXT: kmovw %k0, %eax
18073 ; NoVLX-NEXT: vzeroupper
18076 %0 = bitcast <2 x i64> %__a to <2 x i64>
18077 %load = load i64, i64* %__b
18078 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
18079 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
18080 %2 = icmp ult <2 x i64> %0, %1
18081 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18082 %4 = bitcast <64 x i1> %3 to i64
18086 define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
18087 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b:
18088 ; VLX: # %bb.0: # %entry
18089 ; VLX-NEXT: kmovd %edi, %k1
18090 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
18091 ; VLX-NEXT: kmovq %k0, %rax
18094 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b:
18095 ; NoVLX: # %bb.0: # %entry
18096 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18097 ; NoVLX-NEXT: kmovw %edi, %k1
18098 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
18099 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18100 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18101 ; NoVLX-NEXT: kmovw %k0, %eax
18102 ; NoVLX-NEXT: vzeroupper
18105 %0 = bitcast <2 x i64> %__a to <2 x i64>
18106 %load = load i64, i64* %__b
18107 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
18108 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
18109 %2 = icmp ult <2 x i64> %0, %1
18110 %3 = bitcast i8 %__u to <8 x i1>
18111 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18112 %4 = and <2 x i1> %extract.i, %2
18113 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18114 %6 = bitcast <64 x i1> %5 to i64
; Unsigned less-than (icmp ult) of two <4 x i64> register arguments. The
; <4 x i1> result is widened to 8 lanes with zeros and bitcast to i8 (the
; declared return type).
; VLX checks expect a direct ymm vpcmpltuq into %k0; NoVLX checks expect both
; operands treated as zmm and a kshiftlw/kshiftrw $12 pair that leaves only the
; low 4 bits of the 16-bit mask.
18119 define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18120 ; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask:
18121 ; VLX: # %bb.0: # %entry
18122 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
18123 ; VLX-NEXT: kmovd %k0, %eax
18124 ; VLX-NEXT: # kill: def $al killed $al killed $eax
18125 ; VLX-NEXT: vzeroupper
18128 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask:
18129 ; NoVLX: # %bb.0: # %entry
18130 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18131 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18132 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18133 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18134 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18135 ; NoVLX-NEXT: kmovw %k0, %eax
18136 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
18137 ; NoVLX-NEXT: vzeroupper
; Same-type bitcasts below are no-ops on the value.
18140 %0 = bitcast <4 x i64> %__a to <4 x i64>
18141 %1 = bitcast <4 x i64> %__b to <4 x i64>
18142 %2 = icmp ult <4 x i64> %0, %1
; Shuffle indices 4-7 select zeroinitializer lanes, so result lanes 4-7 are zero.
18143 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18144 %4 = bitcast <8 x i1> %3 to i8
; Unsigned less-than (icmp ult) of %__a against a <4 x i64> loaded from %__b.
; The <4 x i1> result is widened to 8 lanes with zeros and bitcast to i8 (the
; declared return type).
; VLX checks expect the load folded into a ymm vpcmpltuq; NoVLX checks expect
; a separate ymm vmovdqa, a zmm compare, and a kshiftlw/kshiftrw $12 pair that
; leaves only the low 4 bits of the 16-bit mask.
18148 define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18149 ; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem:
18150 ; VLX: # %bb.0: # %entry
18151 ; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
18152 ; VLX-NEXT: kmovd %k0, %eax
18153 ; VLX-NEXT: # kill: def $al killed $al killed $eax
18154 ; VLX-NEXT: vzeroupper
18157 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem:
18158 ; NoVLX: # %bb.0: # %entry
18159 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18160 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
18161 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18162 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18163 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18164 ; NoVLX-NEXT: kmovw %k0, %eax
18165 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
18166 ; NoVLX-NEXT: vzeroupper
; Same-type bitcasts below are no-ops on the value.
18169 %0 = bitcast <4 x i64> %__a to <4 x i64>
18170 %load = load <4 x i64>, <4 x i64>* %__b
18171 %1 = bitcast <4 x i64> %load to <4 x i64>
18172 %2 = icmp ult <4 x i64> %0, %1
; Shuffle indices 4-7 select zeroinitializer lanes, so result lanes 4-7 are zero.
18173 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18174 %4 = bitcast <8 x i1> %3 to i8
; Masked unsigned less-than (icmp ult) of two <4 x i64> register arguments.
; The <4 x i1> result is ANDed with lanes 0-3 of the i8 mask %__u, widened to
; 8 lanes with zeros and bitcast to i8 (the declared return type).
; VLX checks expect a ymm vpcmpltuq under {%k1}; NoVLX checks expect a zmm
; compare under {%k1} and a kshiftlw/kshiftrw $12 pair that leaves only the low
; 4 bits of the 16-bit mask.
18178 define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18179 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask:
18180 ; VLX: # %bb.0: # %entry
18181 ; VLX-NEXT: kmovd %edi, %k1
18182 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
18183 ; VLX-NEXT: kmovd %k0, %eax
18184 ; VLX-NEXT: # kill: def $al killed $al killed $eax
18185 ; VLX-NEXT: vzeroupper
18188 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask:
18189 ; NoVLX: # %bb.0: # %entry
18190 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18191 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18192 ; NoVLX-NEXT: kmovw %edi, %k1
18193 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18194 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18195 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18196 ; NoVLX-NEXT: kmovw %k0, %eax
18197 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
18198 ; NoVLX-NEXT: vzeroupper
; Same-type bitcasts below are no-ops on the value.
18201 %0 = bitcast <4 x i64> %__a to <4 x i64>
18202 %1 = bitcast <4 x i64> %__b to <4 x i64>
18203 %2 = icmp ult <4 x i64> %0, %1
; View the i8 mask as 8 i1 lanes and keep lanes 0 through 3.
18204 %3 = bitcast i8 %__u to <8 x i1>
18205 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18206 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select zeroinitializer lanes, so result lanes 4-7 are zero.
18207 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18208 %6 = bitcast <8 x i1> %5 to i8
; Masked `icmp ult` of a <4 x i64> register argument against a <4 x i64>
; loaded from %__b, widened to 8 mask bits.
; Lanes 0-3 of %__u (bitcast to <8 x i1>) are ANDed with the compare result;
; the final shufflevector fills lanes 4-7 from zeroinitializer before the
; bitcast to i8, the declared return type.
; Expected asm: the AVX512VL configuration folds the load into vpcmpltuq as a
; (%rsi) memory operand; the AVX512F-only configuration loads into %ymm1 with
; vmovdqa, compares in zmm and clears mask bits 4-15 with kshiftlw/kshiftrw $12.
18212 define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18213 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem:
18214 ; VLX: # %bb.0: # %entry
18215 ; VLX-NEXT: kmovd %edi, %k1
18216 ; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
18217 ; VLX-NEXT: kmovd %k0, %eax
18218 ; VLX-NEXT: # kill: def $al killed $al killed $eax
18219 ; VLX-NEXT: vzeroupper
18222 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem:
18223 ; NoVLX: # %bb.0: # %entry
18224 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18225 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
18226 ; NoVLX-NEXT: kmovw %edi, %k1
18227 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18228 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18229 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18230 ; NoVLX-NEXT: kmovw %k0, %eax
18231 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
18232 ; NoVLX-NEXT: vzeroupper
18235 %0 = bitcast <4 x i64> %__a to <4 x i64>
18236 %load = load <4 x i64>, <4 x i64>* %__b
18237 %1 = bitcast <4 x i64> %load to <4 x i64>
18238 %2 = icmp ult <4 x i64> %0, %1
18239 %3 = bitcast i8 %__u to <8 x i1>
18240 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18241 %4 = and <4 x i1> %2, %extract.i
18242 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18243 %6 = bitcast <8 x i1> %5 to i8
; Unmasked `icmp ult` of a <4 x i64> register argument against a splat of the
; i64 loaded from %__b, widened to 8 mask bits.
; The splat is built with insertelement into lane 0 followed by a
; shufflevector whose indices are all 0. The <4 x i1> result is padded to
; <8 x i1> with zeroinitializer lanes and bitcast to i8, the declared return
; type.
; Expected asm: the scalar load becomes an embedded broadcast operand,
; {1to4} on ymm with AVX512VL and {1to8} on zmm with AVX512F only; the latter
; also clears mask bits 4-15 with kshiftlw/kshiftrw $12.
18248 define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
18249 ; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b:
18250 ; VLX: # %bb.0: # %entry
18251 ; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
18252 ; VLX-NEXT: kmovd %k0, %eax
18253 ; VLX-NEXT: # kill: def $al killed $al killed $eax
18254 ; VLX-NEXT: vzeroupper
18257 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b:
18258 ; NoVLX: # %bb.0: # %entry
18259 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18260 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
18261 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18262 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18263 ; NoVLX-NEXT: kmovw %k0, %eax
18264 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
18265 ; NoVLX-NEXT: vzeroupper
18268 %0 = bitcast <4 x i64> %__a to <4 x i64>
18269 %load = load i64, i64* %__b
18270 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18271 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18272 %2 = icmp ult <4 x i64> %0, %1
18273 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18274 %4 = bitcast <8 x i1> %3 to i8
; Masked `icmp ult` of a <4 x i64> register argument against a splat of the
; i64 loaded from %__b, widened to 8 mask bits.
; Lanes 0-3 of %__u (bitcast to <8 x i1>) are ANDed with the compare result;
; note the `and` here takes %extract.i as its first operand. The result is
; padded to <8 x i1> with zeroinitializer lanes and bitcast to i8, the
; declared return type.
; Expected asm: %edi goes to %k1 as write-mask and the scalar load becomes an
; embedded broadcast, {1to4} with AVX512VL and {1to8} with AVX512F only; the
; latter also clears mask bits 4-15 with kshiftlw/kshiftrw $12.
18278 define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
18279 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b:
18280 ; VLX: # %bb.0: # %entry
18281 ; VLX-NEXT: kmovd %edi, %k1
18282 ; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
18283 ; VLX-NEXT: kmovd %k0, %eax
18284 ; VLX-NEXT: # kill: def $al killed $al killed $eax
18285 ; VLX-NEXT: vzeroupper
18288 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b:
18289 ; NoVLX: # %bb.0: # %entry
18290 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18291 ; NoVLX-NEXT: kmovw %edi, %k1
18292 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
18293 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18294 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18295 ; NoVLX-NEXT: kmovw %k0, %eax
18296 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
18297 ; NoVLX-NEXT: vzeroupper
18300 %0 = bitcast <4 x i64> %__a to <4 x i64>
18301 %load = load i64, i64* %__b
18302 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18303 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18304 %2 = icmp ult <4 x i64> %0, %1
18305 %3 = bitcast i8 %__u to <8 x i1>
18306 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18307 %4 = and <4 x i1> %extract.i, %2
18308 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18309 %6 = bitcast <8 x i1> %5 to i8
; Unmasked `icmp ult` of two <4 x i64> register arguments, widened to 16 mask
; bits.
; In the widening shufflevector, indices 0-3 take the compare lanes and every
; remaining index is in the range 4-7, i.e. a lane of the zeroinitializer
; operand, so bits 4-15 are zero. The vector is bitcast to i16, the declared
; return type.
; Expected asm: the AVX512VL configuration compares in ymm; the AVX512F-only
; configuration compares in zmm and uses kshiftlw/kshiftrw $12 to keep only
; the low four mask bits.
18314 define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18315 ; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask:
18316 ; VLX: # %bb.0: # %entry
18317 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
18318 ; VLX-NEXT: kmovd %k0, %eax
18319 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18320 ; VLX-NEXT: vzeroupper
18323 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask:
18324 ; NoVLX: # %bb.0: # %entry
18325 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18326 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18327 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18328 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18329 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18330 ; NoVLX-NEXT: kmovw %k0, %eax
18331 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18332 ; NoVLX-NEXT: vzeroupper
18335 %0 = bitcast <4 x i64> %__a to <4 x i64>
18336 %1 = bitcast <4 x i64> %__b to <4 x i64>
18337 %2 = icmp ult <4 x i64> %0, %1
18338 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18339 %4 = bitcast <16 x i1> %3 to i16
; Unmasked `icmp ult` of a <4 x i64> register argument against a <4 x i64>
; loaded from %__b, widened to 16 mask bits.
; The widening shufflevector fills every lane above 3 from the zeroinitializer
; operand (indices 4-7), so bits 4-15 are zero; the vector is bitcast to i16,
; the declared return type.
; Expected asm: the AVX512VL configuration folds the load into vpcmpltuq; the
; AVX512F-only configuration loads into %ymm1 with vmovdqa, compares in zmm
; and clears mask bits 4-15 with kshiftlw/kshiftrw $12.
18343 define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18344 ; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem:
18345 ; VLX: # %bb.0: # %entry
18346 ; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
18347 ; VLX-NEXT: kmovd %k0, %eax
18348 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18349 ; VLX-NEXT: vzeroupper
18352 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem:
18353 ; NoVLX: # %bb.0: # %entry
18354 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18355 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
18356 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18357 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18358 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18359 ; NoVLX-NEXT: kmovw %k0, %eax
18360 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18361 ; NoVLX-NEXT: vzeroupper
18364 %0 = bitcast <4 x i64> %__a to <4 x i64>
18365 %load = load <4 x i64>, <4 x i64>* %__b
18366 %1 = bitcast <4 x i64> %load to <4 x i64>
18367 %2 = icmp ult <4 x i64> %0, %1
18368 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18369 %4 = bitcast <16 x i1> %3 to i16
; Masked `icmp ult` of two <4 x i64> register arguments, widened to 16 mask
; bits.
; Lanes 0-3 of %__u (bitcast to <8 x i1>) are extracted and ANDed with the
; compare result. The widening shufflevector fills every lane above 3 from
; the zeroinitializer operand (indices 4-7), so bits 4-15 are zero; the vector
; is bitcast to i16, the declared return type.
; Expected asm: %edi is moved into %k1 and used as the compare write-mask. The
; AVX512VL configuration compares in ymm; the AVX512F-only configuration
; compares in zmm and clears mask bits 4-15 with kshiftlw/kshiftrw $12.
18373 define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18374 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask:
18375 ; VLX: # %bb.0: # %entry
18376 ; VLX-NEXT: kmovd %edi, %k1
18377 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
18378 ; VLX-NEXT: kmovd %k0, %eax
18379 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18380 ; VLX-NEXT: vzeroupper
18383 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask:
18384 ; NoVLX: # %bb.0: # %entry
18385 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18386 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18387 ; NoVLX-NEXT: kmovw %edi, %k1
18388 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18389 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18390 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18391 ; NoVLX-NEXT: kmovw %k0, %eax
18392 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18393 ; NoVLX-NEXT: vzeroupper
18396 %0 = bitcast <4 x i64> %__a to <4 x i64>
18397 %1 = bitcast <4 x i64> %__b to <4 x i64>
18398 %2 = icmp ult <4 x i64> %0, %1
18399 %3 = bitcast i8 %__u to <8 x i1>
18400 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18401 %4 = and <4 x i1> %2, %extract.i
18402 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18403 %6 = bitcast <16 x i1> %5 to i16
; Masked `icmp ult` of a <4 x i64> register argument against a <4 x i64>
; loaded from %__b, widened to 16 mask bits.
; Lanes 0-3 of %__u (bitcast to <8 x i1>) are ANDed with the compare result;
; the widening shufflevector fills every lane above 3 from the zeroinitializer
; operand, and the vector is bitcast to i16, the declared return type.
; Expected asm: %edi goes to %k1 as write-mask. The AVX512VL configuration
; folds the load as a (%rsi) operand; the AVX512F-only configuration loads
; into %ymm1 with vmovdqa, compares in zmm and clears mask bits 4-15 with
; kshiftlw/kshiftrw $12.
18407 define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18408 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem:
18409 ; VLX: # %bb.0: # %entry
18410 ; VLX-NEXT: kmovd %edi, %k1
18411 ; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
18412 ; VLX-NEXT: kmovd %k0, %eax
18413 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18414 ; VLX-NEXT: vzeroupper
18417 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem:
18418 ; NoVLX: # %bb.0: # %entry
18419 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18420 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
18421 ; NoVLX-NEXT: kmovw %edi, %k1
18422 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18423 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18424 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18425 ; NoVLX-NEXT: kmovw %k0, %eax
18426 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18427 ; NoVLX-NEXT: vzeroupper
18430 %0 = bitcast <4 x i64> %__a to <4 x i64>
18431 %load = load <4 x i64>, <4 x i64>* %__b
18432 %1 = bitcast <4 x i64> %load to <4 x i64>
18433 %2 = icmp ult <4 x i64> %0, %1
18434 %3 = bitcast i8 %__u to <8 x i1>
18435 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18436 %4 = and <4 x i1> %2, %extract.i
18437 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18438 %6 = bitcast <16 x i1> %5 to i16
; Unmasked `icmp ult` of a <4 x i64> register argument against a splat of the
; i64 loaded from %__b, widened to 16 mask bits.
; The splat is insertelement into lane 0 plus a shufflevector with all-zero
; indices. The widening shufflevector fills every lane above 3 from the
; zeroinitializer operand; the vector is bitcast to i16, the declared return
; type.
; Expected asm: the scalar load becomes an embedded broadcast operand,
; {1to4} on ymm with AVX512VL and {1to8} on zmm with AVX512F only; the latter
; also clears mask bits 4-15 with kshiftlw/kshiftrw $12.
18443 define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
18444 ; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b:
18445 ; VLX: # %bb.0: # %entry
18446 ; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
18447 ; VLX-NEXT: kmovd %k0, %eax
18448 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18449 ; VLX-NEXT: vzeroupper
18452 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b:
18453 ; NoVLX: # %bb.0: # %entry
18454 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18455 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
18456 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18457 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18458 ; NoVLX-NEXT: kmovw %k0, %eax
18459 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18460 ; NoVLX-NEXT: vzeroupper
18463 %0 = bitcast <4 x i64> %__a to <4 x i64>
18464 %load = load i64, i64* %__b
18465 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18466 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18467 %2 = icmp ult <4 x i64> %0, %1
18468 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18469 %4 = bitcast <16 x i1> %3 to i16
; Masked `icmp ult` of a <4 x i64> register argument against a splat of the
; i64 loaded from %__b, widened to 16 mask bits.
; Lanes 0-3 of %__u (bitcast to <8 x i1>) are ANDed with the compare result,
; with %extract.i as the first `and` operand. The widening shufflevector fills
; every lane above 3 from the zeroinitializer operand; the vector is bitcast
; to i16, the declared return type.
; Expected asm: %edi goes to %k1 as write-mask and the scalar load becomes an
; embedded broadcast, {1to4} with AVX512VL and {1to8} with AVX512F only; the
; latter also clears mask bits 4-15 with kshiftlw/kshiftrw $12.
18473 define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
18474 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b:
18475 ; VLX: # %bb.0: # %entry
18476 ; VLX-NEXT: kmovd %edi, %k1
18477 ; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
18478 ; VLX-NEXT: kmovd %k0, %eax
18479 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18480 ; VLX-NEXT: vzeroupper
18483 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b:
18484 ; NoVLX: # %bb.0: # %entry
18485 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18486 ; NoVLX-NEXT: kmovw %edi, %k1
18487 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
18488 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18489 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18490 ; NoVLX-NEXT: kmovw %k0, %eax
18491 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18492 ; NoVLX-NEXT: vzeroupper
18495 %0 = bitcast <4 x i64> %__a to <4 x i64>
18496 %load = load i64, i64* %__b
18497 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18498 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18499 %2 = icmp ult <4 x i64> %0, %1
18500 %3 = bitcast i8 %__u to <8 x i1>
18501 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18502 %4 = and <4 x i1> %extract.i, %2
18503 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18504 %6 = bitcast <16 x i1> %5 to i16
; Unmasked `icmp ult` of two <4 x i64> register arguments, widened to 32 mask
; bits.
; In the widening shufflevector, indices 0-3 take the compare lanes and every
; remaining index is in the range 4-7, i.e. a lane of the zeroinitializer
; operand, so bits 4-31 are zero. The vector is bitcast to i32, the declared
; return type.
; Expected asm: the AVX512VL configuration compares in ymm and reads the mask
; with kmovd; the AVX512F-only configuration compares in zmm, keeps the low
; four mask bits with kshiftlw/kshiftrw $12 and reads the mask with kmovw.
18509 define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18510 ; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask:
18511 ; VLX: # %bb.0: # %entry
18512 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
18513 ; VLX-NEXT: kmovd %k0, %eax
18514 ; VLX-NEXT: vzeroupper
18517 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask:
18518 ; NoVLX: # %bb.0: # %entry
18519 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18520 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18521 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18522 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18523 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18524 ; NoVLX-NEXT: kmovw %k0, %eax
18525 ; NoVLX-NEXT: vzeroupper
18528 %0 = bitcast <4 x i64> %__a to <4 x i64>
18529 %1 = bitcast <4 x i64> %__b to <4 x i64>
18530 %2 = icmp ult <4 x i64> %0, %1
18531 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18532 %4 = bitcast <32 x i1> %3 to i32
; Unmasked `icmp ult` of a <4 x i64> register argument against a <4 x i64>
; loaded from %__b, widened to 32 mask bits.
; The widening shufflevector fills every lane above 3 from the zeroinitializer
; operand (indices 4-7), so bits 4-31 are zero; the vector is bitcast to i32,
; the declared return type.
; Expected asm: the AVX512VL configuration folds the load into vpcmpltuq; the
; AVX512F-only configuration loads into %ymm1 with vmovdqa, compares in zmm
; and clears mask bits 4-15 with kshiftlw/kshiftrw $12.
18536 define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18537 ; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem:
18538 ; VLX: # %bb.0: # %entry
18539 ; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
18540 ; VLX-NEXT: kmovd %k0, %eax
18541 ; VLX-NEXT: vzeroupper
18544 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem:
18545 ; NoVLX: # %bb.0: # %entry
18546 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18547 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
18548 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18549 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18550 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18551 ; NoVLX-NEXT: kmovw %k0, %eax
18552 ; NoVLX-NEXT: vzeroupper
18555 %0 = bitcast <4 x i64> %__a to <4 x i64>
18556 %load = load <4 x i64>, <4 x i64>* %__b
18557 %1 = bitcast <4 x i64> %load to <4 x i64>
18558 %2 = icmp ult <4 x i64> %0, %1
18559 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18560 %4 = bitcast <32 x i1> %3 to i32
; Masked `icmp ult` of two <4 x i64> register arguments, widened to 32 mask
; bits.
; Lanes 0-3 of %__u (bitcast to <8 x i1>) are extracted and ANDed with the
; compare result. The widening shufflevector fills every lane above 3 from
; the zeroinitializer operand (indices 4-7), so bits 4-31 are zero; the vector
; is bitcast to i32, the declared return type.
; Expected asm: %edi is moved into %k1 and used as the compare write-mask. The
; AVX512VL configuration compares in ymm; the AVX512F-only configuration
; compares in zmm and clears mask bits 4-15 with kshiftlw/kshiftrw $12.
18564 define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18565 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask:
18566 ; VLX: # %bb.0: # %entry
18567 ; VLX-NEXT: kmovd %edi, %k1
18568 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
18569 ; VLX-NEXT: kmovd %k0, %eax
18570 ; VLX-NEXT: vzeroupper
18573 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask:
18574 ; NoVLX: # %bb.0: # %entry
18575 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18576 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18577 ; NoVLX-NEXT: kmovw %edi, %k1
18578 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18579 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18580 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18581 ; NoVLX-NEXT: kmovw %k0, %eax
18582 ; NoVLX-NEXT: vzeroupper
18585 %0 = bitcast <4 x i64> %__a to <4 x i64>
18586 %1 = bitcast <4 x i64> %__b to <4 x i64>
18587 %2 = icmp ult <4 x i64> %0, %1
18588 %3 = bitcast i8 %__u to <8 x i1>
18589 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18590 %4 = and <4 x i1> %2, %extract.i
18591 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18592 %6 = bitcast <32 x i1> %5 to i32
; Masked `icmp ult` of a <4 x i64> register argument against a <4 x i64>
; loaded from %__b, widened to 32 mask bits.
; Lanes 0-3 of %__u (bitcast to <8 x i1>) are ANDed with the compare result;
; the widening shufflevector fills every lane above 3 from the zeroinitializer
; operand, and the vector is bitcast to i32, the declared return type.
; Expected asm: %edi goes to %k1 as write-mask. The AVX512VL configuration
; folds the load as a (%rsi) operand; the AVX512F-only configuration loads
; into %ymm1 with vmovdqa, compares in zmm and clears mask bits 4-15 with
; kshiftlw/kshiftrw $12.
18596 define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18597 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem:
18598 ; VLX: # %bb.0: # %entry
18599 ; VLX-NEXT: kmovd %edi, %k1
18600 ; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
18601 ; VLX-NEXT: kmovd %k0, %eax
18602 ; VLX-NEXT: vzeroupper
18605 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem:
18606 ; NoVLX: # %bb.0: # %entry
18607 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18608 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
18609 ; NoVLX-NEXT: kmovw %edi, %k1
18610 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18611 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18612 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18613 ; NoVLX-NEXT: kmovw %k0, %eax
18614 ; NoVLX-NEXT: vzeroupper
18617 %0 = bitcast <4 x i64> %__a to <4 x i64>
18618 %load = load <4 x i64>, <4 x i64>* %__b
18619 %1 = bitcast <4 x i64> %load to <4 x i64>
18620 %2 = icmp ult <4 x i64> %0, %1
18621 %3 = bitcast i8 %__u to <8 x i1>
18622 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18623 %4 = and <4 x i1> %2, %extract.i
18624 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18625 %6 = bitcast <32 x i1> %5 to i32
; Unmasked `icmp ult` of a <4 x i64> register argument against a splat of the
; i64 loaded from %__b, widened to 32 mask bits.
; The splat is insertelement into lane 0 plus a shufflevector with all-zero
; indices. The widening shufflevector fills every lane above 3 from the
; zeroinitializer operand; the vector is bitcast to i32, the declared return
; type.
; Expected asm: the scalar load becomes an embedded broadcast operand,
; {1to4} on ymm with AVX512VL and {1to8} on zmm with AVX512F only; the latter
; also clears mask bits 4-15 with kshiftlw/kshiftrw $12.
18630 define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
18631 ; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b:
18632 ; VLX: # %bb.0: # %entry
18633 ; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
18634 ; VLX-NEXT: kmovd %k0, %eax
18635 ; VLX-NEXT: vzeroupper
18638 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b:
18639 ; NoVLX: # %bb.0: # %entry
18640 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18641 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
18642 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18643 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18644 ; NoVLX-NEXT: kmovw %k0, %eax
18645 ; NoVLX-NEXT: vzeroupper
18648 %0 = bitcast <4 x i64> %__a to <4 x i64>
18649 %load = load i64, i64* %__b
18650 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18651 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18652 %2 = icmp ult <4 x i64> %0, %1
18653 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18654 %4 = bitcast <32 x i1> %3 to i32
; Masked `icmp ult` of a <4 x i64> register argument against a splat of the
; i64 loaded from %__b, widened to 32 mask bits.
; Lanes 0-3 of %__u (bitcast to <8 x i1>) are ANDed with the compare result,
; with %extract.i as the first `and` operand. The widening shufflevector fills
; every lane above 3 from the zeroinitializer operand; the vector is bitcast
; to i32, the declared return type.
; Expected asm: %edi goes to %k1 as write-mask and the scalar load becomes an
; embedded broadcast, {1to4} with AVX512VL and {1to8} with AVX512F only; the
; latter also clears mask bits 4-15 with kshiftlw/kshiftrw $12.
18658 define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
18659 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b:
18660 ; VLX: # %bb.0: # %entry
18661 ; VLX-NEXT: kmovd %edi, %k1
18662 ; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
18663 ; VLX-NEXT: kmovd %k0, %eax
18664 ; VLX-NEXT: vzeroupper
18667 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b:
18668 ; NoVLX: # %bb.0: # %entry
18669 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18670 ; NoVLX-NEXT: kmovw %edi, %k1
18671 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
18672 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18673 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18674 ; NoVLX-NEXT: kmovw %k0, %eax
18675 ; NoVLX-NEXT: vzeroupper
18678 %0 = bitcast <4 x i64> %__a to <4 x i64>
18679 %load = load i64, i64* %__b
18680 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18681 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18682 %2 = icmp ult <4 x i64> %0, %1
18683 %3 = bitcast i8 %__u to <8 x i1>
18684 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18685 %4 = and <4 x i1> %extract.i, %2
18686 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18687 %6 = bitcast <32 x i1> %5 to i32
; Unmasked `icmp ult` of two <4 x i64> register arguments, widened to 64 mask
; bits.
; In the widening shufflevector, indices 0-3 take the compare lanes and every
; remaining index is in the range 4-7, i.e. a lane of the zeroinitializer
; operand, so bits 4-63 are zero. The vector is bitcast to i64, the declared
; return type.
; Expected asm: the AVX512VL configuration compares in ymm and reads the mask
; with kmovq into %rax; the AVX512F-only configuration compares in zmm, keeps
; the low four mask bits with kshiftlw/kshiftrw $12 and reads the mask with
; kmovw into %eax.
18692 define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18693 ; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask:
18694 ; VLX: # %bb.0: # %entry
18695 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
18696 ; VLX-NEXT: kmovq %k0, %rax
18697 ; VLX-NEXT: vzeroupper
18700 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask:
18701 ; NoVLX: # %bb.0: # %entry
18702 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18703 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18704 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18705 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18706 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18707 ; NoVLX-NEXT: kmovw %k0, %eax
18708 ; NoVLX-NEXT: vzeroupper
18711 %0 = bitcast <4 x i64> %__a to <4 x i64>
18712 %1 = bitcast <4 x i64> %__b to <4 x i64>
18713 %2 = icmp ult <4 x i64> %0, %1
18714 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18715 %4 = bitcast <64 x i1> %3 to i64
; Unmasked `icmp ult` of a <4 x i64> register argument against a <4 x i64>
; loaded from %__b, widened to 64 mask bits.
; The widening shufflevector fills every lane above 3 from the zeroinitializer
; operand (indices 4-7), so bits 4-63 are zero; the vector is bitcast to i64,
; the declared return type.
; Expected asm: the AVX512VL configuration folds the load into vpcmpltuq and
; reads the mask with kmovq; the AVX512F-only configuration loads into %ymm1
; with vmovdqa, compares in zmm and clears mask bits 4-15 with
; kshiftlw/kshiftrw $12.
18719 define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18720 ; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem:
18721 ; VLX: # %bb.0: # %entry
18722 ; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
18723 ; VLX-NEXT: kmovq %k0, %rax
18724 ; VLX-NEXT: vzeroupper
18727 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem:
18728 ; NoVLX: # %bb.0: # %entry
18729 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18730 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
18731 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18732 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18733 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18734 ; NoVLX-NEXT: kmovw %k0, %eax
18735 ; NoVLX-NEXT: vzeroupper
18738 %0 = bitcast <4 x i64> %__a to <4 x i64>
18739 %load = load <4 x i64>, <4 x i64>* %__b
18740 %1 = bitcast <4 x i64> %load to <4 x i64>
18741 %2 = icmp ult <4 x i64> %0, %1
18742 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18743 %4 = bitcast <64 x i1> %3 to i64
; Masked `icmp ult` of two <4 x i64> register arguments, widened to 64 mask
; bits.
; Lanes 0-3 of %__u (bitcast to <8 x i1>) are extracted and ANDed with the
; compare result. The widening shufflevector fills every lane above 3 from
; the zeroinitializer operand (indices 4-7), so bits 4-63 are zero; the vector
; is bitcast to i64, the declared return type.
; Expected asm: %edi is moved into %k1 and used as the compare write-mask. The
; AVX512VL configuration compares in ymm and reads the mask with kmovq; the
; AVX512F-only configuration compares in zmm and clears mask bits 4-15 with
; kshiftlw/kshiftrw $12.
18747 define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18748 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask:
18749 ; VLX: # %bb.0: # %entry
18750 ; VLX-NEXT: kmovd %edi, %k1
18751 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
18752 ; VLX-NEXT: kmovq %k0, %rax
18753 ; VLX-NEXT: vzeroupper
18756 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask:
18757 ; NoVLX: # %bb.0: # %entry
18758 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18759 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18760 ; NoVLX-NEXT: kmovw %edi, %k1
18761 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18762 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18763 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18764 ; NoVLX-NEXT: kmovw %k0, %eax
18765 ; NoVLX-NEXT: vzeroupper
18768 %0 = bitcast <4 x i64> %__a to <4 x i64>
18769 %1 = bitcast <4 x i64> %__b to <4 x i64>
18770 %2 = icmp ult <4 x i64> %0, %1
18771 %3 = bitcast i8 %__u to <8 x i1>
18772 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18773 %4 = and <4 x i1> %2, %extract.i
18774 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18775 %6 = bitcast <64 x i1> %5 to i64
; Masked `icmp ult` of a <4 x i64> register argument against a <4 x i64>
; loaded from %__b, widened to 64 mask bits.
; Lanes 0-3 of %__u (bitcast to <8 x i1>) are ANDed with the compare result;
; the widening shufflevector fills every lane above 3 from the zeroinitializer
; operand, and the vector is bitcast to i64, the declared return type.
; Expected asm: %edi goes to %k1 as write-mask. The AVX512VL configuration
; folds the load as a (%rsi) operand and reads the mask with kmovq; the
; AVX512F-only configuration loads into %ymm1 with vmovdqa, compares in zmm
; and clears mask bits 4-15 with kshiftlw/kshiftrw $12.
18779 define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18780 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem:
18781 ; VLX: # %bb.0: # %entry
18782 ; VLX-NEXT: kmovd %edi, %k1
18783 ; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
18784 ; VLX-NEXT: kmovq %k0, %rax
18785 ; VLX-NEXT: vzeroupper
18788 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem:
18789 ; NoVLX: # %bb.0: # %entry
18790 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18791 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
18792 ; NoVLX-NEXT: kmovw %edi, %k1
18793 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18794 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18795 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18796 ; NoVLX-NEXT: kmovw %k0, %eax
18797 ; NoVLX-NEXT: vzeroupper
18800 %0 = bitcast <4 x i64> %__a to <4 x i64>
18801 %load = load <4 x i64>, <4 x i64>* %__b
18802 %1 = bitcast <4 x i64> %load to <4 x i64>
18803 %2 = icmp ult <4 x i64> %0, %1
18804 %3 = bitcast i8 %__u to <8 x i1>
18805 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18806 %4 = and <4 x i1> %2, %extract.i
18807 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18808 %6 = bitcast <64 x i1> %5 to i64
; Unmasked `icmp ult` of a <4 x i64> register argument against a splat of the
; i64 loaded from %__b, widened to 64 mask bits.
; The splat is insertelement into lane 0 plus a shufflevector with all-zero
; indices. The widening shufflevector fills every lane above 3 from the
; zeroinitializer operand; the vector is bitcast to i64, the declared return
; type.
; Expected asm: the scalar load becomes an embedded broadcast operand,
; {1to4} on ymm with AVX512VL (mask read with kmovq) and {1to8} on zmm with
; AVX512F only; the latter also clears mask bits 4-15 with
; kshiftlw/kshiftrw $12.
18813 define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
18814 ; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
18815 ; VLX: # %bb.0: # %entry
18816 ; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
18817 ; VLX-NEXT: kmovq %k0, %rax
18818 ; VLX-NEXT: vzeroupper
18821 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
18822 ; NoVLX: # %bb.0: # %entry
18823 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18824 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
18825 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18826 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18827 ; NoVLX-NEXT: kmovw %k0, %eax
18828 ; NoVLX-NEXT: vzeroupper
18831 %0 = bitcast <4 x i64> %__a to <4 x i64>
18832 %load = load i64, i64* %__b
18833 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18834 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18835 %2 = icmp ult <4 x i64> %0, %1
18836 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18837 %4 = bitcast <64 x i1> %3 to i64
; Masked `icmp ult` of a <4 x i64> register argument against a splat of the
; i64 loaded from %__b, widened to 64 mask bits.
; Lanes 0-3 of %__u (bitcast to <8 x i1>) are ANDed with the compare result,
; with %extract.i as the first `and` operand. The widening shufflevector fills
; every lane above 3 from the zeroinitializer operand; the vector is bitcast
; to i64, the declared return type.
; Expected asm: %edi goes to %k1 as write-mask and the scalar load becomes an
; embedded broadcast, {1to4} with AVX512VL (mask read with kmovq) and {1to8}
; with AVX512F only; the latter also clears mask bits 4-15 with
; kshiftlw/kshiftrw $12.
18841 define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
18842 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
18843 ; VLX: # %bb.0: # %entry
18844 ; VLX-NEXT: kmovd %edi, %k1
18845 ; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
18846 ; VLX-NEXT: kmovq %k0, %rax
18847 ; VLX-NEXT: vzeroupper
18850 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
18851 ; NoVLX: # %bb.0: # %entry
18852 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18853 ; NoVLX-NEXT: kmovw %edi, %k1
18854 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
18855 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18856 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18857 ; NoVLX-NEXT: kmovw %k0, %eax
18858 ; NoVLX-NEXT: vzeroupper
18861 %0 = bitcast <4 x i64> %__a to <4 x i64>
18862 %load = load i64, i64* %__b
18863 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18864 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18865 %2 = icmp ult <4 x i64> %0, %1
18866 %3 = bitcast i8 %__u to <8 x i1>
18867 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18868 %4 = and <4 x i1> %extract.i, %2
18869 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18870 %6 = bitcast <64 x i1> %5 to i64
; Unmasked `icmp ult` of two <8 x i64> register arguments, widened to 16 mask
; bits.
; The widening shufflevector takes lanes 0-7 from the compare result and
; lanes 8-15 from the zeroinitializer operand, so the upper eight bits are
; zero; the vector is bitcast to i16, the declared return type.
; Expected asm: both configurations compare in zmm with a single vpcmpltuq
; and no kshift pair; they differ only in kmovd versus kmovw for reading %k0.
18875 define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
18876 ; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
18877 ; VLX: # %bb.0: # %entry
18878 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18879 ; VLX-NEXT: kmovd %k0, %eax
18880 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18881 ; VLX-NEXT: vzeroupper
18884 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
18885 ; NoVLX: # %bb.0: # %entry
18886 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18887 ; NoVLX-NEXT: kmovw %k0, %eax
18888 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18889 ; NoVLX-NEXT: vzeroupper
18892 %0 = bitcast <8 x i64> %__a to <8 x i64>
18893 %1 = bitcast <8 x i64> %__b to <8 x i64>
18894 %2 = icmp ult <8 x i64> %0, %1
18895 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
18896 %4 = bitcast <16 x i1> %3 to i16
; Unmasked `icmp ult` of an <8 x i64> register argument against an <8 x i64>
; loaded from %__b, widened to 16 mask bits.
; The widening shufflevector takes lanes 0-7 from the compare result and
; lanes 8-15 from the zeroinitializer operand; the vector is bitcast to i16,
; the declared return type.
; Expected asm: both configurations fold the load into a zmm vpcmpltuq with a
; (%rdi) memory operand; they differ only in kmovd versus kmovw.
18900 define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
18901 ; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
18902 ; VLX: # %bb.0: # %entry
18903 ; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
18904 ; VLX-NEXT: kmovd %k0, %eax
18905 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18906 ; VLX-NEXT: vzeroupper
18909 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
18910 ; NoVLX: # %bb.0: # %entry
18911 ; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
18912 ; NoVLX-NEXT: kmovw %k0, %eax
18913 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18914 ; NoVLX-NEXT: vzeroupper
18917 %0 = bitcast <8 x i64> %__a to <8 x i64>
18918 %load = load <8 x i64>, <8 x i64>* %__b
18919 %1 = bitcast <8 x i64> %load to <8 x i64>
18920 %2 = icmp ult <8 x i64> %0, %1
18921 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
18922 %4 = bitcast <16 x i1> %3 to i16
; Masked variant: the icmp ult result on <8 x i64> is ANDed with %__u
; reinterpreted as <8 x i1>, zero-padded to <16 x i1> and bitcast to i16.
; The check lines expect the AND to become a {%k1} write-mask on vpcmpltuq,
; with %__u (in %edi) moved into %k1 first.
18926 define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
18927 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask:
18928 ; VLX: # %bb.0: # %entry
18929 ; VLX-NEXT: kmovd %edi, %k1
18930 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18931 ; VLX-NEXT: kmovd %k0, %eax
18932 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18933 ; VLX-NEXT: vzeroupper
18936 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask:
18937 ; NoVLX: # %bb.0: # %entry
18938 ; NoVLX-NEXT: kmovw %edi, %k1
18939 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18940 ; NoVLX-NEXT: kmovw %k0, %eax
18941 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18942 ; NoVLX-NEXT: vzeroupper
18945 %0 = bitcast <8 x i64> %__a to <8 x i64>
18946 %1 = bitcast <8 x i64> %__b to <8 x i64>
18947 %2 = icmp ult <8 x i64> %0, %1
18948 %3 = bitcast i8 %__u to <8 x i1>
18949 %4 = and <8 x i1> %2, %3
18950 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
18951 %6 = bitcast <16 x i1> %5 to i16
; Masked compare with a memory operand: the second <8 x i64> operand is loaded
; from %__b, the icmp ult result is ANDed with %__u (as <8 x i1>), zero-padded
; to <16 x i1> and bitcast to i16. Expected codegen folds both the load
; ((%rsi) operand) and the mask ({%k1}) into one vpcmpltuq.
18955 define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
18956 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
18957 ; VLX: # %bb.0: # %entry
18958 ; VLX-NEXT: kmovd %edi, %k1
18959 ; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
18960 ; VLX-NEXT: kmovd %k0, %eax
18961 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18962 ; VLX-NEXT: vzeroupper
18965 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
18966 ; NoVLX: # %bb.0: # %entry
18967 ; NoVLX-NEXT: kmovw %edi, %k1
18968 ; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
18969 ; NoVLX-NEXT: kmovw %k0, %eax
18970 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18971 ; NoVLX-NEXT: vzeroupper
18974 %0 = bitcast <8 x i64> %__a to <8 x i64>
18975 %load = load <8 x i64>, <8 x i64>* %__b
18976 %1 = bitcast <8 x i64> %load to <8 x i64>
18977 %2 = icmp ult <8 x i64> %0, %1
18978 %3 = bitcast i8 %__u to <8 x i1>
18979 %4 = and <8 x i1> %2, %3
18980 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
18981 %6 = bitcast <16 x i1> %5 to i16
; Broadcast variant: a scalar i64 loaded from %__b is inserted into lane 0 and
; splatted to all 8 lanes (shuffle mask of all zeros), then compared with
; icmp ult against %__a. The <8 x i1> result is zero-padded to <16 x i1> and
; bitcast to i16. Expected codegen uses the embedded-broadcast form
; (%rdi){1to8} of vpcmpltuq.
18986 define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
18987 ; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b:
18988 ; VLX: # %bb.0: # %entry
18989 ; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
18990 ; VLX-NEXT: kmovd %k0, %eax
18991 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18992 ; VLX-NEXT: vzeroupper
18995 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b:
18996 ; NoVLX: # %bb.0: # %entry
18997 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
18998 ; NoVLX-NEXT: kmovw %k0, %eax
18999 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19000 ; NoVLX-NEXT: vzeroupper
19003 %0 = bitcast <8 x i64> %__a to <8 x i64>
19004 %load = load i64, i64* %__b
19005 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19006 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19007 %2 = icmp ult <8 x i64> %0, %1
19008 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19009 %4 = bitcast <16 x i1> %3 to i16
; Masked broadcast variant: a scalar i64 loaded from %__b is splatted to 8
; lanes and compared (icmp ult) against %__a; the result is ANDed with %__u
; (as <8 x i1>), zero-padded to <16 x i1> and bitcast to i16. Note the AND
; operands are in the order (%3, %2), unlike the non-broadcast masked tests.
; Expected codegen: vpcmpltuq with (%rsi){1to8} and a {%k1} write-mask.
19013 define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
19014 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
19015 ; VLX: # %bb.0: # %entry
19016 ; VLX-NEXT: kmovd %edi, %k1
19017 ; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19018 ; VLX-NEXT: kmovd %k0, %eax
19019 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19020 ; VLX-NEXT: vzeroupper
19023 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
19024 ; NoVLX: # %bb.0: # %entry
19025 ; NoVLX-NEXT: kmovw %edi, %k1
19026 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19027 ; NoVLX-NEXT: kmovw %k0, %eax
19028 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19029 ; NoVLX-NEXT: vzeroupper
19032 %0 = bitcast <8 x i64> %__a to <8 x i64>
19033 %load = load i64, i64* %__b
19034 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19035 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19036 %2 = icmp ult <8 x i64> %0, %1
19037 %3 = bitcast i8 %__u to <8 x i1>
19038 %4 = and <8 x i1> %3, %2
19039 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19040 %6 = bitcast <16 x i1> %5 to i16
; icmp ult on two <8 x i64> register operands with the <8 x i1> result widened
; to <32 x i1>: shuffle indices 0..7 take the compare lanes and every index
; >= 8 selects a lane of the zeroinitializer operand. The vector is then
; bitcast to i32. Expected codegen is one vpcmpltuq plus a mask-to-GPR move
; (kmovd with avx512bw, kmovw with plain avx512f).
19045 define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19046 ; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
19047 ; VLX: # %bb.0: # %entry
19048 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
19049 ; VLX-NEXT: kmovd %k0, %eax
19050 ; VLX-NEXT: vzeroupper
19053 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
19054 ; NoVLX: # %bb.0: # %entry
19055 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
19056 ; NoVLX-NEXT: kmovw %k0, %eax
19057 ; NoVLX-NEXT: vzeroupper
19060 %0 = bitcast <8 x i64> %__a to <8 x i64>
19061 %1 = bitcast <8 x i64> %__b to <8 x i64>
19062 %2 = icmp ult <8 x i64> %0, %1
19063 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19064 %4 = bitcast <32 x i1> %3 to i32
; Memory-operand form of the v8i1 -> v32i1 unsigned less-than test: the second
; <8 x i64> operand is loaded from %__b, the compare result is zero-padded to
; <32 x i1> and bitcast to i32. The check lines expect the load folded into
; vpcmpltuq as (%rdi).
19068 define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
19069 ; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
19070 ; VLX: # %bb.0: # %entry
19071 ; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
19072 ; VLX-NEXT: kmovd %k0, %eax
19073 ; VLX-NEXT: vzeroupper
19076 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
19077 ; NoVLX: # %bb.0: # %entry
19078 ; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
19079 ; NoVLX-NEXT: kmovw %k0, %eax
19080 ; NoVLX-NEXT: vzeroupper
19083 %0 = bitcast <8 x i64> %__a to <8 x i64>
19084 %load = load <8 x i64>, <8 x i64>* %__b
19085 %1 = bitcast <8 x i64> %load to <8 x i64>
19086 %2 = icmp ult <8 x i64> %0, %1
19087 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19088 %4 = bitcast <32 x i1> %3 to i32
; Masked v8i1 -> v32i1 test: icmp ult on two <8 x i64> register operands,
; ANDed with %__u reinterpreted as <8 x i1>, zero-padded to <32 x i1> and
; bitcast to i32. Expected codegen moves %__u into %k1 and applies it as a
; write-mask on vpcmpltuq.
19092 define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19093 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
19094 ; VLX: # %bb.0: # %entry
19095 ; VLX-NEXT: kmovd %edi, %k1
19096 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19097 ; VLX-NEXT: kmovd %k0, %eax
19098 ; VLX-NEXT: vzeroupper
19101 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
19102 ; NoVLX: # %bb.0: # %entry
19103 ; NoVLX-NEXT: kmovw %edi, %k1
19104 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19105 ; NoVLX-NEXT: kmovw %k0, %eax
19106 ; NoVLX-NEXT: vzeroupper
19109 %0 = bitcast <8 x i64> %__a to <8 x i64>
19110 %1 = bitcast <8 x i64> %__b to <8 x i64>
19111 %2 = icmp ult <8 x i64> %0, %1
19112 %3 = bitcast i8 %__u to <8 x i1>
19113 %4 = and <8 x i1> %2, %3
19114 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19115 %6 = bitcast <32 x i1> %5 to i32
; Masked, memory-operand v8i1 -> v32i1 test: second operand loaded from %__b,
; icmp ult result ANDed with %__u (as <8 x i1>), zero-padded to <32 x i1> and
; bitcast to i32. Expected codegen is a single vpcmpltuq with the (%rsi)
; memory operand and the {%k1} write-mask.
19119 define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
19120 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
19121 ; VLX: # %bb.0: # %entry
19122 ; VLX-NEXT: kmovd %edi, %k1
19123 ; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19124 ; VLX-NEXT: kmovd %k0, %eax
19125 ; VLX-NEXT: vzeroupper
19128 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
19129 ; NoVLX: # %bb.0: # %entry
19130 ; NoVLX-NEXT: kmovw %edi, %k1
19131 ; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19132 ; NoVLX-NEXT: kmovw %k0, %eax
19133 ; NoVLX-NEXT: vzeroupper
19136 %0 = bitcast <8 x i64> %__a to <8 x i64>
19137 %load = load <8 x i64>, <8 x i64>* %__b
19138 %1 = bitcast <8 x i64> %load to <8 x i64>
19139 %2 = icmp ult <8 x i64> %0, %1
19140 %3 = bitcast i8 %__u to <8 x i1>
19141 %4 = and <8 x i1> %2, %3
19142 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19143 %6 = bitcast <32 x i1> %5 to i32
; Broadcast v8i1 -> v32i1 test: a scalar i64 loaded from %__b is splatted to
; all 8 lanes and compared (icmp ult) against %__a; the result is zero-padded
; to <32 x i1> and bitcast to i32. Expected codegen uses the (%rdi){1to8}
; embedded-broadcast operand of vpcmpltuq.
19148 define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
19149 ; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
19150 ; VLX: # %bb.0: # %entry
19151 ; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19152 ; VLX-NEXT: kmovd %k0, %eax
19153 ; VLX-NEXT: vzeroupper
19156 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
19157 ; NoVLX: # %bb.0: # %entry
19158 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19159 ; NoVLX-NEXT: kmovw %k0, %eax
19160 ; NoVLX-NEXT: vzeroupper
19163 %0 = bitcast <8 x i64> %__a to <8 x i64>
19164 %load = load i64, i64* %__b
19165 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19166 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19167 %2 = icmp ult <8 x i64> %0, %1
19168 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19169 %4 = bitcast <32 x i1> %3 to i32
; Masked broadcast v8i1 -> v32i1 test: scalar i64 from %__b splatted to 8
; lanes, icmp ult against %__a, ANDed with %__u (as <8 x i1>, AND operand
; order %3, %2), zero-padded to <32 x i1> and bitcast to i32. Expected
; codegen: vpcmpltuq with (%rsi){1to8} and the {%k1} write-mask.
19173 define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
19174 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
19175 ; VLX: # %bb.0: # %entry
19176 ; VLX-NEXT: kmovd %edi, %k1
19177 ; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19178 ; VLX-NEXT: kmovd %k0, %eax
19179 ; VLX-NEXT: vzeroupper
19182 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
19183 ; NoVLX: # %bb.0: # %entry
19184 ; NoVLX-NEXT: kmovw %edi, %k1
19185 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19186 ; NoVLX-NEXT: kmovw %k0, %eax
19187 ; NoVLX-NEXT: vzeroupper
19190 %0 = bitcast <8 x i64> %__a to <8 x i64>
19191 %load = load i64, i64* %__b
19192 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19193 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19194 %2 = icmp ult <8 x i64> %0, %1
19195 %3 = bitcast i8 %__u to <8 x i1>
19196 %4 = and <8 x i1> %3, %2
19197 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19198 %6 = bitcast <32 x i1> %5 to i32
; icmp ult on two <8 x i64> register operands with the <8 x i1> result widened
; to <64 x i1> (every shuffle index >= 8 selects a zeroinitializer lane) and
; bitcast to i64. Expected codegen is one vpcmpltuq; the mask is read back
; with kmovq into %rax when avx512bw is available and with kmovw into %eax
; on plain avx512f.
19203 define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19204 ; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
19205 ; VLX: # %bb.0: # %entry
19206 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
19207 ; VLX-NEXT: kmovq %k0, %rax
19208 ; VLX-NEXT: vzeroupper
19211 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
19212 ; NoVLX: # %bb.0: # %entry
19213 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
19214 ; NoVLX-NEXT: kmovw %k0, %eax
19215 ; NoVLX-NEXT: vzeroupper
19218 %0 = bitcast <8 x i64> %__a to <8 x i64>
19219 %1 = bitcast <8 x i64> %__b to <8 x i64>
19220 %2 = icmp ult <8 x i64> %0, %1
19221 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19222 %4 = bitcast <64 x i1> %3 to i64
; Memory-operand form of the v8i1 -> v64i1 unsigned less-than test: the second
; <8 x i64> operand is loaded from %__b, the compare result is zero-padded to
; <64 x i1> and bitcast to i64. The check lines expect the load folded into
; vpcmpltuq as (%rdi).
19226 define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
19227 ; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
19228 ; VLX: # %bb.0: # %entry
19229 ; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
19230 ; VLX-NEXT: kmovq %k0, %rax
19231 ; VLX-NEXT: vzeroupper
19234 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
19235 ; NoVLX: # %bb.0: # %entry
19236 ; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
19237 ; NoVLX-NEXT: kmovw %k0, %eax
19238 ; NoVLX-NEXT: vzeroupper
19241 %0 = bitcast <8 x i64> %__a to <8 x i64>
19242 %load = load <8 x i64>, <8 x i64>* %__b
19243 %1 = bitcast <8 x i64> %load to <8 x i64>
19244 %2 = icmp ult <8 x i64> %0, %1
19245 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19246 %4 = bitcast <64 x i1> %3 to i64
; Masked v8i1 -> v64i1 test: icmp ult on two <8 x i64> register operands,
; ANDed with %__u reinterpreted as <8 x i1>, zero-padded to <64 x i1> and
; bitcast to i64. Expected codegen moves %__u into %k1 and applies it as a
; write-mask on vpcmpltuq.
19250 define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19251 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
19252 ; VLX: # %bb.0: # %entry
19253 ; VLX-NEXT: kmovd %edi, %k1
19254 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19255 ; VLX-NEXT: kmovq %k0, %rax
19256 ; VLX-NEXT: vzeroupper
19259 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
19260 ; NoVLX: # %bb.0: # %entry
19261 ; NoVLX-NEXT: kmovw %edi, %k1
19262 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19263 ; NoVLX-NEXT: kmovw %k0, %eax
19264 ; NoVLX-NEXT: vzeroupper
19267 %0 = bitcast <8 x i64> %__a to <8 x i64>
19268 %1 = bitcast <8 x i64> %__b to <8 x i64>
19269 %2 = icmp ult <8 x i64> %0, %1
19270 %3 = bitcast i8 %__u to <8 x i1>
19271 %4 = and <8 x i1> %2, %3
19272 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19273 %6 = bitcast <64 x i1> %5 to i64
; Masked, memory-operand v8i1 -> v64i1 test: second operand loaded from %__b,
; icmp ult result ANDed with %__u (as <8 x i1>), zero-padded to <64 x i1> and
; bitcast to i64. Expected codegen is a single vpcmpltuq with the (%rsi)
; memory operand and the {%k1} write-mask.
19277 define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
19278 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
19279 ; VLX: # %bb.0: # %entry
19280 ; VLX-NEXT: kmovd %edi, %k1
19281 ; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19282 ; VLX-NEXT: kmovq %k0, %rax
19283 ; VLX-NEXT: vzeroupper
19286 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
19287 ; NoVLX: # %bb.0: # %entry
19288 ; NoVLX-NEXT: kmovw %edi, %k1
19289 ; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19290 ; NoVLX-NEXT: kmovw %k0, %eax
19291 ; NoVLX-NEXT: vzeroupper
19294 %0 = bitcast <8 x i64> %__a to <8 x i64>
19295 %load = load <8 x i64>, <8 x i64>* %__b
19296 %1 = bitcast <8 x i64> %load to <8 x i64>
19297 %2 = icmp ult <8 x i64> %0, %1
19298 %3 = bitcast i8 %__u to <8 x i1>
19299 %4 = and <8 x i1> %2, %3
19300 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19301 %6 = bitcast <64 x i1> %5 to i64
; Broadcast v8i1 -> v64i1 test: a scalar i64 loaded from %__b is splatted to
; all 8 lanes and compared (icmp ult) against %__a; the result is zero-padded
; to <64 x i1> and bitcast to i64. Expected codegen uses the (%rdi){1to8}
; embedded-broadcast operand of vpcmpltuq.
19306 define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
19307 ; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
19308 ; VLX: # %bb.0: # %entry
19309 ; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19310 ; VLX-NEXT: kmovq %k0, %rax
19311 ; VLX-NEXT: vzeroupper
19314 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
19315 ; NoVLX: # %bb.0: # %entry
19316 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19317 ; NoVLX-NEXT: kmovw %k0, %eax
19318 ; NoVLX-NEXT: vzeroupper
19321 %0 = bitcast <8 x i64> %__a to <8 x i64>
19322 %load = load i64, i64* %__b
19323 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19324 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19325 %2 = icmp ult <8 x i64> %0, %1
19326 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19327 %4 = bitcast <64 x i1> %3 to i64
; Masked broadcast v8i1 -> v64i1 test: scalar i64 from %__b splatted to 8
; lanes, icmp ult against %__a, ANDed with %__u (as <8 x i1>, AND operand
; order %3, %2), zero-padded to <64 x i1> and bitcast to i64. Expected
; codegen: vpcmpltuq with (%rsi){1to8} and the {%k1} write-mask.
19331 define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
19332 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
19333 ; VLX: # %bb.0: # %entry
19334 ; VLX-NEXT: kmovd %edi, %k1
19335 ; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19336 ; VLX-NEXT: kmovq %k0, %rax
19337 ; VLX-NEXT: vzeroupper
19340 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
19341 ; NoVLX: # %bb.0: # %entry
19342 ; NoVLX-NEXT: kmovw %edi, %k1
19343 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19344 ; NoVLX-NEXT: kmovw %k0, %eax
19345 ; NoVLX-NEXT: vzeroupper
19348 %0 = bitcast <8 x i64> %__a to <8 x i64>
19349 %load = load i64, i64* %__b
19350 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19351 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19352 %2 = icmp ult <8 x i64> %0, %1
19353 %3 = bitcast i8 %__u to <8 x i1>
19354 %4 = and <8 x i1> %3, %2
19355 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19356 %6 = bitcast <64 x i1> %5 to i64
; Declaration of the 512-bit packed-single compare intrinsic: two <16 x float>
; operands plus two i32 arguments, returning a <16 x i1> mask. It is not
; called by the functions immediately following it; presumably tests further
; down the file use it (not visible here).
19361 declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)
; Ordered-equal float compare (fcmp oeq) on <4 x float> values obtained by
; bitcasting the <2 x i64> arguments. The <4 x i1> result is widened to
; <8 x i1> (shuffle indices 4..7 select zeroinitializer lanes) and bitcast
; to i8. With avx512vl the check lines expect a 128-bit vcmpeqps into %k0.
; Without it the operands are treated as zmm registers and a kshiftlw/kshiftrw
; pair by 12 keeps only the low 4 bits of the 16-bit mask.
19362 define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19363 ; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
19364 ; VLX: # %bb.0: # %entry
19365 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
19366 ; VLX-NEXT: kmovd %k0, %eax
19367 ; VLX-NEXT: # kill: def $al killed $al killed $eax
19370 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
19371 ; NoVLX: # %bb.0: # %entry
19372 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
19373 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19374 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19375 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19376 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19377 ; NoVLX-NEXT: kmovw %k0, %eax
19378 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
19379 ; NoVLX-NEXT: vzeroupper
19382 %0 = bitcast <2 x i64> %__a to <4 x float>
19383 %1 = bitcast <2 x i64> %__b to <4 x float>
19384 %2 = fcmp oeq <4 x float> %0, %1
19385 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19386 %4 = bitcast <8 x i1> %3 to i8
; Memory-operand form of the <4 x float> fcmp oeq test: the second operand is
; loaded as <2 x i64> from %__b and bitcast to <4 x float>; the <4 x i1>
; result is zero-padded to <8 x i1> and bitcast to i8. With avx512vl the load
; is expected to fold into vcmpeqps as (%rdi). Without it the check lines
; expect a separate vmovaps load into %xmm1, a zmm compare, and the
; kshiftlw/kshiftrw $12 pair that keeps the low 4 mask bits.
19390 define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
19391 ; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem:
19392 ; VLX: # %bb.0: # %entry
19393 ; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
19394 ; VLX-NEXT: kmovd %k0, %eax
19395 ; VLX-NEXT: # kill: def $al killed $al killed $eax
19398 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem:
19399 ; NoVLX: # %bb.0: # %entry
19400 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19401 ; NoVLX-NEXT: vmovaps (%rdi), %xmm1
19402 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19403 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19404 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19405 ; NoVLX-NEXT: kmovw %k0, %eax
19406 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
19407 ; NoVLX-NEXT: vzeroupper
19410 %0 = bitcast <2 x i64> %__a to <4 x float>
19411 %load = load <2 x i64>, <2 x i64>* %__b
19412 %1 = bitcast <2 x i64> %load to <4 x float>
19413 %2 = fcmp oeq <4 x float> %0, %1
19414 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19415 %4 = bitcast <8 x i1> %3 to i8
; Broadcast form of the <4 x float> fcmp oeq test: a scalar float loaded from
; %__b is splatted to 4 lanes and compared against %__a (bitcast to
; <4 x float>); the <4 x i1> result is zero-padded to <8 x i1> and bitcast to
; i8. With avx512vl the check lines expect the (%rdi){1to4} broadcast
; operand. Without it they expect a zmm compare with (%rdi){1to16} followed by
; the kshiftlw/kshiftrw $12 pair that keeps the low 4 mask bits.
19419 define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
19420 ; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b:
19421 ; VLX: # %bb.0: # %entry
19422 ; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
19423 ; VLX-NEXT: kmovd %k0, %eax
19424 ; VLX-NEXT: # kill: def $al killed $al killed $eax
19427 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b:
19428 ; NoVLX: # %bb.0: # %entry
19429 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19430 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
19431 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19432 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19433 ; NoVLX-NEXT: kmovw %k0, %eax
19434 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
19435 ; NoVLX-NEXT: vzeroupper
19438 %0 = bitcast <2 x i64> %__a to <4 x float>
19439 %load = load float, float* %__b
19440 %vec = insertelement <4 x float> undef, float %load, i32 0
19441 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19442 %2 = fcmp oeq <4 x float> %0, %1
19443 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19444 %4 = bitcast <8 x i1> %3 to i8
; Masked <4 x float> fcmp oeq test: the compare result is ANDed with the i4
; argument %__u reinterpreted as <4 x i1>, zero-padded to <8 x i1> and bitcast
; to i8. The check lines expect %__u (in %edi) to be moved into %k1 and used
; as a write-mask on vcmpeqps; the configuration without avx512vl additionally
; widens the operands to zmm and trims the mask with kshiftlw/kshiftrw $12.
19448 define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19449 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask:
19450 ; VLX: # %bb.0: # %entry
19451 ; VLX-NEXT: kmovd %edi, %k1
19452 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
19453 ; VLX-NEXT: kmovd %k0, %eax
19454 ; VLX-NEXT: # kill: def $al killed $al killed $eax
19457 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask:
19458 ; NoVLX: # %bb.0: # %entry
19459 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
19460 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19461 ; NoVLX-NEXT: kmovw %edi, %k1
19462 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19463 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19464 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19465 ; NoVLX-NEXT: kmovw %k0, %eax
19466 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
19467 ; NoVLX-NEXT: vzeroupper
19470 %0 = bitcast <2 x i64> %__a to <4 x float>
19471 %1 = bitcast <2 x i64> %__b to <4 x float>
19472 %2 = fcmp oeq <4 x float> %0, %1
19473 %3 = bitcast i4 %__u to <4 x i1>
19474 %4 = and <4 x i1> %2, %3
19475 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19476 %6 = bitcast <8 x i1> %5 to i8
; Masked, memory-operand <4 x float> fcmp oeq test: the second operand is
; loaded as <2 x i64> from %__b and bitcast to <4 x float>; the result is
; ANDed with %__u (i4 as <4 x i1>), zero-padded to <8 x i1> and bitcast to i8.
; With avx512vl the load folds into vcmpeqps as (%rsi) under {%k1}. Without it
; the check lines expect a separate vmovaps load, a masked zmm compare, and
; the kshiftlw/kshiftrw $12 pair.
19480 define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
19481 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem:
19482 ; VLX: # %bb.0: # %entry
19483 ; VLX-NEXT: kmovd %edi, %k1
19484 ; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
19485 ; VLX-NEXT: kmovd %k0, %eax
19486 ; VLX-NEXT: # kill: def $al killed $al killed $eax
19489 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem:
19490 ; NoVLX: # %bb.0: # %entry
19491 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19492 ; NoVLX-NEXT: kmovw %edi, %k1
19493 ; NoVLX-NEXT: vmovaps (%rsi), %xmm1
19494 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19495 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19496 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19497 ; NoVLX-NEXT: kmovw %k0, %eax
19498 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
19499 ; NoVLX-NEXT: vzeroupper
19502 %0 = bitcast <2 x i64> %__a to <4 x float>
19503 %load = load <2 x i64>, <2 x i64>* %__b
19504 %1 = bitcast <2 x i64> %load to <4 x float>
19505 %2 = fcmp oeq <4 x float> %0, %1
19506 %3 = bitcast i4 %__u to <4 x i1>
19507 %4 = and <4 x i1> %2, %3
19508 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19509 %6 = bitcast <8 x i1> %5 to i8
; Masked broadcast <4 x float> fcmp oeq test: a scalar float loaded from %__b
; is splatted to 4 lanes and compared against %__a; the result is ANDed with
; %__u (i4 as <4 x i1>), zero-padded to <8 x i1> and bitcast to i8. With
; avx512vl the check lines expect (%rsi){1to4} under {%k1}. Without it they
; expect a masked zmm compare with (%rsi){1to16} followed by the
; kshiftlw/kshiftrw $12 pair.
19513 define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
19514 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b:
19515 ; VLX: # %bb.0: # %entry
19516 ; VLX-NEXT: kmovd %edi, %k1
19517 ; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
19518 ; VLX-NEXT: kmovd %k0, %eax
19519 ; VLX-NEXT: # kill: def $al killed $al killed $eax
19522 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b:
19523 ; NoVLX: # %bb.0: # %entry
19524 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19525 ; NoVLX-NEXT: kmovw %edi, %k1
19526 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
19527 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19528 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19529 ; NoVLX-NEXT: kmovw %k0, %eax
19530 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
19531 ; NoVLX-NEXT: vzeroupper
19534 %0 = bitcast <2 x i64> %__a to <4 x float>
19535 %load = load float, float* %__b
19536 %vec = insertelement <4 x float> undef, float %load, i32 0
19537 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19538 %2 = fcmp oeq <4 x float> %0, %1
19539 %3 = bitcast i4 %__u to <4 x i1>
19540 %4 = and <4 x i1> %2, %3
19541 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19542 %6 = bitcast <8 x i1> %5 to i8
; <4 x float> fcmp oeq (operands bitcast from <2 x i64>) with the <4 x i1>
; result widened to <16 x i1>: shuffle indices 0..3 take the compare lanes and
; every index >= 4 selects a zeroinitializer lane. The vector is then bitcast
; to i16. With avx512vl the check lines expect a 128-bit vcmpeqps. Without it
; they expect a zmm compare and a kshiftlw/kshiftrw pair by 12 that keeps only
; the low 4 mask bits.
19548 define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19549 ; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask:
19550 ; VLX: # %bb.0: # %entry
19551 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
19552 ; VLX-NEXT: kmovd %k0, %eax
19553 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19556 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask:
19557 ; NoVLX: # %bb.0: # %entry
19558 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
19559 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19560 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19561 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19562 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19563 ; NoVLX-NEXT: kmovw %k0, %eax
19564 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19565 ; NoVLX-NEXT: vzeroupper
19568 %0 = bitcast <2 x i64> %__a to <4 x float>
19569 %1 = bitcast <2 x i64> %__b to <4 x float>
19570 %2 = fcmp oeq <4 x float> %0, %1
19571 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19572 %4 = bitcast <16 x i1> %3 to i16
; Memory-operand form of the v4i1 -> v16i1 fcmp oeq test: the second operand
; is loaded as <2 x i64> from %__b and bitcast to <4 x float>; the result is
; zero-padded to <16 x i1> and bitcast to i16. With avx512vl the load folds
; into vcmpeqps as (%rdi). Without it the check lines expect a separate
; vmovaps load, a zmm compare, and the kshiftlw/kshiftrw $12 pair.
19576 define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
19577 ; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem:
19578 ; VLX: # %bb.0: # %entry
19579 ; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
19580 ; VLX-NEXT: kmovd %k0, %eax
19581 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19584 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem:
19585 ; NoVLX: # %bb.0: # %entry
19586 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19587 ; NoVLX-NEXT: vmovaps (%rdi), %xmm1
19588 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19589 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19590 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19591 ; NoVLX-NEXT: kmovw %k0, %eax
19592 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19593 ; NoVLX-NEXT: vzeroupper
19596 %0 = bitcast <2 x i64> %__a to <4 x float>
19597 %load = load <2 x i64>, <2 x i64>* %__b
19598 %1 = bitcast <2 x i64> %load to <4 x float>
19599 %2 = fcmp oeq <4 x float> %0, %1
19600 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19601 %4 = bitcast <16 x i1> %3 to i16
; Broadcast form of the v4i1 -> v16i1 fcmp oeq test: a scalar float loaded
; from %__b is splatted to 4 lanes and compared against %__a; the result is
; zero-padded to <16 x i1> and bitcast to i16. With avx512vl the check lines
; expect the (%rdi){1to4} broadcast operand. Without it they expect a zmm
; compare with (%rdi){1to16} followed by the kshiftlw/kshiftrw $12 pair.
19605 define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
19606 ; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b:
19607 ; VLX: # %bb.0: # %entry
19608 ; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
19609 ; VLX-NEXT: kmovd %k0, %eax
19610 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19613 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b:
19614 ; NoVLX: # %bb.0: # %entry
19615 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19616 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
19617 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19618 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19619 ; NoVLX-NEXT: kmovw %k0, %eax
19620 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19621 ; NoVLX-NEXT: vzeroupper
19624 %0 = bitcast <2 x i64> %__a to <4 x float>
19625 %load = load float, float* %__b
19626 %vec = insertelement <4 x float> undef, float %load, i32 0
19627 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19628 %2 = fcmp oeq <4 x float> %0, %1
19629 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19630 %4 = bitcast <16 x i1> %3 to i16
; Masked fcmp oeq of %__a and %__b (both viewed as <4 x float>). The <4 x i1>
; result is ANDed with %__u (i4 bitcast to <4 x i1>), widened to <16 x i1> and
; bitcast to i16.
; Checked output: both configurations move %edi into %k1 and use it as the
; compare's write-mask; NoVLX compares at zmm width and then shifts the 16-bit
; mask left and right by 12 so only the low 4 bits survive.
19634 define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19635 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask:
19636 ; VLX: # %bb.0: # %entry
19637 ; VLX-NEXT: kmovd %edi, %k1
19638 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
19639 ; VLX-NEXT: kmovd %k0, %eax
19640 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19643 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask:
19644 ; NoVLX: # %bb.0: # %entry
19645 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
19646 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19647 ; NoVLX-NEXT: kmovw %edi, %k1
19648 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19649 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19650 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19651 ; NoVLX-NEXT: kmovw %k0, %eax
19652 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19653 ; NoVLX-NEXT: vzeroupper
19656 %0 = bitcast <2 x i64> %__a to <4 x float>
19657 %1 = bitcast <2 x i64> %__b to <4 x float>
19658 %2 = fcmp oeq <4 x float> %0, %1
; Apply the caller-supplied lane mask to the compare result.
19659 %3 = bitcast i4 %__u to <4 x i1>
19660 %4 = and <4 x i1> %2, %3
; Mask indices 0-3 pick the masked compare lanes; indices 4-7 pick lanes of
; the zeroinitializer operand, so result lanes 4..15 are zero.
19661 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19662 %6 = bitcast <16 x i1> %5 to i16
; Masked fcmp oeq of %__a against a vector loaded from %__b, both viewed as
; <4 x float>. The <4 x i1> result is ANDed with %__u (i4 bitcast to <4 x i1>),
; widened to <16 x i1> and bitcast to i16.
; Checked output: both configurations move %edi into %k1 and use it as the
; compare's write-mask. VLX compares directly from memory; NoVLX loads the
; operand, compares at zmm width, then shifts the 16-bit mask left and right
; by 12 so only the low 4 bits survive.
19666 define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
19667 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem:
19668 ; VLX: # %bb.0: # %entry
19669 ; VLX-NEXT: kmovd %edi, %k1
19670 ; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
19671 ; VLX-NEXT: kmovd %k0, %eax
19672 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19675 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem:
19676 ; NoVLX: # %bb.0: # %entry
19677 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19678 ; NoVLX-NEXT: kmovw %edi, %k1
19679 ; NoVLX-NEXT: vmovaps (%rsi), %xmm1
19680 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19681 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19682 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19683 ; NoVLX-NEXT: kmovw %k0, %eax
19684 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19685 ; NoVLX-NEXT: vzeroupper
19688 %0 = bitcast <2 x i64> %__a to <4 x float>
19689 %load = load <2 x i64>, <2 x i64>* %__b
19690 %1 = bitcast <2 x i64> %load to <4 x float>
19691 %2 = fcmp oeq <4 x float> %0, %1
; Apply the caller-supplied lane mask to the compare result.
19692 %3 = bitcast i4 %__u to <4 x i1>
19693 %4 = and <4 x i1> %2, %3
; Mask indices 0-3 pick the masked compare lanes; indices 4-7 pick lanes of
; the zeroinitializer operand, so result lanes 4..15 are zero.
19694 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19695 %6 = bitcast <16 x i1> %5 to i16
; Masked fcmp oeq of %__a (viewed as <4 x float>) against a scalar float loaded
; from %__b and splatted to all 4 lanes. The <4 x i1> result is ANDed with %__u
; (i4 bitcast to <4 x i1>), widened to <16 x i1> and bitcast to i16.
; Checked output: both configurations move %edi into %k1 and use it as the
; compare's write-mask. VLX uses a {1to4} broadcast at xmm width; NoVLX uses a
; {1to16} broadcast at zmm width, then shifts the 16-bit mask left and right
; by 12 so only the low 4 bits survive.
19699 define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
19700 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b:
19701 ; VLX: # %bb.0: # %entry
19702 ; VLX-NEXT: kmovd %edi, %k1
19703 ; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
19704 ; VLX-NEXT: kmovd %k0, %eax
19705 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19708 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b:
19709 ; NoVLX: # %bb.0: # %entry
19710 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19711 ; NoVLX-NEXT: kmovw %edi, %k1
19712 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
19713 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19714 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19715 ; NoVLX-NEXT: kmovw %k0, %eax
19716 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19717 ; NoVLX-NEXT: vzeroupper
19720 %0 = bitcast <2 x i64> %__a to <4 x float>
19721 %load = load float, float* %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero mask.
19722 %vec = insertelement <4 x float> undef, float %load, i32 0
19723 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19724 %2 = fcmp oeq <4 x float> %0, %1
; Apply the caller-supplied lane mask to the compare result.
19725 %3 = bitcast i4 %__u to <4 x i1>
19726 %4 = and <4 x i1> %2, %3
; Mask indices 0-3 pick the masked compare lanes; indices 4-7 pick lanes of
; the zeroinitializer operand, so result lanes 4..15 are zero.
19727 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19728 %6 = bitcast <16 x i1> %5 to i16
; Unmasked fcmp oeq of %__a and %__b (both viewed as <4 x float>). The
; <4 x i1> result is widened to <32 x i1> and bitcast to i32.
; Checked output: VLX compares at xmm width; NoVLX compares at zmm width, then
; shifts the 16-bit mask left and right by 12 so only the low 4 bits survive.
19734 define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19735 ; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask:
19736 ; VLX: # %bb.0: # %entry
19737 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
19738 ; VLX-NEXT: kmovd %k0, %eax
19741 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask:
19742 ; NoVLX: # %bb.0: # %entry
19743 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
19744 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19745 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19746 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19747 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19748 ; NoVLX-NEXT: kmovw %k0, %eax
19749 ; NoVLX-NEXT: vzeroupper
19752 %0 = bitcast <2 x i64> %__a to <4 x float>
19753 %1 = bitcast <2 x i64> %__b to <4 x float>
19754 %2 = fcmp oeq <4 x float> %0, %1
; Mask indices 0-3 pick the compare lanes; indices 4-7 pick lanes of the
; zeroinitializer operand, so result lanes 4..31 are zero.
19755 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19756 %4 = bitcast <32 x i1> %3 to i32
; Unmasked fcmp oeq of %__a against a vector loaded from %__b, both viewed as
; <4 x float>. The <4 x i1> result is widened to <32 x i1> and bitcast to i32.
; Checked output: VLX compares directly from memory at xmm width; NoVLX loads
; the operand, compares at zmm width, then shifts the 16-bit mask left and
; right by 12 so only the low 4 bits survive.
19760 define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
19761 ; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem:
19762 ; VLX: # %bb.0: # %entry
19763 ; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
19764 ; VLX-NEXT: kmovd %k0, %eax
19767 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem:
19768 ; NoVLX: # %bb.0: # %entry
19769 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19770 ; NoVLX-NEXT: vmovaps (%rdi), %xmm1
19771 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19772 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19773 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19774 ; NoVLX-NEXT: kmovw %k0, %eax
19775 ; NoVLX-NEXT: vzeroupper
19778 %0 = bitcast <2 x i64> %__a to <4 x float>
19779 %load = load <2 x i64>, <2 x i64>* %__b
19780 %1 = bitcast <2 x i64> %load to <4 x float>
19781 %2 = fcmp oeq <4 x float> %0, %1
; Mask indices 0-3 pick the compare lanes; indices 4-7 pick lanes of the
; zeroinitializer operand, so result lanes 4..31 are zero.
19782 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19783 %4 = bitcast <32 x i1> %3 to i32
; Unmasked fcmp oeq of %__a (viewed as <4 x float>) against a scalar float
; loaded from %__b and splatted to all 4 lanes. The <4 x i1> result is widened
; to <32 x i1> and bitcast to i32.
; Checked output: VLX uses a {1to4} broadcast at xmm width; NoVLX uses a
; {1to16} broadcast at zmm width, then shifts the 16-bit mask left and right
; by 12 so only the low 4 bits survive.
19787 define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
19788 ; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b:
19789 ; VLX: # %bb.0: # %entry
19790 ; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
19791 ; VLX-NEXT: kmovd %k0, %eax
19794 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b:
19795 ; NoVLX: # %bb.0: # %entry
19796 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19797 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
19798 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19799 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19800 ; NoVLX-NEXT: kmovw %k0, %eax
19801 ; NoVLX-NEXT: vzeroupper
19804 %0 = bitcast <2 x i64> %__a to <4 x float>
19805 %load = load float, float* %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero mask.
19806 %vec = insertelement <4 x float> undef, float %load, i32 0
19807 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19808 %2 = fcmp oeq <4 x float> %0, %1
; Mask indices 0-3 pick the compare lanes; indices 4-7 pick lanes of the
; zeroinitializer operand, so result lanes 4..31 are zero.
19809 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19810 %4 = bitcast <32 x i1> %3 to i32
; Masked fcmp oeq of %__a and %__b (both viewed as <4 x float>). The <4 x i1>
; result is ANDed with %__u (i4 bitcast to <4 x i1>), widened to <32 x i1> and
; bitcast to i32.
; Checked output: both configurations move %edi into %k1 and use it as the
; compare's write-mask; NoVLX compares at zmm width and then shifts the 16-bit
; mask left and right by 12 so only the low 4 bits survive.
19814 define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19815 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask:
19816 ; VLX: # %bb.0: # %entry
19817 ; VLX-NEXT: kmovd %edi, %k1
19818 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
19819 ; VLX-NEXT: kmovd %k0, %eax
19822 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask:
19823 ; NoVLX: # %bb.0: # %entry
19824 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
19825 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19826 ; NoVLX-NEXT: kmovw %edi, %k1
19827 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19828 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19829 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19830 ; NoVLX-NEXT: kmovw %k0, %eax
19831 ; NoVLX-NEXT: vzeroupper
19834 %0 = bitcast <2 x i64> %__a to <4 x float>
19835 %1 = bitcast <2 x i64> %__b to <4 x float>
19836 %2 = fcmp oeq <4 x float> %0, %1
; Apply the caller-supplied lane mask to the compare result.
19837 %3 = bitcast i4 %__u to <4 x i1>
19838 %4 = and <4 x i1> %2, %3
; Mask indices 0-3 pick the masked compare lanes; indices 4-7 pick lanes of
; the zeroinitializer operand, so result lanes 4..31 are zero.
19839 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19840 %6 = bitcast <32 x i1> %5 to i32
; Masked fcmp oeq of %__a against a vector loaded from %__b, both viewed as
; <4 x float>. The <4 x i1> result is ANDed with %__u (i4 bitcast to <4 x i1>),
; widened to <32 x i1> and bitcast to i32.
; Checked output: both configurations move %edi into %k1 and use it as the
; compare's write-mask. VLX compares directly from memory; NoVLX loads the
; operand, compares at zmm width, then shifts the 16-bit mask left and right
; by 12 so only the low 4 bits survive.
19844 define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
19845 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem:
19846 ; VLX: # %bb.0: # %entry
19847 ; VLX-NEXT: kmovd %edi, %k1
19848 ; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
19849 ; VLX-NEXT: kmovd %k0, %eax
19852 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem:
19853 ; NoVLX: # %bb.0: # %entry
19854 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19855 ; NoVLX-NEXT: kmovw %edi, %k1
19856 ; NoVLX-NEXT: vmovaps (%rsi), %xmm1
19857 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19858 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19859 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19860 ; NoVLX-NEXT: kmovw %k0, %eax
19861 ; NoVLX-NEXT: vzeroupper
19864 %0 = bitcast <2 x i64> %__a to <4 x float>
19865 %load = load <2 x i64>, <2 x i64>* %__b
19866 %1 = bitcast <2 x i64> %load to <4 x float>
19867 %2 = fcmp oeq <4 x float> %0, %1
; Apply the caller-supplied lane mask to the compare result.
19868 %3 = bitcast i4 %__u to <4 x i1>
19869 %4 = and <4 x i1> %2, %3
; Mask indices 0-3 pick the masked compare lanes; indices 4-7 pick lanes of
; the zeroinitializer operand, so result lanes 4..31 are zero.
19870 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19871 %6 = bitcast <32 x i1> %5 to i32
; Masked fcmp oeq of %__a (viewed as <4 x float>) against a scalar float loaded
; from %__b and splatted to all 4 lanes. The <4 x i1> result is ANDed with %__u
; (i4 bitcast to <4 x i1>), widened to <32 x i1> and bitcast to i32.
; Checked output: both configurations move %edi into %k1 and use it as the
; compare's write-mask. VLX uses a {1to4} broadcast at xmm width; NoVLX uses a
; {1to16} broadcast at zmm width, then shifts the 16-bit mask left and right
; by 12 so only the low 4 bits survive.
19875 define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
19876 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b:
19877 ; VLX: # %bb.0: # %entry
19878 ; VLX-NEXT: kmovd %edi, %k1
19879 ; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
19880 ; VLX-NEXT: kmovd %k0, %eax
19883 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b:
19884 ; NoVLX: # %bb.0: # %entry
19885 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19886 ; NoVLX-NEXT: kmovw %edi, %k1
19887 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
19888 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19889 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19890 ; NoVLX-NEXT: kmovw %k0, %eax
19891 ; NoVLX-NEXT: vzeroupper
19894 %0 = bitcast <2 x i64> %__a to <4 x float>
19895 %load = load float, float* %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero mask.
19896 %vec = insertelement <4 x float> undef, float %load, i32 0
19897 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19898 %2 = fcmp oeq <4 x float> %0, %1
; Apply the caller-supplied lane mask to the compare result.
19899 %3 = bitcast i4 %__u to <4 x i1>
19900 %4 = and <4 x i1> %2, %3
; Mask indices 0-3 pick the masked compare lanes; indices 4-7 pick lanes of
; the zeroinitializer operand, so result lanes 4..31 are zero.
19901 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19902 %6 = bitcast <32 x i1> %5 to i32
; Unmasked fcmp oeq of %__a and %__b (both viewed as <4 x float>). The
; <4 x i1> result is widened to <64 x i1> and bitcast to i64.
; Checked output: VLX compares at xmm width and reads the mask with kmovq;
; NoVLX compares at zmm width, then shifts the 16-bit mask left and right by
; 12 so only the low 4 bits survive.
19908 define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19909 ; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask:
19910 ; VLX: # %bb.0: # %entry
19911 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
19912 ; VLX-NEXT: kmovq %k0, %rax
19915 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask:
19916 ; NoVLX: # %bb.0: # %entry
19917 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
19918 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19919 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19920 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19921 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19922 ; NoVLX-NEXT: kmovw %k0, %eax
19923 ; NoVLX-NEXT: vzeroupper
19926 %0 = bitcast <2 x i64> %__a to <4 x float>
19927 %1 = bitcast <2 x i64> %__b to <4 x float>
19928 %2 = fcmp oeq <4 x float> %0, %1
; Mask indices 0-3 pick the compare lanes; indices 4-7 pick lanes of the
; zeroinitializer operand, so result lanes 4..63 are zero.
19929 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19930 %4 = bitcast <64 x i1> %3 to i64
; Unmasked fcmp oeq of %__a against a vector loaded from %__b, both viewed as
; <4 x float>. The <4 x i1> result is widened to <64 x i1> and bitcast to i64.
; Checked output: VLX compares directly from memory at xmm width and reads the
; mask with kmovq; NoVLX loads the operand, compares at zmm width, then shifts
; the 16-bit mask left and right by 12 so only the low 4 bits survive.
19934 define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
19935 ; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem:
19936 ; VLX: # %bb.0: # %entry
19937 ; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
19938 ; VLX-NEXT: kmovq %k0, %rax
19941 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem:
19942 ; NoVLX: # %bb.0: # %entry
19943 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19944 ; NoVLX-NEXT: vmovaps (%rdi), %xmm1
19945 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19946 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19947 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19948 ; NoVLX-NEXT: kmovw %k0, %eax
19949 ; NoVLX-NEXT: vzeroupper
19952 %0 = bitcast <2 x i64> %__a to <4 x float>
19953 %load = load <2 x i64>, <2 x i64>* %__b
19954 %1 = bitcast <2 x i64> %load to <4 x float>
19955 %2 = fcmp oeq <4 x float> %0, %1
; Mask indices 0-3 pick the compare lanes; indices 4-7 pick lanes of the
; zeroinitializer operand, so result lanes 4..63 are zero.
19956 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19957 %4 = bitcast <64 x i1> %3 to i64
; Unmasked fcmp oeq of %__a (viewed as <4 x float>) against a scalar float
; loaded from %__b and splatted to all 4 lanes. The <4 x i1> result is widened
; to <64 x i1> and bitcast to i64.
; Checked output: VLX uses a {1to4} broadcast at xmm width and reads the mask
; with kmovq; NoVLX uses a {1to16} broadcast at zmm width, then shifts the
; 16-bit mask left and right by 12 so only the low 4 bits survive.
19961 define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
19962 ; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b:
19963 ; VLX: # %bb.0: # %entry
19964 ; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
19965 ; VLX-NEXT: kmovq %k0, %rax
19968 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b:
19969 ; NoVLX: # %bb.0: # %entry
19970 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19971 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
19972 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19973 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19974 ; NoVLX-NEXT: kmovw %k0, %eax
19975 ; NoVLX-NEXT: vzeroupper
19978 %0 = bitcast <2 x i64> %__a to <4 x float>
19979 %load = load float, float* %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero mask.
19980 %vec = insertelement <4 x float> undef, float %load, i32 0
19981 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19982 %2 = fcmp oeq <4 x float> %0, %1
; Mask indices 0-3 pick the compare lanes; indices 4-7 pick lanes of the
; zeroinitializer operand, so result lanes 4..63 are zero.
19983 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19984 %4 = bitcast <64 x i1> %3 to i64
; Masked fcmp oeq of %__a and %__b (both viewed as <4 x float>). The <4 x i1>
; result is ANDed with %__u (i4 bitcast to <4 x i1>), widened to <64 x i1> and
; bitcast to i64.
; Checked output: both configurations move %edi into %k1 and use it as the
; compare's write-mask; VLX reads the mask with kmovq, while NoVLX compares at
; zmm width and shifts the 16-bit mask left and right by 12 so only the low 4
; bits survive.
19988 define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19989 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask:
19990 ; VLX: # %bb.0: # %entry
19991 ; VLX-NEXT: kmovd %edi, %k1
19992 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
19993 ; VLX-NEXT: kmovq %k0, %rax
19996 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask:
19997 ; NoVLX: # %bb.0: # %entry
19998 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
19999 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20000 ; NoVLX-NEXT: kmovw %edi, %k1
20001 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20002 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20003 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20004 ; NoVLX-NEXT: kmovw %k0, %eax
20005 ; NoVLX-NEXT: vzeroupper
20008 %0 = bitcast <2 x i64> %__a to <4 x float>
20009 %1 = bitcast <2 x i64> %__b to <4 x float>
20010 %2 = fcmp oeq <4 x float> %0, %1
; Apply the caller-supplied lane mask to the compare result.
20011 %3 = bitcast i4 %__u to <4 x i1>
20012 %4 = and <4 x i1> %2, %3
; Mask indices 0-3 pick the masked compare lanes; indices 4-7 pick lanes of
; the zeroinitializer operand, so result lanes 4..63 are zero.
20013 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20014 %6 = bitcast <64 x i1> %5 to i64
; Masked fcmp oeq of %__a against a vector loaded from %__b, both viewed as
; <4 x float>. The <4 x i1> result is ANDed with %__u (i4 bitcast to <4 x i1>),
; widened to <64 x i1> and bitcast to i64.
; Checked output: both configurations move %edi into %k1 and use it as the
; compare's write-mask. VLX compares directly from memory and reads the mask
; with kmovq; NoVLX loads the operand, compares at zmm width, then shifts the
; 16-bit mask left and right by 12 so only the low 4 bits survive.
20018 define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
20019 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem:
20020 ; VLX: # %bb.0: # %entry
20021 ; VLX-NEXT: kmovd %edi, %k1
20022 ; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
20023 ; VLX-NEXT: kmovq %k0, %rax
20026 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem:
20027 ; NoVLX: # %bb.0: # %entry
20028 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20029 ; NoVLX-NEXT: kmovw %edi, %k1
20030 ; NoVLX-NEXT: vmovaps (%rsi), %xmm1
20031 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20032 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20033 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20034 ; NoVLX-NEXT: kmovw %k0, %eax
20035 ; NoVLX-NEXT: vzeroupper
20038 %0 = bitcast <2 x i64> %__a to <4 x float>
20039 %load = load <2 x i64>, <2 x i64>* %__b
20040 %1 = bitcast <2 x i64> %load to <4 x float>
20041 %2 = fcmp oeq <4 x float> %0, %1
; Apply the caller-supplied lane mask to the compare result.
20042 %3 = bitcast i4 %__u to <4 x i1>
20043 %4 = and <4 x i1> %2, %3
; Mask indices 0-3 pick the masked compare lanes; indices 4-7 pick lanes of
; the zeroinitializer operand, so result lanes 4..63 are zero.
20044 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20045 %6 = bitcast <64 x i1> %5 to i64
; Masked fcmp oeq of %__a (viewed as <4 x float>) against a scalar float loaded
; from %__b and splatted to all 4 lanes. The <4 x i1> result is ANDed with %__u
; (i4 bitcast to <4 x i1>), widened to <64 x i1> and bitcast to i64.
; Checked output: both configurations move %edi into %k1 and use it as the
; compare's write-mask. VLX uses a {1to4} broadcast and reads the mask with
; kmovq; NoVLX uses a {1to16} broadcast at zmm width, then shifts the 16-bit
; mask left and right by 12 so only the low 4 bits survive.
20049 define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
20050 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b:
20051 ; VLX: # %bb.0: # %entry
20052 ; VLX-NEXT: kmovd %edi, %k1
20053 ; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
20054 ; VLX-NEXT: kmovq %k0, %rax
20057 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b:
20058 ; NoVLX: # %bb.0: # %entry
20059 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20060 ; NoVLX-NEXT: kmovw %edi, %k1
20061 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
20062 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20063 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20064 ; NoVLX-NEXT: kmovw %k0, %eax
20065 ; NoVLX-NEXT: vzeroupper
20068 %0 = bitcast <2 x i64> %__a to <4 x float>
20069 %load = load float, float* %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero mask.
20070 %vec = insertelement <4 x float> undef, float %load, i32 0
20071 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
20072 %2 = fcmp oeq <4 x float> %0, %1
; Apply the caller-supplied lane mask to the compare result.
20073 %3 = bitcast i4 %__u to <4 x i1>
20074 %4 = and <4 x i1> %2, %3
; Mask indices 0-3 pick the masked compare lanes; indices 4-7 pick lanes of
; the zeroinitializer operand, so result lanes 4..63 are zero.
20075 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20076 %6 = bitcast <64 x i1> %5 to i64
; Unmasked fcmp oeq of %__a and %__b (both viewed as <8 x float>). The
; <8 x i1> result is widened to <16 x i1> and bitcast to i16.
; Checked output: VLX compares at ymm width; NoVLX compares at zmm width, then
; shifts the 16-bit mask left and right by 8 so only the low 8 bits survive.
20082 define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20083 ; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
20084 ; VLX: # %bb.0: # %entry
20085 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
20086 ; VLX-NEXT: kmovd %k0, %eax
20087 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
20088 ; VLX-NEXT: vzeroupper
20091 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
20092 ; NoVLX: # %bb.0: # %entry
20093 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
20094 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20095 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20096 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20097 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20098 ; NoVLX-NEXT: kmovw %k0, %eax
20099 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20100 ; NoVLX-NEXT: vzeroupper
20103 %0 = bitcast <4 x i64> %__a to <8 x float>
20104 %1 = bitcast <4 x i64> %__b to <8 x float>
20105 %2 = fcmp oeq <8 x float> %0, %1
; Mask indices 0-7 pick the compare lanes; indices 8-15 pick lanes of the
; zeroinitializer operand, so result lanes 8..15 are zero.
20106 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20107 %4 = bitcast <16 x i1> %3 to i16
; Unmasked fcmp oeq of %__a against a vector loaded from %__b, both viewed as
; <8 x float>. The <8 x i1> result is widened to <16 x i1> and bitcast to i16.
; Checked output: VLX compares directly from memory at ymm width; NoVLX loads
; the operand, compares at zmm width, then shifts the 16-bit mask left and
; right by 8 so only the low 8 bits survive.
20111 define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
20112 ; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
20113 ; VLX: # %bb.0: # %entry
20114 ; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0
20115 ; VLX-NEXT: kmovd %k0, %eax
20116 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
20117 ; VLX-NEXT: vzeroupper
20120 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
20121 ; NoVLX: # %bb.0: # %entry
20122 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20123 ; NoVLX-NEXT: vmovaps (%rdi), %ymm1
20124 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20125 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20126 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20127 ; NoVLX-NEXT: kmovw %k0, %eax
20128 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20129 ; NoVLX-NEXT: vzeroupper
20132 %0 = bitcast <4 x i64> %__a to <8 x float>
20133 %load = load <4 x i64>, <4 x i64>* %__b
20134 %1 = bitcast <4 x i64> %load to <8 x float>
20135 %2 = fcmp oeq <8 x float> %0, %1
; Mask indices 0-7 pick the compare lanes; indices 8-15 pick lanes of the
; zeroinitializer operand, so result lanes 8..15 are zero.
20136 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20137 %4 = bitcast <16 x i1> %3 to i16
; Unmasked fcmp oeq of %__a (viewed as <8 x float>) against a scalar float
; loaded from %__b and splatted to all 8 lanes. The <8 x i1> result is widened
; to <16 x i1> and bitcast to i16.
; Checked output: VLX uses a {1to8} broadcast at ymm width; NoVLX uses a
; {1to16} broadcast at zmm width, then shifts the 16-bit mask left and right
; by 8 so only the low 8 bits survive.
20141 define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
20142 ; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
20143 ; VLX: # %bb.0: # %entry
20144 ; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
20145 ; VLX-NEXT: kmovd %k0, %eax
20146 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
20147 ; VLX-NEXT: vzeroupper
20150 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
20151 ; NoVLX: # %bb.0: # %entry
20152 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20153 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
20154 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20155 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20156 ; NoVLX-NEXT: kmovw %k0, %eax
20157 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20158 ; NoVLX-NEXT: vzeroupper
20161 %0 = bitcast <4 x i64> %__a to <8 x float>
20162 %load = load float, float* %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero mask.
20163 %vec = insertelement <8 x float> undef, float %load, i32 0
20164 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20165 %2 = fcmp oeq <8 x float> %0, %1
; Mask indices 0-7 pick the compare lanes; indices 8-15 pick lanes of the
; zeroinitializer operand, so result lanes 8..15 are zero.
20166 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20167 %4 = bitcast <16 x i1> %3 to i16
; Masked fcmp oeq of %__a and %__b (both viewed as <8 x float>). The <8 x i1>
; result is ANDed with %__u (i8 bitcast to <8 x i1>), widened to <16 x i1> and
; bitcast to i16.
; Checked output: both configurations move %edi into %k1 and use it as the
; compare's write-mask; NoVLX compares at zmm width and then shifts the 16-bit
; mask left and right by 8 so only the low 8 bits survive.
20171 define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20172 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask:
20173 ; VLX: # %bb.0: # %entry
20174 ; VLX-NEXT: kmovd %edi, %k1
20175 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1}
20176 ; VLX-NEXT: kmovd %k0, %eax
20177 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
20178 ; VLX-NEXT: vzeroupper
20181 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask:
20182 ; NoVLX: # %bb.0: # %entry
20183 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
20184 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20185 ; NoVLX-NEXT: kmovw %edi, %k1
20186 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20187 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20188 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20189 ; NoVLX-NEXT: kmovw %k0, %eax
20190 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20191 ; NoVLX-NEXT: vzeroupper
20194 %0 = bitcast <4 x i64> %__a to <8 x float>
20195 %1 = bitcast <4 x i64> %__b to <8 x float>
20196 %2 = fcmp oeq <8 x float> %0, %1
; Apply the caller-supplied lane mask to the compare result.
20197 %3 = bitcast i8 %__u to <8 x i1>
20198 %4 = and <8 x i1> %2, %3
; Mask indices 0-7 pick the masked compare lanes; indices 8-15 pick lanes of
; the zeroinitializer operand, so result lanes 8..15 are zero.
20199 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20200 %6 = bitcast <16 x i1> %5 to i16
; Masked fcmp oeq of %__a against a vector loaded from %__b, both viewed as
; <8 x float>. The <8 x i1> result is ANDed with %__u (i8 bitcast to <8 x i1>),
; widened to <16 x i1> and bitcast to i16.
; Checked output: both configurations move %edi into %k1 and use it as the
; compare's write-mask. VLX compares directly from memory; NoVLX loads the
; operand (here the load is expected before the kmovw), compares at zmm width,
; then shifts the 16-bit mask left and right by 8 so only the low 8 bits
; survive.
20204 define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
20205 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem:
20206 ; VLX: # %bb.0: # %entry
20207 ; VLX-NEXT: kmovd %edi, %k1
20208 ; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1}
20209 ; VLX-NEXT: kmovd %k0, %eax
20210 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
20211 ; VLX-NEXT: vzeroupper
20214 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem:
20215 ; NoVLX: # %bb.0: # %entry
20216 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20217 ; NoVLX-NEXT: vmovaps (%rsi), %ymm1
20218 ; NoVLX-NEXT: kmovw %edi, %k1
20219 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20220 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20221 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20222 ; NoVLX-NEXT: kmovw %k0, %eax
20223 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20224 ; NoVLX-NEXT: vzeroupper
20227 %0 = bitcast <4 x i64> %__a to <8 x float>
20228 %load = load <4 x i64>, <4 x i64>* %__b
20229 %1 = bitcast <4 x i64> %load to <8 x float>
20230 %2 = fcmp oeq <8 x float> %0, %1
; Apply the caller-supplied lane mask to the compare result.
20231 %3 = bitcast i8 %__u to <8 x i1>
20232 %4 = and <8 x i1> %2, %3
; Mask indices 0-7 pick the masked compare lanes; indices 8-15 pick lanes of
; the zeroinitializer operand, so result lanes 8..15 are zero.
20233 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20234 %6 = bitcast <16 x i1> %5 to i16
; Masked fcmp oeq of %__a (viewed as <8 x float>) against a scalar float loaded
; from %__b and splatted to all 8 lanes. The <8 x i1> result is ANDed with %__u
; (i8 bitcast to <8 x i1>), widened to <16 x i1> and bitcast to i16.
; Checked output: both configurations move %edi into %k1 and use it as the
; compare's write-mask. VLX uses a {1to8} broadcast at ymm width; NoVLX uses a
; {1to16} broadcast at zmm width, then shifts the 16-bit mask left and right
; by 8 so only the low 8 bits survive.
20238 define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr {
20239 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b:
20240 ; VLX: # %bb.0: # %entry
20241 ; VLX-NEXT: kmovd %edi, %k1
20242 ; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
20243 ; VLX-NEXT: kmovd %k0, %eax
20244 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
20245 ; VLX-NEXT: vzeroupper
20248 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b:
20249 ; NoVLX: # %bb.0: # %entry
20250 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20251 ; NoVLX-NEXT: kmovw %edi, %k1
20252 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
20253 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20254 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20255 ; NoVLX-NEXT: kmovw %k0, %eax
20256 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20257 ; NoVLX-NEXT: vzeroupper
20260 %0 = bitcast <4 x i64> %__a to <8 x float>
20261 %load = load float, float* %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero mask.
20262 %vec = insertelement <8 x float> undef, float %load, i32 0
20263 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20264 %2 = fcmp oeq <8 x float> %0, %1
; Apply the caller-supplied lane mask to the compare result.
20265 %3 = bitcast i8 %__u to <8 x i1>
20266 %4 = and <8 x i1> %2, %3
; Mask indices 0-7 pick the masked compare lanes; indices 8-15 pick lanes of
; the zeroinitializer operand, so result lanes 8..15 are zero.
20267 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20268 %6 = bitcast <16 x i1> %5 to i16
; Unmasked fcmp oeq of %__a and %__b (both viewed as <8 x float>). The
; <8 x i1> result is widened to <32 x i1> and bitcast to i32.
; Checked output: VLX compares at ymm width; NoVLX compares at zmm width, then
; shifts the 16-bit mask left and right by 8 so only the low 8 bits survive.
20274 define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20275 ; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask:
20276 ; VLX: # %bb.0: # %entry
20277 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
20278 ; VLX-NEXT: kmovd %k0, %eax
20279 ; VLX-NEXT: vzeroupper
20282 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask:
20283 ; NoVLX: # %bb.0: # %entry
20284 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
20285 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20286 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20287 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20288 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20289 ; NoVLX-NEXT: kmovw %k0, %eax
20290 ; NoVLX-NEXT: vzeroupper
20293 %0 = bitcast <4 x i64> %__a to <8 x float>
20294 %1 = bitcast <4 x i64> %__b to <8 x float>
20295 %2 = fcmp oeq <8 x float> %0, %1
; Mask indices 0-7 pick the compare lanes; indices 8-15 pick lanes of the
; zeroinitializer operand, so result lanes 8..31 are zero.
20296 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20297 %4 = bitcast <32 x i1> %3 to i32
; Unmasked fcmp oeq of %__a against a vector loaded from %__b, both viewed as
; <8 x float>. The <8 x i1> result is widened to <32 x i1> and bitcast to i32.
; Checked output: VLX compares directly from memory at ymm width; NoVLX loads
; the operand, compares at zmm width, then shifts the 16-bit mask left and
; right by 8 so only the low 8 bits survive.
20301 define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
20302 ; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem:
20303 ; VLX: # %bb.0: # %entry
20304 ; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0
20305 ; VLX-NEXT: kmovd %k0, %eax
20306 ; VLX-NEXT: vzeroupper
20309 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem:
20310 ; NoVLX: # %bb.0: # %entry
20311 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20312 ; NoVLX-NEXT: vmovaps (%rdi), %ymm1
20313 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20314 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20315 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20316 ; NoVLX-NEXT: kmovw %k0, %eax
20317 ; NoVLX-NEXT: vzeroupper
20320 %0 = bitcast <4 x i64> %__a to <8 x float>
20321 %load = load <4 x i64>, <4 x i64>* %__b
20322 %1 = bitcast <4 x i64> %load to <8 x float>
20323 %2 = fcmp oeq <8 x float> %0, %1
; Mask indices 0-7 pick the compare lanes; indices 8-15 pick lanes of the
; zeroinitializer operand, so result lanes 8..31 are zero.
20324 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20325 %4 = bitcast <32 x i1> %3 to i32
; Unmasked `fcmp oeq` on <8 x float> against a scalar float loaded from %__b
; and splatted to all 8 lanes (insertelement + all-zero-index shufflevector).
; The <8 x i1> result is zero-padded to <32 x i1> and returned as an i32.
; The VLX run expects an embedded {1to8} broadcast on a ymm compare; the NoVLX
; run expects a {1to16} broadcast on a zmm compare followed by a
; kshiftlw/kshiftrw $8 pair that clears mask bits 8-15.
20329 define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
20330 ; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b:
20331 ; VLX: # %bb.0: # %entry
20332 ; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
20333 ; VLX-NEXT: kmovd %k0, %eax
20334 ; VLX-NEXT: vzeroupper
20337 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b:
20338 ; NoVLX: # %bb.0: # %entry
20339 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20340 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
20341 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20342 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20343 ; NoVLX-NEXT: kmovw %k0, %eax
20344 ; NoVLX-NEXT: vzeroupper
20347 %0 = bitcast <4 x i64> %__a to <8 x float>
20348 %load = load float, float* %__b
20349 %vec = insertelement <8 x float> undef, float %load, i32 0
20350 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20351 %2 = fcmp oeq <8 x float> %0, %1
20352 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20353 %4 = bitcast <32 x i1> %3 to i32
; Masked `fcmp oeq` on <8 x float>, both operands in registers. The i8 mask
; %__u is bitcast to <8 x i1> and ANDed with the compare result before the
; result is zero-padded to <32 x i1> and returned as an i32.
; Both runs expect %edi to be moved into %k1 and used as the write-mask of the
; compare; the NoVLX run additionally widens to zmm and clears mask bits 8-15
; with a kshiftlw/kshiftrw $8 pair.
20357 define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20358 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask:
20359 ; VLX: # %bb.0: # %entry
20360 ; VLX-NEXT: kmovd %edi, %k1
20361 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1}
20362 ; VLX-NEXT: kmovd %k0, %eax
20363 ; VLX-NEXT: vzeroupper
20366 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask:
20367 ; NoVLX: # %bb.0: # %entry
20368 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
20369 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20370 ; NoVLX-NEXT: kmovw %edi, %k1
20371 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20372 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20373 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20374 ; NoVLX-NEXT: kmovw %k0, %eax
20375 ; NoVLX-NEXT: vzeroupper
20378 %0 = bitcast <4 x i64> %__a to <8 x float>
20379 %1 = bitcast <4 x i64> %__b to <8 x float>
20380 %2 = fcmp oeq <8 x float> %0, %1
20381 %3 = bitcast i8 %__u to <8 x i1>
20382 %4 = and <8 x i1> %2, %3
20383 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20384 %6 = bitcast <32 x i1> %5 to i32
; Masked `fcmp oeq` on <8 x float> with the right-hand operand loaded from
; %__b. The i8 mask %__u is bitcast to <8 x i1> and ANDed with the compare
; result, which is then zero-padded to <32 x i1> and returned as an i32.
; The VLX run expects the load folded into a write-masked ymm compare; the
; NoVLX run expects a separate vmovaps, a write-masked zmm compare, and a
; kshiftlw/kshiftrw $8 pair that clears mask bits 8-15.
20388 define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
20389 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem:
20390 ; VLX: # %bb.0: # %entry
20391 ; VLX-NEXT: kmovd %edi, %k1
20392 ; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1}
20393 ; VLX-NEXT: kmovd %k0, %eax
20394 ; VLX-NEXT: vzeroupper
20397 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem:
20398 ; NoVLX: # %bb.0: # %entry
20399 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20400 ; NoVLX-NEXT: vmovaps (%rsi), %ymm1
20401 ; NoVLX-NEXT: kmovw %edi, %k1
20402 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20403 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20404 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20405 ; NoVLX-NEXT: kmovw %k0, %eax
20406 ; NoVLX-NEXT: vzeroupper
20409 %0 = bitcast <4 x i64> %__a to <8 x float>
20410 %load = load <4 x i64>, <4 x i64>* %__b
20411 %1 = bitcast <4 x i64> %load to <8 x float>
20412 %2 = fcmp oeq <8 x float> %0, %1
20413 %3 = bitcast i8 %__u to <8 x i1>
20414 %4 = and <8 x i1> %2, %3
20415 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20416 %6 = bitcast <32 x i1> %5 to i32
; Masked `fcmp oeq` on <8 x float> against a scalar float loaded from %__b
; and splatted to all 8 lanes. The i8 mask %__u is bitcast to <8 x i1> and
; ANDed with the compare result, which is then zero-padded to <32 x i1> and
; returned as an i32.
; The VLX run expects a write-masked ymm compare with a {1to8} broadcast; the
; NoVLX run expects a write-masked zmm compare with a {1to16} broadcast and a
; kshiftlw/kshiftrw $8 pair that clears mask bits 8-15.
20420 define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr {
20421 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b:
20422 ; VLX: # %bb.0: # %entry
20423 ; VLX-NEXT: kmovd %edi, %k1
20424 ; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
20425 ; VLX-NEXT: kmovd %k0, %eax
20426 ; VLX-NEXT: vzeroupper
20429 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b:
20430 ; NoVLX: # %bb.0: # %entry
20431 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20432 ; NoVLX-NEXT: kmovw %edi, %k1
20433 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
20434 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20435 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20436 ; NoVLX-NEXT: kmovw %k0, %eax
20437 ; NoVLX-NEXT: vzeroupper
20440 %0 = bitcast <4 x i64> %__a to <8 x float>
20441 %load = load float, float* %__b
20442 %vec = insertelement <8 x float> undef, float %load, i32 0
20443 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20444 %2 = fcmp oeq <8 x float> %0, %1
20445 %3 = bitcast i8 %__u to <8 x i1>
20446 %4 = and <8 x i1> %2, %3
20447 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20448 %6 = bitcast <32 x i1> %5 to i32
; Unmasked `fcmp oeq` on <8 x float>, both operands in registers. The
; <8 x i1> result is widened to <64 x i1> by a shufflevector whose indices
; 8-15 pick elements of the zeroinitializer operand, then returned as an i64.
; The VLX run expects a ymm compare read back with kmovq; the NoVLX run
; expects a zmm compare, a kshiftlw/kshiftrw $8 pair that clears mask bits
; 8-15, and a kmovw into %eax.
20454 define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20455 ; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask:
20456 ; VLX: # %bb.0: # %entry
20457 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
20458 ; VLX-NEXT: kmovq %k0, %rax
20459 ; VLX-NEXT: vzeroupper
20462 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask:
20463 ; NoVLX: # %bb.0: # %entry
20464 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
20465 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20466 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20467 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20468 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20469 ; NoVLX-NEXT: kmovw %k0, %eax
20470 ; NoVLX-NEXT: vzeroupper
20473 %0 = bitcast <4 x i64> %__a to <8 x float>
20474 %1 = bitcast <4 x i64> %__b to <8 x float>
20475 %2 = fcmp oeq <8 x float> %0, %1
20476 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20477 %4 = bitcast <64 x i1> %3 to i64
; Unmasked `fcmp oeq` on <8 x float> with the right-hand operand loaded from
; %__b. The <8 x i1> result is zero-padded to <64 x i1> via shufflevector and
; returned as an i64.
; The VLX run expects the load folded into a ymm compare read back with
; kmovq; the NoVLX run expects a separate vmovaps, a zmm compare, a
; kshiftlw/kshiftrw $8 pair that clears mask bits 8-15, and a kmovw into %eax.
20481 define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
20482 ; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem:
20483 ; VLX: # %bb.0: # %entry
20484 ; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0
20485 ; VLX-NEXT: kmovq %k0, %rax
20486 ; VLX-NEXT: vzeroupper
20489 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem:
20490 ; NoVLX: # %bb.0: # %entry
20491 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20492 ; NoVLX-NEXT: vmovaps (%rdi), %ymm1
20493 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20494 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20495 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20496 ; NoVLX-NEXT: kmovw %k0, %eax
20497 ; NoVLX-NEXT: vzeroupper
20500 %0 = bitcast <4 x i64> %__a to <8 x float>
20501 %load = load <4 x i64>, <4 x i64>* %__b
20502 %1 = bitcast <4 x i64> %load to <8 x float>
20503 %2 = fcmp oeq <8 x float> %0, %1
20504 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20505 %4 = bitcast <64 x i1> %3 to i64
; Unmasked `fcmp oeq` on <8 x float> against a scalar float loaded from %__b
; and splatted to all 8 lanes. The <8 x i1> result is zero-padded to
; <64 x i1> via shufflevector and returned as an i64.
; The VLX run expects a ymm compare with a {1to8} broadcast read back with
; kmovq; the NoVLX run expects a zmm compare with a {1to16} broadcast, a
; kshiftlw/kshiftrw $8 pair that clears mask bits 8-15, and a kmovw into %eax.
20509 define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
20510 ; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b:
20511 ; VLX: # %bb.0: # %entry
20512 ; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
20513 ; VLX-NEXT: kmovq %k0, %rax
20514 ; VLX-NEXT: vzeroupper
20517 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b:
20518 ; NoVLX: # %bb.0: # %entry
20519 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20520 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
20521 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20522 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20523 ; NoVLX-NEXT: kmovw %k0, %eax
20524 ; NoVLX-NEXT: vzeroupper
20527 %0 = bitcast <4 x i64> %__a to <8 x float>
20528 %load = load float, float* %__b
20529 %vec = insertelement <8 x float> undef, float %load, i32 0
20530 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20531 %2 = fcmp oeq <8 x float> %0, %1
20532 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20533 %4 = bitcast <64 x i1> %3 to i64
; Masked `fcmp oeq` on <8 x float>, both operands in registers. The i8 mask
; %__u is bitcast to <8 x i1> and ANDed with the compare result, which is
; then zero-padded to <64 x i1> and returned as an i64.
; Both runs expect %edi to be moved into %k1 and used as the write-mask of the
; compare. The VLX run reads the result with kmovq; the NoVLX run widens to
; zmm, clears mask bits 8-15 with kshiftlw/kshiftrw $8, and uses kmovw.
20537 define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20538 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask:
20539 ; VLX: # %bb.0: # %entry
20540 ; VLX-NEXT: kmovd %edi, %k1
20541 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1}
20542 ; VLX-NEXT: kmovq %k0, %rax
20543 ; VLX-NEXT: vzeroupper
20546 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask:
20547 ; NoVLX: # %bb.0: # %entry
20548 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
20549 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20550 ; NoVLX-NEXT: kmovw %edi, %k1
20551 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20552 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20553 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20554 ; NoVLX-NEXT: kmovw %k0, %eax
20555 ; NoVLX-NEXT: vzeroupper
20558 %0 = bitcast <4 x i64> %__a to <8 x float>
20559 %1 = bitcast <4 x i64> %__b to <8 x float>
20560 %2 = fcmp oeq <8 x float> %0, %1
20561 %3 = bitcast i8 %__u to <8 x i1>
20562 %4 = and <8 x i1> %2, %3
20563 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20564 %6 = bitcast <64 x i1> %5 to i64
; Masked `fcmp oeq` on <8 x float> with the right-hand operand loaded from
; %__b. The i8 mask %__u is bitcast to <8 x i1> and ANDed with the compare
; result, which is then zero-padded to <64 x i1> and returned as an i64.
; The VLX run expects the load folded into a write-masked ymm compare read
; back with kmovq; the NoVLX run expects a separate vmovaps, a write-masked
; zmm compare, a kshiftlw/kshiftrw $8 pair that clears mask bits 8-15, and a
; kmovw into %eax.
20568 define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
20569 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem:
20570 ; VLX: # %bb.0: # %entry
20571 ; VLX-NEXT: kmovd %edi, %k1
20572 ; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1}
20573 ; VLX-NEXT: kmovq %k0, %rax
20574 ; VLX-NEXT: vzeroupper
20577 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem:
20578 ; NoVLX: # %bb.0: # %entry
20579 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20580 ; NoVLX-NEXT: vmovaps (%rsi), %ymm1
20581 ; NoVLX-NEXT: kmovw %edi, %k1
20582 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20583 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20584 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20585 ; NoVLX-NEXT: kmovw %k0, %eax
20586 ; NoVLX-NEXT: vzeroupper
20589 %0 = bitcast <4 x i64> %__a to <8 x float>
20590 %load = load <4 x i64>, <4 x i64>* %__b
20591 %1 = bitcast <4 x i64> %load to <8 x float>
20592 %2 = fcmp oeq <8 x float> %0, %1
20593 %3 = bitcast i8 %__u to <8 x i1>
20594 %4 = and <8 x i1> %2, %3
20595 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20596 %6 = bitcast <64 x i1> %5 to i64
; Masked `fcmp oeq` on <8 x float> against a scalar float loaded from %__b
; and splatted to all 8 lanes. The i8 mask %__u is bitcast to <8 x i1> and
; ANDed with the compare result, which is then zero-padded to <64 x i1> and
; returned as an i64.
; The VLX run expects a write-masked ymm compare with a {1to8} broadcast read
; back with kmovq; the NoVLX run expects a write-masked zmm compare with a
; {1to16} broadcast, a kshiftlw/kshiftrw $8 pair that clears mask bits 8-15,
; and a kmovw into %eax.
20600 define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr {
20601 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b:
20602 ; VLX: # %bb.0: # %entry
20603 ; VLX-NEXT: kmovd %edi, %k1
20604 ; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
20605 ; VLX-NEXT: kmovq %k0, %rax
20606 ; VLX-NEXT: vzeroupper
20609 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b:
20610 ; NoVLX: # %bb.0: # %entry
20611 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20612 ; NoVLX-NEXT: kmovw %edi, %k1
20613 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
20614 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20615 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20616 ; NoVLX-NEXT: kmovw %k0, %eax
20617 ; NoVLX-NEXT: vzeroupper
20620 %0 = bitcast <4 x i64> %__a to <8 x float>
20621 %load = load float, float* %__b
20622 %vec = insertelement <8 x float> undef, float %load, i32 0
20623 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20624 %2 = fcmp oeq <8 x float> %0, %1
20625 %3 = bitcast i8 %__u to <8 x i1>
20626 %4 = and <8 x i1> %2, %3
20627 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20628 %6 = bitcast <64 x i1> %5 to i64
; Unmasked `fcmp oeq` on <16 x float>, both operands in registers. The
; <16 x i1> result is widened to <32 x i1> by a shufflevector whose indices
; 16-31 pick elements of the zeroinitializer operand, then returned as an i32.
; Both runs expect a single zmm compare; they differ only in the mask
; read-back (kmovd in the VLX run, kmovw in the NoVLX run).
20634 define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
20635 ; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask:
20636 ; VLX: # %bb.0: # %entry
20637 ; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20638 ; VLX-NEXT: kmovd %k0, %eax
20639 ; VLX-NEXT: vzeroupper
20642 ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask:
20643 ; NoVLX: # %bb.0: # %entry
20644 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20645 ; NoVLX-NEXT: kmovw %k0, %eax
20646 ; NoVLX-NEXT: vzeroupper
20649 %0 = bitcast <8 x i64> %__a to <16 x float>
20650 %1 = bitcast <8 x i64> %__b to <16 x float>
20651 %2 = fcmp oeq <16 x float> %0, %1
20652 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
20653 %4 = bitcast <32 x i1> %3 to i32
; Unmasked `fcmp oeq` on <16 x float> with the right-hand operand loaded from
; %__b. The <16 x i1> result is zero-padded to <32 x i1> via shufflevector and
; returned as an i32.
; Both runs expect the load folded into the zmm compare; they differ only in
; the mask read-back (kmovd in the VLX run, kmovw in the NoVLX run).
20657 define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
20658 ; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem:
20659 ; VLX: # %bb.0: # %entry
20660 ; VLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
20661 ; VLX-NEXT: kmovd %k0, %eax
20662 ; VLX-NEXT: vzeroupper
20665 ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem:
20666 ; NoVLX: # %bb.0: # %entry
20667 ; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
20668 ; NoVLX-NEXT: kmovw %k0, %eax
20669 ; NoVLX-NEXT: vzeroupper
20672 %0 = bitcast <8 x i64> %__a to <16 x float>
20673 %load = load <8 x i64>, <8 x i64>* %__b
20674 %1 = bitcast <8 x i64> %load to <16 x float>
20675 %2 = fcmp oeq <16 x float> %0, %1
20676 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
20677 %4 = bitcast <32 x i1> %3 to i32
; Unmasked `fcmp oeq` on <16 x float> against a scalar float loaded from %__b
; and splatted to all 16 lanes (insertelement + all-zero-index shufflevector).
; The <16 x i1> result is zero-padded to <32 x i1> and returned as an i32.
; Both runs expect a zmm compare with an embedded {1to16} broadcast; they
; differ only in the mask read-back (kmovd in the VLX run, kmovw in NoVLX).
20681 define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr {
20682 ; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b:
20683 ; VLX: # %bb.0: # %entry
20684 ; VLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
20685 ; VLX-NEXT: kmovd %k0, %eax
20686 ; VLX-NEXT: vzeroupper
20689 ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b:
20690 ; NoVLX: # %bb.0: # %entry
20691 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
20692 ; NoVLX-NEXT: kmovw %k0, %eax
20693 ; NoVLX-NEXT: vzeroupper
20696 %0 = bitcast <8 x i64> %__a to <16 x float>
20697 %load = load float, float* %__b
20698 %vec = insertelement <16 x float> undef, float %load, i32 0
20699 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20700 %2 = fcmp oeq <16 x float> %0, %1
20701 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
20702 %4 = bitcast <32 x i1> %3 to i32
; Masked `fcmp oeq` on <16 x float>, both operands in registers. The i16 mask
; %__u is bitcast to <16 x i1> and ANDed with the compare result, which is
; then zero-padded to <32 x i1> and returned as an i32.
; The VLX run expects the mask applied as a {%k1} write-mask on the compare;
; the NoVLX run expects an unmasked compare with the mask applied afterwards
; in a general-purpose register (andl %edi, %eax).
20706 define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
20707 ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask:
20708 ; VLX: # %bb.0: # %entry
20709 ; VLX-NEXT: kmovd %edi, %k1
20710 ; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20711 ; VLX-NEXT: kmovd %k0, %eax
20712 ; VLX-NEXT: vzeroupper
20715 ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask:
20716 ; NoVLX: # %bb.0: # %entry
20717 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20718 ; NoVLX-NEXT: kmovw %k0, %eax
20719 ; NoVLX-NEXT: andl %edi, %eax
20720 ; NoVLX-NEXT: vzeroupper
20723 %0 = bitcast <8 x i64> %__a to <16 x float>
20724 %1 = bitcast <8 x i64> %__b to <16 x float>
20725 %2 = fcmp oeq <16 x float> %0, %1
20726 %3 = bitcast i16 %__u to <16 x i1>
20727 %4 = and <16 x i1> %2, %3
20728 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
20729 %6 = bitcast <32 x i1> %5 to i32
; Masked `fcmp oeq` on <16 x float> with the right-hand operand loaded from
; %__b. The i16 mask %__u is bitcast to <16 x i1> and ANDed with the compare
; result, which is then zero-padded to <32 x i1> and returned as an i32.
; The VLX run expects the load folded into a {%k1} write-masked compare; the
; NoVLX run expects the load folded into an unmasked compare with the mask
; applied afterwards via andl %edi, %eax.
20733 define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
20734 ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem:
20735 ; VLX: # %bb.0: # %entry
20736 ; VLX-NEXT: kmovd %edi, %k1
20737 ; VLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1}
20738 ; VLX-NEXT: kmovd %k0, %eax
20739 ; VLX-NEXT: vzeroupper
20742 ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem:
20743 ; NoVLX: # %bb.0: # %entry
20744 ; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0
20745 ; NoVLX-NEXT: kmovw %k0, %eax
20746 ; NoVLX-NEXT: andl %edi, %eax
20747 ; NoVLX-NEXT: vzeroupper
20750 %0 = bitcast <8 x i64> %__a to <16 x float>
20751 %load = load <8 x i64>, <8 x i64>* %__b
20752 %1 = bitcast <8 x i64> %load to <16 x float>
20753 %2 = fcmp oeq <16 x float> %0, %1
20754 %3 = bitcast i16 %__u to <16 x i1>
20755 %4 = and <16 x i1> %2, %3
20756 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
20757 %6 = bitcast <32 x i1> %5 to i32
; Masked `fcmp oeq` on <16 x float> against a scalar float loaded from %__b
; and splatted to all 16 lanes. The i16 mask %__u is bitcast to <16 x i1> and
; ANDed with the compare result, which is then zero-padded to <32 x i1> and
; returned as an i32.
; Both runs expect a {1to16} broadcast operand. The VLX run applies the mask
; as a {%k1} write-mask; the NoVLX run applies it afterwards via
; andl %edi, %eax.
20761 define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, float* %__b) local_unnamed_addr {
20762 ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b:
20763 ; VLX: # %bb.0: # %entry
20764 ; VLX-NEXT: kmovd %edi, %k1
20765 ; VLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
20766 ; VLX-NEXT: kmovd %k0, %eax
20767 ; VLX-NEXT: vzeroupper
20770 ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b:
20771 ; NoVLX: # %bb.0: # %entry
20772 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0
20773 ; NoVLX-NEXT: kmovw %k0, %eax
20774 ; NoVLX-NEXT: andl %edi, %eax
20775 ; NoVLX-NEXT: vzeroupper
20778 %0 = bitcast <8 x i64> %__a to <16 x float>
20779 %load = load float, float* %__b
20780 %vec = insertelement <16 x float> undef, float %load, i32 0
20781 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20782 %2 = fcmp oeq <16 x float> %0, %1
20783 %3 = bitcast i16 %__u to <16 x i1>
20784 %4 = and <16 x i1> %2, %3
20785 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
20786 %6 = bitcast <32 x i1> %5 to i32
; Calls @llvm.x86.avx512.cmp.ps.512 on <16 x float> operands with trailing
; immediates i32 2 and i32 8, bitcasts the <16 x i1> result to i16 and
; zero-extends it to i32. Both run configurations share one set of
; assertions, which expect `vcmpleps {sae}` followed by kmovw.
; NOTE(review): the function name says "oeq", but the intrinsic immediate is 2
; and the expected mnemonic is vcmpleps. The assertions are autogenerated, so
; confirm the intended predicate before changing either side.
20792 define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
20793 ; CHECK-LABEL: test_vcmpoeqps_v16i1_v32i1_sae_mask:
20794 ; CHECK: # %bb.0: # %entry
20795 ; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
20796 ; CHECK-NEXT: kmovw %k0, %eax
20797 ; CHECK-NEXT: vzeroupper
20800 %0 = bitcast <8 x i64> %__a to <16 x float>
20801 %1 = bitcast <8 x i64> %__b to <16 x float>
20802 %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
20803 %3 = bitcast <16 x i1> %2 to i16
20804 %4 = zext i16 %3 to i32
; Masked variant of the {sae} compare. It calls @llvm.x86.avx512.cmp.ps.512
; with trailing immediates i32 2 and i32 8, ANDs the <16 x i1> result with
; %__u bitcast to <16 x i1>, then bitcasts to i16 and zero-extends to i32.
; Both runs expect an unmasked `vcmpleps {sae}` with the mask applied
; afterwards via andl %edi, %eax; they differ only in the mask read-back
; (kmovd in the VLX run, kmovw in the NoVLX run).
; NOTE(review): the function name says "oeq", but the intrinsic immediate is 2
; and the expected mnemonic is vcmpleps. Confirm the intended predicate before
; changing either side.
20808 define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_sae_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
20809 ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_sae_mask:
20810 ; VLX: # %bb.0: # %entry
20811 ; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
20812 ; VLX-NEXT: kmovd %k0, %eax
20813 ; VLX-NEXT: andl %edi, %eax
20814 ; VLX-NEXT: vzeroupper
20817 ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_sae_mask:
20818 ; NoVLX: # %bb.0: # %entry
20819 ; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
20820 ; NoVLX-NEXT: kmovw %k0, %eax
20821 ; NoVLX-NEXT: andl %edi, %eax
20822 ; NoVLX-NEXT: vzeroupper
20825 %0 = bitcast <8 x i64> %__a to <16 x float>
20826 %1 = bitcast <8 x i64> %__b to <16 x float>
20827 %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
20828 %3 = bitcast i16 %__u to <16 x i1>
20829 %4 = and <16 x i1> %2, %3
20830 %5 = bitcast <16 x i1> %4 to i16
20831 %6 = zext i16 %5 to i32
; Unmasked `fcmp oeq` on <16 x float>, both operands in registers. The
; <16 x i1> result is widened to <64 x i1> by a shufflevector whose indices
; 16-31 pick elements of the zeroinitializer operand, then returned as an i64.
; Both runs expect a single zmm compare; the VLX run reads the mask with
; kmovq into %rax, the NoVLX run with kmovw into %eax.
20837 define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
20838 ; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask:
20839 ; VLX: # %bb.0: # %entry
20840 ; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20841 ; VLX-NEXT: kmovq %k0, %rax
20842 ; VLX-NEXT: vzeroupper
20845 ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask:
20846 ; NoVLX: # %bb.0: # %entry
20847 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20848 ; NoVLX-NEXT: kmovw %k0, %eax
20849 ; NoVLX-NEXT: vzeroupper
20852 %0 = bitcast <8 x i64> %__a to <16 x float>
20853 %1 = bitcast <8 x i64> %__b to <16 x float>
20854 %2 = fcmp oeq <16 x float> %0, %1
20855 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
20856 %4 = bitcast <64 x i1> %3 to i64
; Unmasked `fcmp oeq` on <16 x float> with the right-hand operand loaded from
; %__b. The <16 x i1> result is zero-padded to <64 x i1> via shufflevector and
; returned as an i64.
; Both runs expect the load folded into the zmm compare; the VLX run reads
; the mask with kmovq into %rax, the NoVLX run with kmovw into %eax.
20860 define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
20861 ; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem:
20862 ; VLX: # %bb.0: # %entry
20863 ; VLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
20864 ; VLX-NEXT: kmovq %k0, %rax
20865 ; VLX-NEXT: vzeroupper
20868 ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem:
20869 ; NoVLX: # %bb.0: # %entry
20870 ; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
20871 ; NoVLX-NEXT: kmovw %k0, %eax
20872 ; NoVLX-NEXT: vzeroupper
20875 %0 = bitcast <8 x i64> %__a to <16 x float>
20876 %load = load <8 x i64>, <8 x i64>* %__b
20877 %1 = bitcast <8 x i64> %load to <16 x float>
20878 %2 = fcmp oeq <16 x float> %0, %1
20879 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
20880 %4 = bitcast <64 x i1> %3 to i64
; Unmasked `fcmp oeq` on <16 x float> against a scalar float loaded from %__b
; and splatted to all 16 lanes. The <16 x i1> result is zero-padded to
; <64 x i1> via shufflevector and returned as an i64.
; Both runs expect a zmm compare with an embedded {1to16} broadcast; the VLX
; run reads the mask with kmovq into %rax, the NoVLX run with kmovw into %eax.
20884 define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr {
20885 ; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b:
20886 ; VLX: # %bb.0: # %entry
20887 ; VLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
20888 ; VLX-NEXT: kmovq %k0, %rax
20889 ; VLX-NEXT: vzeroupper
20892 ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b:
20893 ; NoVLX: # %bb.0: # %entry
20894 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
20895 ; NoVLX-NEXT: kmovw %k0, %eax
20896 ; NoVLX-NEXT: vzeroupper
20899 %0 = bitcast <8 x i64> %__a to <16 x float>
20900 %load = load float, float* %__b
20901 %vec = insertelement <16 x float> undef, float %load, i32 0
20902 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20903 %2 = fcmp oeq <16 x float> %0, %1
20904 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
20905 %4 = bitcast <64 x i1> %3 to i64
; Masked `fcmp oeq` on <16 x float>, both operands in registers. The i16 mask
; %__u is bitcast to <16 x i1> and ANDed with the compare result, which is
; then zero-padded to <64 x i1> and returned as an i64.
; The VLX run expects the mask applied as a {%k1} write-mask and the result
; read with kmovq; the NoVLX run expects an unmasked compare, kmovw into
; %eax, and the mask applied afterwards via andl %edi, %eax.
20909 define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
20910 ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask:
20911 ; VLX: # %bb.0: # %entry
20912 ; VLX-NEXT: kmovd %edi, %k1
20913 ; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20914 ; VLX-NEXT: kmovq %k0, %rax
20915 ; VLX-NEXT: vzeroupper
20918 ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask:
20919 ; NoVLX: # %bb.0: # %entry
20920 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20921 ; NoVLX-NEXT: kmovw %k0, %eax
20922 ; NoVLX-NEXT: andl %edi, %eax
20923 ; NoVLX-NEXT: vzeroupper
20926 %0 = bitcast <8 x i64> %__a to <16 x float>
20927 %1 = bitcast <8 x i64> %__b to <16 x float>
20928 %2 = fcmp oeq <16 x float> %0, %1
20929 %3 = bitcast i16 %__u to <16 x i1>
20930 %4 = and <16 x i1> %2, %3
20931 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
20932 %6 = bitcast <64 x i1> %5 to i64
; Masked `fcmp oeq` on <16 x float> with the right-hand operand loaded from
; %__b. The i16 mask %__u is bitcast to <16 x i1> and ANDed with the compare
; result, which is then zero-padded to <64 x i1> and returned as an i64.
; The VLX run expects the load folded into a {%k1} write-masked compare read
; back with kmovq; the NoVLX run expects the load folded into an unmasked
; compare, kmovw into %eax, and the mask applied via andl %edi, %eax.
20936 define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
20937 ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem:
20938 ; VLX: # %bb.0: # %entry
20939 ; VLX-NEXT: kmovd %edi, %k1
20940 ; VLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1}
20941 ; VLX-NEXT: kmovq %k0, %rax
20942 ; VLX-NEXT: vzeroupper
20945 ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem:
20946 ; NoVLX: # %bb.0: # %entry
20947 ; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0
20948 ; NoVLX-NEXT: kmovw %k0, %eax
20949 ; NoVLX-NEXT: andl %edi, %eax
20950 ; NoVLX-NEXT: vzeroupper
20953 %0 = bitcast <8 x i64> %__a to <16 x float>
20954 %load = load <8 x i64>, <8 x i64>* %__b
20955 %1 = bitcast <8 x i64> %load to <16 x float>
20956 %2 = fcmp oeq <16 x float> %0, %1
20957 %3 = bitcast i16 %__u to <16 x i1>
20958 %4 = and <16 x i1> %2, %3
20959 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
20960 %6 = bitcast <64 x i1> %5 to i64
; Masked `fcmp oeq` on <16 x float> against a scalar float loaded from %__b
; and splatted to all 16 lanes. The i16 mask %__u is bitcast to <16 x i1> and
; ANDed with the compare result, which is then zero-padded to <64 x i1> and
; returned as an i64.
; Both runs expect a {1to16} broadcast operand. The VLX run applies the mask
; as a {%k1} write-mask and reads the result with kmovq; the NoVLX run uses
; kmovw into %eax and applies the mask afterwards via andl %edi, %eax.
20964 define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, float* %__b) local_unnamed_addr {
20965 ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b:
20966 ; VLX: # %bb.0: # %entry
20967 ; VLX-NEXT: kmovd %edi, %k1
20968 ; VLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
20969 ; VLX-NEXT: kmovq %k0, %rax
20970 ; VLX-NEXT: vzeroupper
20973 ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b:
20974 ; NoVLX: # %bb.0: # %entry
20975 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0
20976 ; NoVLX-NEXT: kmovw %k0, %eax
20977 ; NoVLX-NEXT: andl %edi, %eax
20978 ; NoVLX-NEXT: vzeroupper
20981 %0 = bitcast <8 x i64> %__a to <16 x float>
20982 %load = load float, float* %__b
20983 %vec = insertelement <16 x float> undef, float %load, i32 0
20984 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20985 %2 = fcmp oeq <16 x float> %0, %1
20986 %3 = bitcast i16 %__u to <16 x i1>
20987 %4 = and <16 x i1> %2, %3
20988 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
20989 %6 = bitcast <64 x i1> %5 to i64
; Compare of two <16 x float> vectors through the llvm.x86.avx512.cmp.ps.512
; intrinsic; the <16 x i1> result is bitcast to i16 and zero-extended to i64.
; Both run configurations share one set of expectations here (vcmpleps with
; {sae}, then kmovw).
; NOTE(review): the function name says "oeq", but the intrinsic is called with
; predicate immediate 2 and the expected instruction is vcmpleps - confirm the
; name/predicate mismatch is intentional.
20995 define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
20996 ; CHECK-LABEL: test_vcmpoeqps_v16i1_v64i1_sae_mask:
20997 ; CHECK: # %bb.0: # %entry
20998 ; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
20999 ; CHECK-NEXT: kmovw %k0, %eax
21000 ; CHECK-NEXT: vzeroupper
21003 %0 = bitcast <8 x i64> %__a to <16 x float>
21004 %1 = bitcast <8 x i64> %__b to <16 x float>
; The trailing i32 8 presumably selects suppress-all-exceptions, matching the {sae} in the expected output - TODO confirm.
21005 %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
21006 %3 = bitcast <16 x i1> %2 to i16
21007 %4 = zext i16 %3 to i64
; Masked compare of two <16 x float> vectors through the
; llvm.x86.avx512.cmp.ps.512 intrinsic. The <16 x i1> result is ANDed with
; %__u (reinterpreted as <16 x i1>), bitcast to i16 and zero-extended to i64.
; Both runs expect an unpredicated vcmpleps {sae} followed by a scalar andl
; with %edi; they differ only in kmovd versus kmovw.
; NOTE(review): the function name says "oeq", but the intrinsic is called with
; predicate immediate 2 and the expected instruction is vcmpleps - confirm the
; name/predicate mismatch is intentional.
21011 define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_sae_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
21012 ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_sae_mask:
21013 ; VLX: # %bb.0: # %entry
21014 ; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
21015 ; VLX-NEXT: kmovd %k0, %eax
21016 ; VLX-NEXT: andl %edi, %eax
21017 ; VLX-NEXT: vzeroupper
21020 ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_sae_mask:
21021 ; NoVLX: # %bb.0: # %entry
21022 ; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
21023 ; NoVLX-NEXT: kmovw %k0, %eax
21024 ; NoVLX-NEXT: andl %edi, %eax
21025 ; NoVLX-NEXT: vzeroupper
21028 %0 = bitcast <8 x i64> %__a to <16 x float>
21029 %1 = bitcast <8 x i64> %__b to <16 x float>
; The trailing i32 8 presumably selects suppress-all-exceptions, matching the {sae} in the expected output - TODO confirm.
21030 %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
21031 %3 = bitcast i16 %__u to <16 x i1>
21032 %4 = and <16 x i1> %2, %3
21033 %5 = bitcast <16 x i1> %4 to i16
21034 %6 = zext i16 %5 to i64
21040 declare <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
; Ordered-equal compare (fcmp oeq) of two <2 x double> vectors, with the
; <2 x i1> result widened to <4 x i1> and bitcast to i4.
; The VLX run expects an xmm vcmpeqpd writing %k0; the NoVLX run expects a zmm
; compare followed by a kshiftlw/kshiftrw $14 pair on the mask.
21041 define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21042 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
21043 ; VLX: # %bb.0: # %entry
21044 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
21045 ; VLX-NEXT: kmovb %k0, %eax
21048 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
21049 ; NoVLX: # %bb.0: # %entry
21050 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21051 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21052 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21053 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21054 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21055 ; NoVLX-NEXT: kmovw %k0, %eax
21056 ; NoVLX-NEXT: vzeroupper
21059 %0 = bitcast <2 x i64> %__a to <2 x double>
21060 %1 = bitcast <2 x i64> %__b to <2 x double>
21061 %2 = fcmp oeq <2 x double> %0, %1
; Indices 0-1 select the compare lanes; indices 2-3 select lanes of the zeroinitializer operand.
21062 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21063 %4 = bitcast <4 x i1> %3 to i4
; Ordered-equal compare (fcmp oeq) of a <2 x double> vector against a vector
; loaded from %__b, with the <2 x i1> result widened to <4 x i1> and bitcast
; to i4.
; The VLX run expects the load folded into vcmpeqpd as a (%rdi) operand; the
; NoVLX run expects a separate vmovapd load before the zmm compare.
21067 define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21068 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem:
21069 ; VLX: # %bb.0: # %entry
21070 ; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
21071 ; VLX-NEXT: kmovb %k0, %eax
21074 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem:
21075 ; NoVLX: # %bb.0: # %entry
21076 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21077 ; NoVLX-NEXT: vmovapd (%rdi), %xmm1
21078 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21079 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21080 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21081 ; NoVLX-NEXT: kmovw %k0, %eax
21082 ; NoVLX-NEXT: vzeroupper
21085 %0 = bitcast <2 x i64> %__a to <2 x double>
21086 %load = load <2 x i64>, <2 x i64>* %__b
21087 %1 = bitcast <2 x i64> %load to <2 x double>
21088 %2 = fcmp oeq <2 x double> %0, %1
; Indices 0-1 select the compare lanes; indices 2-3 select lanes of the zeroinitializer operand.
21089 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21090 %4 = bitcast <4 x i1> %3 to i4
; Ordered-equal compare (fcmp oeq) of a <2 x double> vector against a scalar
; double loaded from %__b and splatted to both lanes, with the <2 x i1> result
; widened to <4 x i1> and bitcast to i4.
; The VLX run expects a {1to2} broadcast memory operand; the NoVLX run expects
; {1to8} on a zmm compare.
21094 define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
21095 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
21096 ; VLX: # %bb.0: # %entry
21097 ; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
21098 ; VLX-NEXT: kmovb %k0, %eax
21101 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
21102 ; NoVLX: # %bb.0: # %entry
21103 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21104 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
21105 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21106 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21107 ; NoVLX-NEXT: kmovw %k0, %eax
21108 ; NoVLX-NEXT: vzeroupper
21111 %0 = bitcast <2 x i64> %__a to <2 x double>
21112 %load = load double, double* %__b
; Splat the loaded scalar: insert it into lane 0, then shuffle with indices 0, 0.
21113 %vec = insertelement <2 x double> undef, double %load, i32 0
21114 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21115 %2 = fcmp oeq <2 x double> %0, %1
; Indices 0-1 select the compare lanes; indices 2-3 select lanes of the zeroinitializer operand.
21116 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21117 %4 = bitcast <4 x i1> %3 to i4
; Ordered-equal compare (fcmp oeq) of two <2 x double> vectors, ANDed with the
; i2 mask %__u (reinterpreted as <2 x i1>), then widened to <4 x i1> and
; bitcast to i4.
; Both runs expect %edi moved into %k1 and the compare predicated with {%k1};
; the NoVLX run does the compare on zmm and adds a kshiftlw/kshiftrw $14 pair.
21121 define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21122 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask:
21123 ; VLX: # %bb.0: # %entry
21124 ; VLX-NEXT: kmovd %edi, %k1
21125 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
21126 ; VLX-NEXT: kmovb %k0, %eax
21129 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask:
21130 ; NoVLX: # %bb.0: # %entry
21131 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21132 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21133 ; NoVLX-NEXT: kmovw %edi, %k1
21134 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21135 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21136 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21137 ; NoVLX-NEXT: kmovw %k0, %eax
21138 ; NoVLX-NEXT: vzeroupper
21141 %0 = bitcast <2 x i64> %__a to <2 x double>
21142 %1 = bitcast <2 x i64> %__b to <2 x double>
21143 %2 = fcmp oeq <2 x double> %0, %1
21144 %3 = bitcast i2 %__u to <2 x i1>
21145 %4 = and <2 x i1> %2, %3
; Indices 0-1 select the masked compare lanes; indices 2-3 select lanes of the zeroinitializer operand.
21146 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21147 %6 = bitcast <4 x i1> %5 to i4
; Ordered-equal compare (fcmp oeq) of a <2 x double> vector against a vector
; loaded from %__b, ANDed with the i2 mask %__u (reinterpreted as <2 x i1>),
; then widened to <4 x i1> and bitcast to i4.
; The VLX run expects the load folded into a {%k1}-predicated vcmpeqpd as a
; (%rsi) operand; the NoVLX run expects a separate vmovapd load before a
; {%k1}-predicated zmm compare.
21151 define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21152 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem:
21153 ; VLX: # %bb.0: # %entry
21154 ; VLX-NEXT: kmovd %edi, %k1
21155 ; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
21156 ; VLX-NEXT: kmovb %k0, %eax
21159 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem:
21160 ; NoVLX: # %bb.0: # %entry
21161 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21162 ; NoVLX-NEXT: kmovw %edi, %k1
21163 ; NoVLX-NEXT: vmovapd (%rsi), %xmm1
21164 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21165 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21166 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21167 ; NoVLX-NEXT: kmovw %k0, %eax
21168 ; NoVLX-NEXT: vzeroupper
21171 %0 = bitcast <2 x i64> %__a to <2 x double>
21172 %load = load <2 x i64>, <2 x i64>* %__b
21173 %1 = bitcast <2 x i64> %load to <2 x double>
21174 %2 = fcmp oeq <2 x double> %0, %1
21175 %3 = bitcast i2 %__u to <2 x i1>
21176 %4 = and <2 x i1> %2, %3
; Indices 0-1 select the masked compare lanes; indices 2-3 select lanes of the zeroinitializer operand.
21177 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21178 %6 = bitcast <4 x i1> %5 to i4
; Ordered-equal compare (fcmp oeq) of a <2 x double> vector against a scalar
; double loaded from %__b and splatted to both lanes, ANDed with the i2 mask
; %__u (reinterpreted as <2 x i1>), then widened to <4 x i1> and bitcast to i4.
; The VLX run expects a {1to2} broadcast operand predicated with {%k1}; the
; NoVLX run expects {1to8} on a {%k1}-predicated zmm compare.
21182 define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
21183 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
21184 ; VLX: # %bb.0: # %entry
21185 ; VLX-NEXT: kmovd %edi, %k1
21186 ; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
21187 ; VLX-NEXT: kmovb %k0, %eax
21190 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
21191 ; NoVLX: # %bb.0: # %entry
21192 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21193 ; NoVLX-NEXT: kmovw %edi, %k1
21194 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
21195 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21196 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21197 ; NoVLX-NEXT: kmovw %k0, %eax
21198 ; NoVLX-NEXT: vzeroupper
21201 %0 = bitcast <2 x i64> %__a to <2 x double>
21202 %load = load double, double* %__b
; Splat the loaded scalar: insert it into lane 0, then shuffle with indices 0, 0.
21203 %vec = insertelement <2 x double> undef, double %load, i32 0
21204 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21205 %2 = fcmp oeq <2 x double> %0, %1
21206 %3 = bitcast i2 %__u to <2 x i1>
21207 %4 = and <2 x i1> %2, %3
; Indices 0-1 select the masked compare lanes; indices 2-3 select lanes of the zeroinitializer operand.
21208 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21209 %6 = bitcast <4 x i1> %5 to i4
; Ordered-equal compare (fcmp oeq) of two <2 x double> vectors, with the
; <2 x i1> result widened to <8 x i1> and bitcast to i8.
; The VLX run expects an xmm vcmpeqpd writing %k0; the NoVLX run expects a zmm
; compare followed by a kshiftlw/kshiftrw $14 pair on the mask.
21215 define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21216 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask:
21217 ; VLX: # %bb.0: # %entry
21218 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
21219 ; VLX-NEXT: kmovd %k0, %eax
21220 ; VLX-NEXT: # kill: def $al killed $al killed $eax
21223 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask:
21224 ; NoVLX: # %bb.0: # %entry
21225 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21226 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21227 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21228 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21229 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21230 ; NoVLX-NEXT: kmovw %k0, %eax
21231 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
21232 ; NoVLX-NEXT: vzeroupper
21235 %0 = bitcast <2 x i64> %__a to <2 x double>
21236 %1 = bitcast <2 x i64> %__b to <2 x double>
21237 %2 = fcmp oeq <2 x double> %0, %1
; Indices 0-1 select the compare lanes; every index 2 or 3 selects a lane of the zeroinitializer operand.
21238 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21239 %4 = bitcast <8 x i1> %3 to i8
; Ordered-equal compare (fcmp oeq) of a <2 x double> vector against a vector
; loaded from %__b, with the <2 x i1> result widened to <8 x i1> and bitcast
; to i8.
; The VLX run expects the load folded into vcmpeqpd as a (%rdi) operand; the
; NoVLX run expects a separate vmovapd load before the zmm compare.
21243 define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21244 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem:
21245 ; VLX: # %bb.0: # %entry
21246 ; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
21247 ; VLX-NEXT: kmovd %k0, %eax
21248 ; VLX-NEXT: # kill: def $al killed $al killed $eax
21251 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem:
21252 ; NoVLX: # %bb.0: # %entry
21253 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21254 ; NoVLX-NEXT: vmovapd (%rdi), %xmm1
21255 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21256 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21257 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21258 ; NoVLX-NEXT: kmovw %k0, %eax
21259 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
21260 ; NoVLX-NEXT: vzeroupper
21263 %0 = bitcast <2 x i64> %__a to <2 x double>
21264 %load = load <2 x i64>, <2 x i64>* %__b
21265 %1 = bitcast <2 x i64> %load to <2 x double>
21266 %2 = fcmp oeq <2 x double> %0, %1
; Indices 0-1 select the compare lanes; every index 2 or 3 selects a lane of the zeroinitializer operand.
21267 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21268 %4 = bitcast <8 x i1> %3 to i8
; Ordered-equal compare (fcmp oeq) of a <2 x double> vector against a scalar
; double loaded from %__b and splatted to both lanes, with the <2 x i1> result
; widened to <8 x i1> and bitcast to i8.
; The VLX run expects a {1to2} broadcast memory operand; the NoVLX run expects
; {1to8} on a zmm compare.
21272 define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
21273 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
21274 ; VLX: # %bb.0: # %entry
21275 ; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
21276 ; VLX-NEXT: kmovd %k0, %eax
21277 ; VLX-NEXT: # kill: def $al killed $al killed $eax
21280 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
21281 ; NoVLX: # %bb.0: # %entry
21282 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21283 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
21284 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21285 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21286 ; NoVLX-NEXT: kmovw %k0, %eax
21287 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
21288 ; NoVLX-NEXT: vzeroupper
21291 %0 = bitcast <2 x i64> %__a to <2 x double>
21292 %load = load double, double* %__b
; Splat the loaded scalar: insert it into lane 0, then shuffle with indices 0, 0.
21293 %vec = insertelement <2 x double> undef, double %load, i32 0
21294 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21295 %2 = fcmp oeq <2 x double> %0, %1
; Indices 0-1 select the compare lanes; every index 2 or 3 selects a lane of the zeroinitializer operand.
21296 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21297 %4 = bitcast <8 x i1> %3 to i8
; Ordered-equal compare (fcmp oeq) of two <2 x double> vectors, ANDed with the
; i2 mask %__u (reinterpreted as <2 x i1>), then widened to <8 x i1> and
; bitcast to i8.
; Both runs expect %edi moved into %k1 and the compare predicated with {%k1};
; the NoVLX run does the compare on zmm and adds a kshiftlw/kshiftrw $14 pair.
21301 define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21302 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask:
21303 ; VLX: # %bb.0: # %entry
21304 ; VLX-NEXT: kmovd %edi, %k1
21305 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
21306 ; VLX-NEXT: kmovd %k0, %eax
21307 ; VLX-NEXT: # kill: def $al killed $al killed $eax
21310 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask:
21311 ; NoVLX: # %bb.0: # %entry
21312 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21313 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21314 ; NoVLX-NEXT: kmovw %edi, %k1
21315 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21316 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21317 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21318 ; NoVLX-NEXT: kmovw %k0, %eax
21319 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
21320 ; NoVLX-NEXT: vzeroupper
21323 %0 = bitcast <2 x i64> %__a to <2 x double>
21324 %1 = bitcast <2 x i64> %__b to <2 x double>
21325 %2 = fcmp oeq <2 x double> %0, %1
21326 %3 = bitcast i2 %__u to <2 x i1>
21327 %4 = and <2 x i1> %2, %3
; Indices 0-1 select the masked compare lanes; every index 2 or 3 selects a lane of the zeroinitializer operand.
21328 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21329 %6 = bitcast <8 x i1> %5 to i8
; Ordered-equal compare (fcmp oeq) of a <2 x double> vector against a vector
; loaded from %__b, ANDed with the i2 mask %__u (reinterpreted as <2 x i1>),
; then widened to <8 x i1> and bitcast to i8.
; The VLX run expects the load folded into a {%k1}-predicated vcmpeqpd as a
; (%rsi) operand; the NoVLX run expects a separate vmovapd load before a
; {%k1}-predicated zmm compare.
21333 define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21334 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem:
21335 ; VLX: # %bb.0: # %entry
21336 ; VLX-NEXT: kmovd %edi, %k1
21337 ; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
21338 ; VLX-NEXT: kmovd %k0, %eax
21339 ; VLX-NEXT: # kill: def $al killed $al killed $eax
21342 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem:
21343 ; NoVLX: # %bb.0: # %entry
21344 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21345 ; NoVLX-NEXT: kmovw %edi, %k1
21346 ; NoVLX-NEXT: vmovapd (%rsi), %xmm1
21347 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21348 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21349 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21350 ; NoVLX-NEXT: kmovw %k0, %eax
21351 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
21352 ; NoVLX-NEXT: vzeroupper
21355 %0 = bitcast <2 x i64> %__a to <2 x double>
21356 %load = load <2 x i64>, <2 x i64>* %__b
21357 %1 = bitcast <2 x i64> %load to <2 x double>
21358 %2 = fcmp oeq <2 x double> %0, %1
21359 %3 = bitcast i2 %__u to <2 x i1>
21360 %4 = and <2 x i1> %2, %3
; Indices 0-1 select the masked compare lanes; every index 2 or 3 selects a lane of the zeroinitializer operand.
21361 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21362 %6 = bitcast <8 x i1> %5 to i8
; Ordered-equal compare (fcmp oeq) of a <2 x double> vector against a scalar
; double loaded from %__b and splatted to both lanes, ANDed with the i2 mask
; %__u (reinterpreted as <2 x i1>), then widened to <8 x i1> and bitcast to i8.
; The VLX run expects a {1to2} broadcast operand predicated with {%k1}; the
; NoVLX run expects {1to8} on a {%k1}-predicated zmm compare.
21366 define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
21367 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
21368 ; VLX: # %bb.0: # %entry
21369 ; VLX-NEXT: kmovd %edi, %k1
21370 ; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
21371 ; VLX-NEXT: kmovd %k0, %eax
21372 ; VLX-NEXT: # kill: def $al killed $al killed $eax
21375 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
21376 ; NoVLX: # %bb.0: # %entry
21377 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21378 ; NoVLX-NEXT: kmovw %edi, %k1
21379 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
21380 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21381 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21382 ; NoVLX-NEXT: kmovw %k0, %eax
21383 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
21384 ; NoVLX-NEXT: vzeroupper
21387 %0 = bitcast <2 x i64> %__a to <2 x double>
21388 %load = load double, double* %__b
; Splat the loaded scalar: insert it into lane 0, then shuffle with indices 0, 0.
21389 %vec = insertelement <2 x double> undef, double %load, i32 0
21390 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21391 %2 = fcmp oeq <2 x double> %0, %1
21392 %3 = bitcast i2 %__u to <2 x i1>
21393 %4 = and <2 x i1> %2, %3
; Indices 0-1 select the masked compare lanes; every index 2 or 3 selects a lane of the zeroinitializer operand.
21394 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21395 %6 = bitcast <8 x i1> %5 to i8
; Ordered-equal compare (fcmp oeq) of two <2 x double> vectors, with the
; <2 x i1> result widened to <16 x i1> and bitcast to i16.
; The VLX run expects an xmm vcmpeqpd writing %k0; the NoVLX run expects a zmm
; compare followed by a kshiftlw/kshiftrw $14 pair on the mask.
21401 define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21402 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
21403 ; VLX: # %bb.0: # %entry
21404 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
21405 ; VLX-NEXT: kmovd %k0, %eax
21406 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
21409 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
21410 ; NoVLX: # %bb.0: # %entry
21411 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21412 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21413 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21414 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21415 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21416 ; NoVLX-NEXT: kmovw %k0, %eax
21417 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
21418 ; NoVLX-NEXT: vzeroupper
21421 %0 = bitcast <2 x i64> %__a to <2 x double>
21422 %1 = bitcast <2 x i64> %__b to <2 x double>
21423 %2 = fcmp oeq <2 x double> %0, %1
; Indices 0-1 select the compare lanes; every index 2 or 3 selects a lane of the zeroinitializer operand.
21424 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21425 %4 = bitcast <16 x i1> %3 to i16
; Ordered-equal compare (fcmp oeq) of a <2 x double> vector against a vector
; loaded from %__b, with the <2 x i1> result widened to <16 x i1> and bitcast
; to i16.
; The VLX run expects the load folded into vcmpeqpd as a (%rdi) operand; the
; NoVLX run expects a separate vmovapd load before the zmm compare.
21429 define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21430 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
21431 ; VLX: # %bb.0: # %entry
21432 ; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
21433 ; VLX-NEXT: kmovd %k0, %eax
21434 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
21437 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
21438 ; NoVLX: # %bb.0: # %entry
21439 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21440 ; NoVLX-NEXT: vmovapd (%rdi), %xmm1
21441 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21442 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21443 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21444 ; NoVLX-NEXT: kmovw %k0, %eax
21445 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
21446 ; NoVLX-NEXT: vzeroupper
21449 %0 = bitcast <2 x i64> %__a to <2 x double>
21450 %load = load <2 x i64>, <2 x i64>* %__b
21451 %1 = bitcast <2 x i64> %load to <2 x double>
21452 %2 = fcmp oeq <2 x double> %0, %1
; Indices 0-1 select the compare lanes; every index 2 or 3 selects a lane of the zeroinitializer operand.
21453 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21454 %4 = bitcast <16 x i1> %3 to i16
; Ordered-equal compare (fcmp oeq) of a <2 x double> vector against a scalar
; double loaded from %__b and splatted to both lanes, with the <2 x i1> result
; widened to <16 x i1> and bitcast to i16.
; The VLX run expects a {1to2} broadcast memory operand; the NoVLX run expects
; {1to8} on a zmm compare.
21458 define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
21459 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
21460 ; VLX: # %bb.0: # %entry
21461 ; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
21462 ; VLX-NEXT: kmovd %k0, %eax
21463 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
21466 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
21467 ; NoVLX: # %bb.0: # %entry
21468 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21469 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
21470 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21471 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21472 ; NoVLX-NEXT: kmovw %k0, %eax
21473 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
21474 ; NoVLX-NEXT: vzeroupper
21477 %0 = bitcast <2 x i64> %__a to <2 x double>
21478 %load = load double, double* %__b
; Splat the loaded scalar: insert it into lane 0, then shuffle with indices 0, 0.
21479 %vec = insertelement <2 x double> undef, double %load, i32 0
21480 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21481 %2 = fcmp oeq <2 x double> %0, %1
; Indices 0-1 select the compare lanes; every index 2 or 3 selects a lane of the zeroinitializer operand.
21482 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21483 %4 = bitcast <16 x i1> %3 to i16
; Ordered-equal compare (fcmp oeq) of two <2 x double> vectors, ANDed with the
; i2 mask %__u (reinterpreted as <2 x i1>), then widened to <16 x i1> and
; bitcast to i16.
; Both runs expect %edi moved into %k1 and the compare predicated with {%k1};
; the NoVLX run does the compare on zmm and adds a kshiftlw/kshiftrw $14 pair.
21487 define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21488 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask:
21489 ; VLX: # %bb.0: # %entry
21490 ; VLX-NEXT: kmovd %edi, %k1
21491 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
21492 ; VLX-NEXT: kmovd %k0, %eax
21493 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
21496 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask:
21497 ; NoVLX: # %bb.0: # %entry
21498 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21499 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21500 ; NoVLX-NEXT: kmovw %edi, %k1
21501 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21502 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21503 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21504 ; NoVLX-NEXT: kmovw %k0, %eax
21505 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
21506 ; NoVLX-NEXT: vzeroupper
21509 %0 = bitcast <2 x i64> %__a to <2 x double>
21510 %1 = bitcast <2 x i64> %__b to <2 x double>
21511 %2 = fcmp oeq <2 x double> %0, %1
21512 %3 = bitcast i2 %__u to <2 x i1>
21513 %4 = and <2 x i1> %2, %3
; Indices 0-1 select the masked compare lanes; every index 2 or 3 selects a lane of the zeroinitializer operand.
21514 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21515 %6 = bitcast <16 x i1> %5 to i16
; Ordered-equal compare (fcmp oeq) of a <2 x double> vector against a vector
; loaded from %__b, ANDed with the i2 mask %__u (reinterpreted as <2 x i1>),
; then widened to <16 x i1> and bitcast to i16.
; The VLX run expects the load folded into a {%k1}-predicated vcmpeqpd as a
; (%rsi) operand; the NoVLX run expects a separate vmovapd load before a
; {%k1}-predicated zmm compare.
21519 define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21520 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem:
21521 ; VLX: # %bb.0: # %entry
21522 ; VLX-NEXT: kmovd %edi, %k1
21523 ; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
21524 ; VLX-NEXT: kmovd %k0, %eax
21525 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
21528 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem:
21529 ; NoVLX: # %bb.0: # %entry
21530 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21531 ; NoVLX-NEXT: kmovw %edi, %k1
21532 ; NoVLX-NEXT: vmovapd (%rsi), %xmm1
21533 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21534 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21535 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21536 ; NoVLX-NEXT: kmovw %k0, %eax
21537 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
21538 ; NoVLX-NEXT: vzeroupper
21541 %0 = bitcast <2 x i64> %__a to <2 x double>
21542 %load = load <2 x i64>, <2 x i64>* %__b
21543 %1 = bitcast <2 x i64> %load to <2 x double>
21544 %2 = fcmp oeq <2 x double> %0, %1
21545 %3 = bitcast i2 %__u to <2 x i1>
21546 %4 = and <2 x i1> %2, %3
; Indices 0-1 select the masked compare lanes; every index 2 or 3 selects a lane of the zeroinitializer operand.
21547 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21548 %6 = bitcast <16 x i1> %5 to i16
; Ordered-equal compare (fcmp oeq) of a <2 x double> vector against a scalar
; double loaded from %__b and splatted to both lanes, ANDed with the i2 mask
; %__u (reinterpreted as <2 x i1>), then widened to <16 x i1> and bitcast to
; i16.
; The VLX run expects a {1to2} broadcast operand predicated with {%k1}; the
; NoVLX run expects {1to8} on a {%k1}-predicated zmm compare.
21552 define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
21553 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
21554 ; VLX: # %bb.0: # %entry
21555 ; VLX-NEXT: kmovd %edi, %k1
21556 ; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
21557 ; VLX-NEXT: kmovd %k0, %eax
21558 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
21561 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
21562 ; NoVLX: # %bb.0: # %entry
21563 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21564 ; NoVLX-NEXT: kmovw %edi, %k1
21565 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
21566 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21567 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21568 ; NoVLX-NEXT: kmovw %k0, %eax
21569 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
21570 ; NoVLX-NEXT: vzeroupper
21573 %0 = bitcast <2 x i64> %__a to <2 x double>
21574 %load = load double, double* %__b
; Splat the loaded scalar: insert it into lane 0, then shuffle with indices 0, 0.
21575 %vec = insertelement <2 x double> undef, double %load, i32 0
21576 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21577 %2 = fcmp oeq <2 x double> %0, %1
21578 %3 = bitcast i2 %__u to <2 x i1>
21579 %4 = and <2 x i1> %2, %3
; Indices 0-1 select the masked compare lanes; every index 2 or 3 selects a lane of the zeroinitializer operand.
21580 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21581 %6 = bitcast <16 x i1> %5 to i16
; Ordered-equal compare (fcmp oeq) of two <2 x double> vectors, with the
; <2 x i1> result widened to <32 x i1> and bitcast to i32.
; The VLX run expects an xmm vcmpeqpd writing %k0; the NoVLX run expects a zmm
; compare followed by a kshiftlw/kshiftrw $14 pair on the mask.
21587 define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21588 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
21589 ; VLX: # %bb.0: # %entry
21590 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
21591 ; VLX-NEXT: kmovd %k0, %eax
21594 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
21595 ; NoVLX: # %bb.0: # %entry
21596 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21597 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21598 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21599 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21600 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21601 ; NoVLX-NEXT: kmovw %k0, %eax
21602 ; NoVLX-NEXT: vzeroupper
21605 %0 = bitcast <2 x i64> %__a to <2 x double>
21606 %1 = bitcast <2 x i64> %__b to <2 x double>
21607 %2 = fcmp oeq <2 x double> %0, %1
; Indices 0-1 select the compare lanes; every index 2 or 3 selects a lane of the zeroinitializer operand.
21608 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21609 %4 = bitcast <32 x i1> %3 to i32
; Ordered-equal compare (fcmp oeq) of a <2 x double> vector against a vector
; loaded from %__b, with the <2 x i1> result widened to <32 x i1> and bitcast
; to i32.
; The VLX run expects the load folded into vcmpeqpd as a (%rdi) operand; the
; NoVLX run expects a separate vmovapd load before the zmm compare.
21613 define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21614 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
21615 ; VLX: # %bb.0: # %entry
21616 ; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
21617 ; VLX-NEXT: kmovd %k0, %eax
21620 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
21621 ; NoVLX: # %bb.0: # %entry
21622 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21623 ; NoVLX-NEXT: vmovapd (%rdi), %xmm1
21624 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21625 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21626 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21627 ; NoVLX-NEXT: kmovw %k0, %eax
21628 ; NoVLX-NEXT: vzeroupper
21631 %0 = bitcast <2 x i64> %__a to <2 x double>
21632 %load = load <2 x i64>, <2 x i64>* %__b
21633 %1 = bitcast <2 x i64> %load to <2 x double>
21634 %2 = fcmp oeq <2 x double> %0, %1
; Indices 0-1 select the compare lanes; every index 2 or 3 selects a lane of the zeroinitializer operand.
21635 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21636 %4 = bitcast <32 x i1> %3 to i32
; Ordered-equal compare (fcmp oeq) of a <2 x double> vector against a scalar
; double loaded from %__b and splatted to both lanes, with the <2 x i1> result
; widened to <32 x i1> and bitcast to i32.
; The VLX run expects a {1to2} broadcast memory operand; the NoVLX run expects
; {1to8} on a zmm compare.
21640 define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
21641 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
21642 ; VLX: # %bb.0: # %entry
21643 ; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
21644 ; VLX-NEXT: kmovd %k0, %eax
21647 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
21648 ; NoVLX: # %bb.0: # %entry
21649 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21650 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
21651 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21652 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21653 ; NoVLX-NEXT: kmovw %k0, %eax
21654 ; NoVLX-NEXT: vzeroupper
21657 %0 = bitcast <2 x i64> %__a to <2 x double>
21658 %load = load double, double* %__b
; Splat the loaded scalar: insert it into lane 0, then shuffle with indices 0, 0.
21659 %vec = insertelement <2 x double> undef, double %load, i32 0
21660 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21661 %2 = fcmp oeq <2 x double> %0, %1
; Indices 0-1 select the compare lanes; every index 2 or 3 selects a lane of the zeroinitializer operand.
21662 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21663 %4 = bitcast <32 x i1> %3 to i32
; Ordered-equal compare (fcmp oeq) of two <2 x double> vectors, ANDed with the
; i2 mask %__u (reinterpreted as <2 x i1>), then widened to <32 x i1> and
; bitcast to i32.
; Both runs expect %edi moved into %k1 and the compare predicated with {%k1};
; the NoVLX run does the compare on zmm and adds a kshiftlw/kshiftrw $14 pair.
21667 define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21668 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask:
21669 ; VLX: # %bb.0: # %entry
21670 ; VLX-NEXT: kmovd %edi, %k1
21671 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
21672 ; VLX-NEXT: kmovd %k0, %eax
21675 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask:
21676 ; NoVLX: # %bb.0: # %entry
21677 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21678 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21679 ; NoVLX-NEXT: kmovw %edi, %k1
21680 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21681 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21682 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21683 ; NoVLX-NEXT: kmovw %k0, %eax
21684 ; NoVLX-NEXT: vzeroupper
21687 %0 = bitcast <2 x i64> %__a to <2 x double>
21688 %1 = bitcast <2 x i64> %__b to <2 x double>
21689 %2 = fcmp oeq <2 x double> %0, %1
21690 %3 = bitcast i2 %__u to <2 x i1>
21691 %4 = and <2 x i1> %2, %3
; Indices 0-1 select the masked compare lanes; every index 2 or 3 selects a lane of the zeroinitializer operand.
21692 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21693 %6 = bitcast <32 x i1> %5 to i32
; Ordered-equal compare (fcmp oeq) of a <2 x double> vector against a vector
; loaded from %__b, ANDed with the i2 mask %__u (reinterpreted as <2 x i1>),
; then widened to <32 x i1> and bitcast to i32.
; The VLX run expects the load folded into a {%k1}-predicated vcmpeqpd as a
; (%rsi) operand; the NoVLX run expects a separate vmovapd load before a
; {%k1}-predicated zmm compare.
21697 define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21698 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem:
21699 ; VLX: # %bb.0: # %entry
21700 ; VLX-NEXT: kmovd %edi, %k1
21701 ; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
21702 ; VLX-NEXT: kmovd %k0, %eax
21705 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem:
21706 ; NoVLX: # %bb.0: # %entry
21707 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21708 ; NoVLX-NEXT: kmovw %edi, %k1
21709 ; NoVLX-NEXT: vmovapd (%rsi), %xmm1
21710 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21711 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21712 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21713 ; NoVLX-NEXT: kmovw %k0, %eax
21714 ; NoVLX-NEXT: vzeroupper
21717 %0 = bitcast <2 x i64> %__a to <2 x double>
21718 %load = load <2 x i64>, <2 x i64>* %__b
21719 %1 = bitcast <2 x i64> %load to <2 x double>
21720 %2 = fcmp oeq <2 x double> %0, %1
21721 %3 = bitcast i2 %__u to <2 x i1>
21722 %4 = and <2 x i1> %2, %3
; Indices 0-1 select the masked compare lanes; every index 2 or 3 selects a lane of the zeroinitializer operand.
21723 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21724 %6 = bitcast <32 x i1> %5 to i32
; Ordered-equal compare (fcmp oeq) of a <2 x double> vector against a scalar
; double loaded from %__b and splatted to both lanes, ANDed with the i2 mask
; %__u (reinterpreted as <2 x i1>), then widened to <32 x i1> and bitcast to
; i32.
; The VLX run expects a {1to2} broadcast operand predicated with {%k1}; the
; NoVLX run expects {1to8} on a {%k1}-predicated zmm compare.
21728 define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
21729 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
21730 ; VLX: # %bb.0: # %entry
21731 ; VLX-NEXT: kmovd %edi, %k1
21732 ; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
21733 ; VLX-NEXT: kmovd %k0, %eax
21736 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
21737 ; NoVLX: # %bb.0: # %entry
21738 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21739 ; NoVLX-NEXT: kmovw %edi, %k1
21740 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
21741 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21742 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21743 ; NoVLX-NEXT: kmovw %k0, %eax
21744 ; NoVLX-NEXT: vzeroupper
21747 %0 = bitcast <2 x i64> %__a to <2 x double>
21748 %load = load double, double* %__b
; Splat the loaded scalar: insert it into lane 0, then shuffle with indices 0, 0.
21749 %vec = insertelement <2 x double> undef, double %load, i32 0
21750 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21751 %2 = fcmp oeq <2 x double> %0, %1
21752 %3 = bitcast i2 %__u to <2 x i1>
21753 %4 = and <2 x i1> %2, %3
; Indices 0-1 select the masked compare lanes; every index 2 or 3 selects a lane of the zeroinitializer operand.
21754 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21755 %6 = bitcast <32 x i1> %5 to i32
21761 define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21762 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
21763 ; VLX: # %bb.0: # %entry
21764 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
21765 ; VLX-NEXT: kmovq %k0, %rax
21768 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
21769 ; NoVLX: # %bb.0: # %entry
21770 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21771 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21772 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21773 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21774 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21775 ; NoVLX-NEXT: kmovw %k0, %eax
21776 ; NoVLX-NEXT: vzeroupper
21779 %0 = bitcast <2 x i64> %__a to <2 x double>
21780 %1 = bitcast <2 x i64> %__b to <2 x double>
21781 %2 = fcmp oeq <2 x double> %0, %1
21782 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21783 %4 = bitcast <64 x i1> %3 to i64
21787 define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21788 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
21789 ; VLX: # %bb.0: # %entry
21790 ; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
21791 ; VLX-NEXT: kmovq %k0, %rax
21794 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
21795 ; NoVLX: # %bb.0: # %entry
21796 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21797 ; NoVLX-NEXT: vmovapd (%rdi), %xmm1
21798 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21799 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21800 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21801 ; NoVLX-NEXT: kmovw %k0, %eax
21802 ; NoVLX-NEXT: vzeroupper
21805 %0 = bitcast <2 x i64> %__a to <2 x double>
21806 %load = load <2 x i64>, <2 x i64>* %__b
21807 %1 = bitcast <2 x i64> %load to <2 x double>
21808 %2 = fcmp oeq <2 x double> %0, %1
21809 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21810 %4 = bitcast <64 x i1> %3 to i64
21814 define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
21815 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
21816 ; VLX: # %bb.0: # %entry
21817 ; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
21818 ; VLX-NEXT: kmovq %k0, %rax
21821 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
21822 ; NoVLX: # %bb.0: # %entry
21823 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21824 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
21825 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21826 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21827 ; NoVLX-NEXT: kmovw %k0, %eax
21828 ; NoVLX-NEXT: vzeroupper
21831 %0 = bitcast <2 x i64> %__a to <2 x double>
21832 %load = load double, double* %__b
21833 %vec = insertelement <2 x double> undef, double %load, i32 0
21834 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21835 %2 = fcmp oeq <2 x double> %0, %1
21836 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21837 %4 = bitcast <64 x i1> %3 to i64
21841 define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21842 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask:
21843 ; VLX: # %bb.0: # %entry
21844 ; VLX-NEXT: kmovd %edi, %k1
21845 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
21846 ; VLX-NEXT: kmovq %k0, %rax
21849 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask:
21850 ; NoVLX: # %bb.0: # %entry
21851 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21852 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21853 ; NoVLX-NEXT: kmovw %edi, %k1
21854 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21855 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21856 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21857 ; NoVLX-NEXT: kmovw %k0, %eax
21858 ; NoVLX-NEXT: vzeroupper
21861 %0 = bitcast <2 x i64> %__a to <2 x double>
21862 %1 = bitcast <2 x i64> %__b to <2 x double>
21863 %2 = fcmp oeq <2 x double> %0, %1
21864 %3 = bitcast i2 %__u to <2 x i1>
21865 %4 = and <2 x i1> %2, %3
21866 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21867 %6 = bitcast <64 x i1> %5 to i64
21871 define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21872 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem:
21873 ; VLX: # %bb.0: # %entry
21874 ; VLX-NEXT: kmovd %edi, %k1
21875 ; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
21876 ; VLX-NEXT: kmovq %k0, %rax
21879 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem:
21880 ; NoVLX: # %bb.0: # %entry
21881 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21882 ; NoVLX-NEXT: kmovw %edi, %k1
21883 ; NoVLX-NEXT: vmovapd (%rsi), %xmm1
21884 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21885 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21886 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21887 ; NoVLX-NEXT: kmovw %k0, %eax
21888 ; NoVLX-NEXT: vzeroupper
21891 %0 = bitcast <2 x i64> %__a to <2 x double>
21892 %load = load <2 x i64>, <2 x i64>* %__b
21893 %1 = bitcast <2 x i64> %load to <2 x double>
21894 %2 = fcmp oeq <2 x double> %0, %1
21895 %3 = bitcast i2 %__u to <2 x i1>
21896 %4 = and <2 x i1> %2, %3
21897 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21898 %6 = bitcast <64 x i1> %5 to i64
21902 define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
21903 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
21904 ; VLX: # %bb.0: # %entry
21905 ; VLX-NEXT: kmovd %edi, %k1
21906 ; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
21907 ; VLX-NEXT: kmovq %k0, %rax
21910 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
21911 ; NoVLX: # %bb.0: # %entry
21912 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21913 ; NoVLX-NEXT: kmovw %edi, %k1
21914 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
21915 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21916 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21917 ; NoVLX-NEXT: kmovw %k0, %eax
21918 ; NoVLX-NEXT: vzeroupper
21921 %0 = bitcast <2 x i64> %__a to <2 x double>
21922 %load = load double, double* %__b
21923 %vec = insertelement <2 x double> undef, double %load, i32 0
21924 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21925 %2 = fcmp oeq <2 x double> %0, %1
21926 %3 = bitcast i2 %__u to <2 x i1>
21927 %4 = and <2 x i1> %2, %3
21928 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21929 %6 = bitcast <64 x i1> %5 to i64
; Unmasked register-register test: <4 x double> oeq compare widened to an 8-bit mask.
; Both <4 x i64> arguments are bitcast to <4 x double> and compared lane-wise.
; The +avx512vl run expects one 256-bit vcmpeqpd into %k0;
; the +avx512f-only run expects the operands to be treated as zmm registers and a
; kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
21935 define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
21936 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask:
21937 ; VLX: # %bb.0: # %entry
21938 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
21939 ; VLX-NEXT: kmovd %k0, %eax
21940 ; VLX-NEXT: # kill: def $al killed $al killed $eax
21941 ; VLX-NEXT: vzeroupper
21944 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask:
21945 ; NoVLX: # %bb.0: # %entry
21946 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
21947 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
21948 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21949 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
21950 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
21951 ; NoVLX-NEXT: kmovw %k0, %eax
21952 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
21953 ; NoVLX-NEXT: vzeroupper
21956 %0 = bitcast <4 x i64> %__a to <4 x double>
21957 %1 = bitcast <4 x i64> %__b to <4 x double>
21958 %2 = fcmp oeq <4 x double> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so bits 4..7 of the widened mask are zero.
21959 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
21960 %4 = bitcast <8 x i1> %3 to i8
; Unmasked full-vector memory test: <4 x double> oeq compare widened to an 8-bit mask.
; The second operand is loaded from %__b as <4 x i64> and bitcast to <4 x double>.
; The +avx512vl run expects the load folded into vcmpeqpd as a memory operand;
; the +avx512f-only run expects a separate vmovapd load into %ymm1, a zmm compare,
; and a kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
21964 define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
21965 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem:
21966 ; VLX: # %bb.0: # %entry
21967 ; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
21968 ; VLX-NEXT: kmovd %k0, %eax
21969 ; VLX-NEXT: # kill: def $al killed $al killed $eax
21970 ; VLX-NEXT: vzeroupper
21973 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem:
21974 ; NoVLX: # %bb.0: # %entry
21975 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
21976 ; NoVLX-NEXT: vmovapd (%rdi), %ymm1
21977 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21978 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
21979 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
21980 ; NoVLX-NEXT: kmovw %k0, %eax
21981 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
21982 ; NoVLX-NEXT: vzeroupper
21985 %0 = bitcast <4 x i64> %__a to <4 x double>
21986 %load = load <4 x i64>, <4 x i64>* %__b
21987 %1 = bitcast <4 x i64> %load to <4 x double>
21988 %2 = fcmp oeq <4 x double> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so bits 4..7 of the widened mask are zero.
21989 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
21990 %4 = bitcast <8 x i1> %3 to i8
; Unmasked broadcast-memory test: <4 x double> oeq compare widened to an 8-bit mask.
; A scalar double loaded from %__b is splatted to all four lanes (insertelement +
; shufflevector) and compared against %__a.
; The +avx512vl run expects vcmpeqpd with a {1to4} embedded broadcast;
; the +avx512f-only run expects a zmm compare with {1to8} followed by a
; kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
21994 define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
21995 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
21996 ; VLX: # %bb.0: # %entry
21997 ; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
21998 ; VLX-NEXT: kmovd %k0, %eax
21999 ; VLX-NEXT: # kill: def $al killed $al killed $eax
22000 ; VLX-NEXT: vzeroupper
22003 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
22004 ; NoVLX: # %bb.0: # %entry
22005 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22006 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22007 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22008 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22009 ; NoVLX-NEXT: kmovw %k0, %eax
22010 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
22011 ; NoVLX-NEXT: vzeroupper
22014 %0 = bitcast <4 x i64> %__a to <4 x double>
22015 %load = load double, double* %__b
22016 %vec = insertelement <4 x double> undef, double %load, i32 0
22017 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22018 %2 = fcmp oeq <4 x double> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so bits 4..7 of the widened mask are zero.
22019 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22020 %4 = bitcast <8 x i1> %3 to i8
; Masked register-register test: <4 x double> oeq compare widened to an 8-bit mask.
; The compare result is ANDed with %__u viewed as <4 x i1> before widening.
; The +avx512vl run expects %edi moved into %k1 and used as the write-mask of one
; 256-bit vcmpeqpd; the +avx512f-only run expects a masked zmm compare followed by
; a kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
22024 define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22025 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask:
22026 ; VLX: # %bb.0: # %entry
22027 ; VLX-NEXT: kmovd %edi, %k1
22028 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
22029 ; VLX-NEXT: kmovd %k0, %eax
22030 ; VLX-NEXT: # kill: def $al killed $al killed $eax
22031 ; VLX-NEXT: vzeroupper
22034 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask:
22035 ; NoVLX: # %bb.0: # %entry
22036 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22037 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22038 ; NoVLX-NEXT: kmovw %edi, %k1
22039 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22040 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22041 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22042 ; NoVLX-NEXT: kmovw %k0, %eax
22043 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
22044 ; NoVLX-NEXT: vzeroupper
22047 %0 = bitcast <4 x i64> %__a to <4 x double>
22048 %1 = bitcast <4 x i64> %__b to <4 x double>
22049 %2 = fcmp oeq <4 x double> %0, %1
22050 %3 = bitcast i4 %__u to <4 x i1>
22051 %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so bits 4..7 of the widened mask are zero.
22052 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22053 %6 = bitcast <8 x i1> %5 to i8
; Masked full-vector memory test: <4 x double> oeq compare widened to an 8-bit mask.
; The second operand is loaded from %__b as <4 x i64> and bitcast to <4 x double>;
; the compare result is ANDed with %__u viewed as <4 x i1> before widening.
; The +avx512vl run expects the load folded into a masked vcmpeqpd;
; the +avx512f-only run expects a separate vmovapd load, a masked zmm compare, and
; a kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
22057 define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22058 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem:
22059 ; VLX: # %bb.0: # %entry
22060 ; VLX-NEXT: kmovd %edi, %k1
22061 ; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
22062 ; VLX-NEXT: kmovd %k0, %eax
22063 ; VLX-NEXT: # kill: def $al killed $al killed $eax
22064 ; VLX-NEXT: vzeroupper
22067 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem:
22068 ; NoVLX: # %bb.0: # %entry
22069 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22070 ; NoVLX-NEXT: kmovw %edi, %k1
22071 ; NoVLX-NEXT: vmovapd (%rsi), %ymm1
22072 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22073 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22074 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22075 ; NoVLX-NEXT: kmovw %k0, %eax
22076 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
22077 ; NoVLX-NEXT: vzeroupper
22080 %0 = bitcast <4 x i64> %__a to <4 x double>
22081 %load = load <4 x i64>, <4 x i64>* %__b
22082 %1 = bitcast <4 x i64> %load to <4 x double>
22083 %2 = fcmp oeq <4 x double> %0, %1
22084 %3 = bitcast i4 %__u to <4 x i1>
22085 %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so bits 4..7 of the widened mask are zero.
22086 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22087 %6 = bitcast <8 x i1> %5 to i8
; Masked broadcast-memory test: <4 x double> oeq compare widened to an 8-bit mask.
; A scalar double loaded from %__b is splatted to all four lanes (insertelement +
; shufflevector), compared against %__a, and ANDed with %__u viewed as <4 x i1>.
; The +avx512vl run expects one masked vcmpeqpd with a {1to4} embedded broadcast;
; the +avx512f-only run expects a masked zmm compare with {1to8} followed by a
; kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
22091 define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
22092 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
22093 ; VLX: # %bb.0: # %entry
22094 ; VLX-NEXT: kmovd %edi, %k1
22095 ; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
22096 ; VLX-NEXT: kmovd %k0, %eax
22097 ; VLX-NEXT: # kill: def $al killed $al killed $eax
22098 ; VLX-NEXT: vzeroupper
22101 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
22102 ; NoVLX: # %bb.0: # %entry
22103 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22104 ; NoVLX-NEXT: kmovw %edi, %k1
22105 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
22106 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22107 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22108 ; NoVLX-NEXT: kmovw %k0, %eax
22109 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
22110 ; NoVLX-NEXT: vzeroupper
22113 %0 = bitcast <4 x i64> %__a to <4 x double>
22114 %load = load double, double* %__b
22115 %vec = insertelement <4 x double> undef, double %load, i32 0
22116 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22117 %2 = fcmp oeq <4 x double> %0, %1
22118 %3 = bitcast i4 %__u to <4 x i1>
22119 %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so bits 4..7 of the widened mask are zero.
22120 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22121 %6 = bitcast <8 x i1> %5 to i8
; Unmasked register-register test: <4 x double> oeq compare widened to a 16-bit mask.
; Both <4 x i64> arguments are bitcast to <4 x double> and compared lane-wise.
; The +avx512vl run expects one 256-bit vcmpeqpd into %k0;
; the +avx512f-only run expects the operands to be treated as zmm registers and a
; kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
22127 define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22128 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask:
22129 ; VLX: # %bb.0: # %entry
22130 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
22131 ; VLX-NEXT: kmovd %k0, %eax
22132 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22133 ; VLX-NEXT: vzeroupper
22136 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask:
22137 ; NoVLX: # %bb.0: # %entry
22138 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22139 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22140 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22141 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22142 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22143 ; NoVLX-NEXT: kmovw %k0, %eax
22144 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22145 ; NoVLX-NEXT: vzeroupper
22148 %0 = bitcast <4 x i64> %__a to <4 x double>
22149 %1 = bitcast <4 x i64> %__b to <4 x double>
22150 %2 = fcmp oeq <4 x double> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so bits 4..15 of the widened mask are zero.
22151 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22152 %4 = bitcast <16 x i1> %3 to i16
; Unmasked full-vector memory test: <4 x double> oeq compare widened to a 16-bit mask.
; The second operand is loaded from %__b as <4 x i64> and bitcast to <4 x double>.
; The +avx512vl run expects the load folded into vcmpeqpd as a memory operand;
; the +avx512f-only run expects a separate vmovapd load into %ymm1, a zmm compare,
; and a kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
22156 define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22157 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem:
22158 ; VLX: # %bb.0: # %entry
22159 ; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
22160 ; VLX-NEXT: kmovd %k0, %eax
22161 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22162 ; VLX-NEXT: vzeroupper
22165 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem:
22166 ; NoVLX: # %bb.0: # %entry
22167 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22168 ; NoVLX-NEXT: vmovapd (%rdi), %ymm1
22169 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22170 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22171 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22172 ; NoVLX-NEXT: kmovw %k0, %eax
22173 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22174 ; NoVLX-NEXT: vzeroupper
22177 %0 = bitcast <4 x i64> %__a to <4 x double>
22178 %load = load <4 x i64>, <4 x i64>* %__b
22179 %1 = bitcast <4 x i64> %load to <4 x double>
22180 %2 = fcmp oeq <4 x double> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so bits 4..15 of the widened mask are zero.
22181 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22182 %4 = bitcast <16 x i1> %3 to i16
; Unmasked broadcast-memory test: <4 x double> oeq compare widened to a 16-bit mask.
; A scalar double loaded from %__b is splatted to all four lanes (insertelement +
; shufflevector) and compared against %__a.
; The +avx512vl run expects vcmpeqpd with a {1to4} embedded broadcast;
; the +avx512f-only run expects a zmm compare with {1to8} followed by a
; kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
22186 define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
22187 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
22188 ; VLX: # %bb.0: # %entry
22189 ; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
22190 ; VLX-NEXT: kmovd %k0, %eax
22191 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22192 ; VLX-NEXT: vzeroupper
22195 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
22196 ; NoVLX: # %bb.0: # %entry
22197 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22198 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22199 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22200 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22201 ; NoVLX-NEXT: kmovw %k0, %eax
22202 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22203 ; NoVLX-NEXT: vzeroupper
22206 %0 = bitcast <4 x i64> %__a to <4 x double>
22207 %load = load double, double* %__b
22208 %vec = insertelement <4 x double> undef, double %load, i32 0
22209 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22210 %2 = fcmp oeq <4 x double> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so bits 4..15 of the widened mask are zero.
22211 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22212 %4 = bitcast <16 x i1> %3 to i16
; Masked register-register test: <4 x double> oeq compare widened to a 16-bit mask.
; The compare result is ANDed with %__u viewed as <4 x i1> before widening.
; The +avx512vl run expects %edi moved into %k1 and used as the write-mask of one
; 256-bit vcmpeqpd; the +avx512f-only run expects a masked zmm compare followed by
; a kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
22216 define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22217 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask:
22218 ; VLX: # %bb.0: # %entry
22219 ; VLX-NEXT: kmovd %edi, %k1
22220 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
22221 ; VLX-NEXT: kmovd %k0, %eax
22222 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22223 ; VLX-NEXT: vzeroupper
22226 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask:
22227 ; NoVLX: # %bb.0: # %entry
22228 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22229 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22230 ; NoVLX-NEXT: kmovw %edi, %k1
22231 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22232 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22233 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22234 ; NoVLX-NEXT: kmovw %k0, %eax
22235 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22236 ; NoVLX-NEXT: vzeroupper
22239 %0 = bitcast <4 x i64> %__a to <4 x double>
22240 %1 = bitcast <4 x i64> %__b to <4 x double>
22241 %2 = fcmp oeq <4 x double> %0, %1
22242 %3 = bitcast i4 %__u to <4 x i1>
22243 %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so bits 4..15 of the widened mask are zero.
22244 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22245 %6 = bitcast <16 x i1> %5 to i16
; Masked full-vector memory test: <4 x double> oeq compare widened to a 16-bit mask.
; The second operand is loaded from %__b as <4 x i64> and bitcast to <4 x double>;
; the compare result is ANDed with %__u viewed as <4 x i1> before widening.
; The +avx512vl run expects the load folded into a masked vcmpeqpd;
; the +avx512f-only run expects a separate vmovapd load, a masked zmm compare, and
; a kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
22249 define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22250 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem:
22251 ; VLX: # %bb.0: # %entry
22252 ; VLX-NEXT: kmovd %edi, %k1
22253 ; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
22254 ; VLX-NEXT: kmovd %k0, %eax
22255 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22256 ; VLX-NEXT: vzeroupper
22259 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem:
22260 ; NoVLX: # %bb.0: # %entry
22261 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22262 ; NoVLX-NEXT: kmovw %edi, %k1
22263 ; NoVLX-NEXT: vmovapd (%rsi), %ymm1
22264 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22265 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22266 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22267 ; NoVLX-NEXT: kmovw %k0, %eax
22268 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22269 ; NoVLX-NEXT: vzeroupper
22272 %0 = bitcast <4 x i64> %__a to <4 x double>
22273 %load = load <4 x i64>, <4 x i64>* %__b
22274 %1 = bitcast <4 x i64> %load to <4 x double>
22275 %2 = fcmp oeq <4 x double> %0, %1
22276 %3 = bitcast i4 %__u to <4 x i1>
22277 %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so bits 4..15 of the widened mask are zero.
22278 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22279 %6 = bitcast <16 x i1> %5 to i16
; Masked broadcast-memory test: <4 x double> oeq compare widened to a 16-bit mask.
; A scalar double loaded from %__b is splatted to all four lanes (insertelement +
; shufflevector), compared against %__a, and ANDed with %__u viewed as <4 x i1>.
; The +avx512vl run expects one masked vcmpeqpd with a {1to4} embedded broadcast;
; the +avx512f-only run expects a masked zmm compare with {1to8} followed by a
; kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
22283 define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
22284 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
22285 ; VLX: # %bb.0: # %entry
22286 ; VLX-NEXT: kmovd %edi, %k1
22287 ; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
22288 ; VLX-NEXT: kmovd %k0, %eax
22289 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22290 ; VLX-NEXT: vzeroupper
22293 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
22294 ; NoVLX: # %bb.0: # %entry
22295 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22296 ; NoVLX-NEXT: kmovw %edi, %k1
22297 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
22298 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22299 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22300 ; NoVLX-NEXT: kmovw %k0, %eax
22301 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22302 ; NoVLX-NEXT: vzeroupper
22305 %0 = bitcast <4 x i64> %__a to <4 x double>
22306 %load = load double, double* %__b
22307 %vec = insertelement <4 x double> undef, double %load, i32 0
22308 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22309 %2 = fcmp oeq <4 x double> %0, %1
22310 %3 = bitcast i4 %__u to <4 x i1>
22311 %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so bits 4..15 of the widened mask are zero.
22312 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22313 %6 = bitcast <16 x i1> %5 to i16
; Unmasked register-register test: <4 x double> oeq compare widened to a 32-bit mask.
; Both <4 x i64> arguments are bitcast to <4 x double> and compared lane-wise.
; The +avx512vl run expects one 256-bit vcmpeqpd into %k0;
; the +avx512f-only run expects the operands to be treated as zmm registers and a
; kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
22319 define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22320 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask:
22321 ; VLX: # %bb.0: # %entry
22322 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
22323 ; VLX-NEXT: kmovd %k0, %eax
22324 ; VLX-NEXT: vzeroupper
22327 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask:
22328 ; NoVLX: # %bb.0: # %entry
22329 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22330 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22331 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22332 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22333 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22334 ; NoVLX-NEXT: kmovw %k0, %eax
22335 ; NoVLX-NEXT: vzeroupper
22338 %0 = bitcast <4 x i64> %__a to <4 x double>
22339 %1 = bitcast <4 x i64> %__b to <4 x double>
22340 %2 = fcmp oeq <4 x double> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so bits 4..31 of the widened mask are zero.
22341 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22342 %4 = bitcast <32 x i1> %3 to i32
; Unmasked full-vector memory test: <4 x double> oeq compare widened to a 32-bit mask.
; The second operand is loaded from %__b as <4 x i64> and bitcast to <4 x double>.
; The +avx512vl run expects the load folded into vcmpeqpd as a memory operand;
; the +avx512f-only run expects a separate vmovapd load into %ymm1, a zmm compare,
; and a kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
22346 define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22347 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem:
22348 ; VLX: # %bb.0: # %entry
22349 ; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
22350 ; VLX-NEXT: kmovd %k0, %eax
22351 ; VLX-NEXT: vzeroupper
22354 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem:
22355 ; NoVLX: # %bb.0: # %entry
22356 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22357 ; NoVLX-NEXT: vmovapd (%rdi), %ymm1
22358 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22359 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22360 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22361 ; NoVLX-NEXT: kmovw %k0, %eax
22362 ; NoVLX-NEXT: vzeroupper
22365 %0 = bitcast <4 x i64> %__a to <4 x double>
22366 %load = load <4 x i64>, <4 x i64>* %__b
22367 %1 = bitcast <4 x i64> %load to <4 x double>
22368 %2 = fcmp oeq <4 x double> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so bits 4..31 of the widened mask are zero.
22369 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22370 %4 = bitcast <32 x i1> %3 to i32
; Unmasked broadcast-memory test: <4 x double> oeq compare widened to a 32-bit mask.
; A scalar double loaded from %__b is splatted to all four lanes (insertelement +
; shufflevector) and compared against %__a.
; The +avx512vl run expects vcmpeqpd with a {1to4} embedded broadcast;
; the +avx512f-only run expects a zmm compare with {1to8} followed by a
; kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
22374 define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
22375 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
22376 ; VLX: # %bb.0: # %entry
22377 ; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
22378 ; VLX-NEXT: kmovd %k0, %eax
22379 ; VLX-NEXT: vzeroupper
22382 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
22383 ; NoVLX: # %bb.0: # %entry
22384 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22385 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22386 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22387 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22388 ; NoVLX-NEXT: kmovw %k0, %eax
22389 ; NoVLX-NEXT: vzeroupper
22392 %0 = bitcast <4 x i64> %__a to <4 x double>
22393 %load = load double, double* %__b
22394 %vec = insertelement <4 x double> undef, double %load, i32 0
22395 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22396 %2 = fcmp oeq <4 x double> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so bits 4..31 of the widened mask are zero.
22397 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22398 %4 = bitcast <32 x i1> %3 to i32
; Masked register-register test: <4 x double> oeq compare widened to a 32-bit mask.
; The compare result is ANDed with %__u viewed as <4 x i1> before widening.
; The +avx512vl run expects %edi moved into %k1 and used as the write-mask of one
; 256-bit vcmpeqpd; the +avx512f-only run expects a masked zmm compare followed by
; a kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
22402 define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22403 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask:
22404 ; VLX: # %bb.0: # %entry
22405 ; VLX-NEXT: kmovd %edi, %k1
22406 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
22407 ; VLX-NEXT: kmovd %k0, %eax
22408 ; VLX-NEXT: vzeroupper
22411 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask:
22412 ; NoVLX: # %bb.0: # %entry
22413 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22414 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22415 ; NoVLX-NEXT: kmovw %edi, %k1
22416 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22417 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22418 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22419 ; NoVLX-NEXT: kmovw %k0, %eax
22420 ; NoVLX-NEXT: vzeroupper
22423 %0 = bitcast <4 x i64> %__a to <4 x double>
22424 %1 = bitcast <4 x i64> %__b to <4 x double>
22425 %2 = fcmp oeq <4 x double> %0, %1
22426 %3 = bitcast i4 %__u to <4 x i1>
22427 %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so bits 4..31 of the widened mask are zero.
22428 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22429 %6 = bitcast <32 x i1> %5 to i32
; Masked full-vector memory test: <4 x double> oeq compare widened to a 32-bit mask.
; The second operand is loaded from %__b as <4 x i64> and bitcast to <4 x double>;
; the compare result is ANDed with %__u viewed as <4 x i1> before widening.
; The +avx512vl run expects the load folded into a masked vcmpeqpd;
; the +avx512f-only run expects a separate vmovapd load, a masked zmm compare, and
; a kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
22433 define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22434 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem:
22435 ; VLX: # %bb.0: # %entry
22436 ; VLX-NEXT: kmovd %edi, %k1
22437 ; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
22438 ; VLX-NEXT: kmovd %k0, %eax
22439 ; VLX-NEXT: vzeroupper
22442 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem:
22443 ; NoVLX: # %bb.0: # %entry
22444 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22445 ; NoVLX-NEXT: kmovw %edi, %k1
22446 ; NoVLX-NEXT: vmovapd (%rsi), %ymm1
22447 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22448 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22449 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22450 ; NoVLX-NEXT: kmovw %k0, %eax
22451 ; NoVLX-NEXT: vzeroupper
22454 %0 = bitcast <4 x i64> %__a to <4 x double>
22455 %load = load <4 x i64>, <4 x i64>* %__b
22456 %1 = bitcast <4 x i64> %load to <4 x double>
22457 %2 = fcmp oeq <4 x double> %0, %1
22458 %3 = bitcast i4 %__u to <4 x i1>
22459 %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so bits 4..31 of the widened mask are zero.
22460 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22461 %6 = bitcast <32 x i1> %5 to i32
; Masked broadcast-memory test: <4 x double> oeq compare widened to a 32-bit mask.
; A scalar double loaded from %__b is splatted to all four lanes (insertelement +
; shufflevector), compared against %__a, and ANDed with %__u viewed as <4 x i1>.
; The +avx512vl run expects one masked vcmpeqpd with a {1to4} embedded broadcast;
; the +avx512f-only run expects a masked zmm compare with {1to8} followed by a
; kshiftlw/kshiftrw pair by 12 that keeps only the low 4 mask bits.
22465 define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
22466 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
22467 ; VLX: # %bb.0: # %entry
22468 ; VLX-NEXT: kmovd %edi, %k1
22469 ; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
22470 ; VLX-NEXT: kmovd %k0, %eax
22471 ; VLX-NEXT: vzeroupper
22474 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
22475 ; NoVLX: # %bb.0: # %entry
22476 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22477 ; NoVLX-NEXT: kmovw %edi, %k1
22478 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
22479 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22480 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22481 ; NoVLX-NEXT: kmovw %k0, %eax
22482 ; NoVLX-NEXT: vzeroupper
22485 %0 = bitcast <4 x i64> %__a to <4 x double>
22486 %load = load double, double* %__b
22487 %vec = insertelement <4 x double> undef, double %load, i32 0
22488 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22489 %2 = fcmp oeq <4 x double> %0, %1
22490 %3 = bitcast i4 %__u to <4 x i1>
22491 %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so bits 4..31 of the widened mask are zero.
22492 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22493 %6 = bitcast <32 x i1> %5 to i32
; Unmasked fcmp oeq of two register operands, both viewed as <4 x double>.
; The <4 x i1> result is widened to <64 x i1> (lanes 0-3 from the compare,
; every other lane from the zeroinitializer operand) and bitcast to i64.
; VLX checks expect a 256-bit vcmpeqpd and a kmovq of the mask; NoVLX checks
; expect both inputs treated as zmm registers, a 512-bit compare, a
; kshiftlw/kshiftrw $12 pair and a kmovw into %eax.
22499 define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22500 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
22501 ; VLX: # %bb.0: # %entry
22502 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
22503 ; VLX-NEXT: kmovq %k0, %rax
22504 ; VLX-NEXT: vzeroupper
22507 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
22508 ; NoVLX: # %bb.0: # %entry
22509 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22510 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22511 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22512 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22513 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22514 ; NoVLX-NEXT: kmovw %k0, %eax
22515 ; NoVLX-NEXT: vzeroupper
22518 %0 = bitcast <4 x i64> %__a to <4 x double>
22519 %1 = bitcast <4 x i64> %__b to <4 x double>
22520 %2 = fcmp oeq <4 x double> %0, %1
22521 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22522 %4 = bitcast <64 x i1> %3 to i64
; Unmasked fcmp oeq of %__a (viewed as <4 x double>) against a <4 x i64>
; vector loaded from %__b and viewed the same way. The <4 x i1> result is
; widened to <64 x i1> with zero lanes above lane 3 and bitcast to i64.
; VLX checks expect the load folded into a 256-bit vcmpeqpd; NoVLX checks
; expect a separate vmovapd load, a 512-bit compare and a kshiftlw/kshiftrw
; $12 pair.
22526 define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22527 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem:
22528 ; VLX: # %bb.0: # %entry
22529 ; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
22530 ; VLX-NEXT: kmovq %k0, %rax
22531 ; VLX-NEXT: vzeroupper
22534 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem:
22535 ; NoVLX: # %bb.0: # %entry
22536 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22537 ; NoVLX-NEXT: vmovapd (%rdi), %ymm1
22538 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22539 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22540 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22541 ; NoVLX-NEXT: kmovw %k0, %eax
22542 ; NoVLX-NEXT: vzeroupper
22545 %0 = bitcast <4 x i64> %__a to <4 x double>
22546 %load = load <4 x i64>, <4 x i64>* %__b
22547 %1 = bitcast <4 x i64> %load to <4 x double>
22548 %2 = fcmp oeq <4 x double> %0, %1
22549 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22550 %4 = bitcast <64 x i1> %3 to i64
; Unmasked broadcast variant: one double is loaded from %__b, splatted to all
; four lanes (insertelement + all-zero-index shufflevector) and compared with
; fcmp oeq against %__a viewed as <4 x double>. The <4 x i1> result is widened
; to <64 x i1> with zero lanes above lane 3 and bitcast to i64.
; VLX checks expect a {1to4} broadcast operand on a 256-bit vcmpeqpd; NoVLX
; checks expect a {1to8} broadcast on a 512-bit compare plus a
; kshiftlw/kshiftrw $12 pair.
22554 define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
22555 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
22556 ; VLX: # %bb.0: # %entry
22557 ; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
22558 ; VLX-NEXT: kmovq %k0, %rax
22559 ; VLX-NEXT: vzeroupper
22562 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
22563 ; NoVLX: # %bb.0: # %entry
22564 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22565 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22566 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22567 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22568 ; NoVLX-NEXT: kmovw %k0, %eax
22569 ; NoVLX-NEXT: vzeroupper
22572 %0 = bitcast <4 x i64> %__a to <4 x double>
22573 %load = load double, double* %__b
22574 %vec = insertelement <4 x double> undef, double %load, i32 0
22575 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22576 %2 = fcmp oeq <4 x double> %0, %1
22577 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22578 %4 = bitcast <64 x i1> %3 to i64
; Masked fcmp oeq of two register operands, both viewed as <4 x double>.
; The <4 x i1> result is AND-ed with the 4-bit mask %__u, widened to <64 x i1>
; (lanes 0-3 from the AND, every other lane from the zeroinitializer operand)
; and bitcast to i64.
; VLX checks expect %edi moved into %k1 and used as the write-mask of a
; 256-bit vcmpeqpd; NoVLX checks expect a 512-bit masked compare followed by
; a kshiftlw/kshiftrw $12 pair.
22582 define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22583 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask:
22584 ; VLX: # %bb.0: # %entry
22585 ; VLX-NEXT: kmovd %edi, %k1
22586 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
22587 ; VLX-NEXT: kmovq %k0, %rax
22588 ; VLX-NEXT: vzeroupper
22591 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask:
22592 ; NoVLX: # %bb.0: # %entry
22593 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22594 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22595 ; NoVLX-NEXT: kmovw %edi, %k1
22596 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22597 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22598 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22599 ; NoVLX-NEXT: kmovw %k0, %eax
22600 ; NoVLX-NEXT: vzeroupper
22603 %0 = bitcast <4 x i64> %__a to <4 x double>
22604 %1 = bitcast <4 x i64> %__b to <4 x double>
22605 %2 = fcmp oeq <4 x double> %0, %1
22606 %3 = bitcast i4 %__u to <4 x i1>
22607 %4 = and <4 x i1> %2, %3
22608 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22609 %6 = bitcast <64 x i1> %5 to i64
; Masked fcmp oeq of %__a (viewed as <4 x double>) against a <4 x i64> vector
; loaded from %__b and viewed the same way. The <4 x i1> result is AND-ed with
; the 4-bit mask %__u, widened to <64 x i1> with zero lanes above lane 3, and
; bitcast to i64.
; VLX checks expect a masked 256-bit vcmpeqpd with the load folded in; NoVLX
; checks expect a separate vmovapd load, a 512-bit masked compare and a
; kshiftlw/kshiftrw $12 pair.
22613 define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22614 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem:
22615 ; VLX: # %bb.0: # %entry
22616 ; VLX-NEXT: kmovd %edi, %k1
22617 ; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
22618 ; VLX-NEXT: kmovq %k0, %rax
22619 ; VLX-NEXT: vzeroupper
22622 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem:
22623 ; NoVLX: # %bb.0: # %entry
22624 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22625 ; NoVLX-NEXT: kmovw %edi, %k1
22626 ; NoVLX-NEXT: vmovapd (%rsi), %ymm1
22627 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22628 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22629 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22630 ; NoVLX-NEXT: kmovw %k0, %eax
22631 ; NoVLX-NEXT: vzeroupper
22634 %0 = bitcast <4 x i64> %__a to <4 x double>
22635 %load = load <4 x i64>, <4 x i64>* %__b
22636 %1 = bitcast <4 x i64> %load to <4 x double>
22637 %2 = fcmp oeq <4 x double> %0, %1
22638 %3 = bitcast i4 %__u to <4 x i1>
22639 %4 = and <4 x i1> %2, %3
22640 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22641 %6 = bitcast <64 x i1> %5 to i64
; Masked broadcast variant: one double is loaded from %__b, splatted to all
; four lanes (insertelement + all-zero-index shufflevector) and compared with
; fcmp oeq against %__a viewed as <4 x double>. The result is AND-ed with the
; 4-bit mask %__u, widened to <64 x i1> with zero lanes above lane 3, and
; bitcast to i64.
; VLX checks expect a {1to4} broadcast operand on a masked 256-bit vcmpeqpd;
; NoVLX checks expect a {1to8} broadcast on a 512-bit masked compare plus a
; kshiftlw/kshiftrw $12 pair.
22645 define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
22646 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
22647 ; VLX: # %bb.0: # %entry
22648 ; VLX-NEXT: kmovd %edi, %k1
22649 ; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
22650 ; VLX-NEXT: kmovq %k0, %rax
22651 ; VLX-NEXT: vzeroupper
22654 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
22655 ; NoVLX: # %bb.0: # %entry
22656 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22657 ; NoVLX-NEXT: kmovw %edi, %k1
22658 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
22659 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22660 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22661 ; NoVLX-NEXT: kmovw %k0, %eax
22662 ; NoVLX-NEXT: vzeroupper
22665 %0 = bitcast <4 x i64> %__a to <4 x double>
22666 %load = load double, double* %__b
22667 %vec = insertelement <4 x double> undef, double %load, i32 0
22668 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22669 %2 = fcmp oeq <4 x double> %0, %1
22670 %3 = bitcast i4 %__u to <4 x i1>
22671 %4 = and <4 x i1> %2, %3
22672 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22673 %6 = bitcast <64 x i1> %5 to i64
; Unmasked fcmp oeq of two register operands, both viewed as <8 x double>.
; The <8 x i1> result is widened to <16 x i1> (lanes 0-7 from the compare,
; lanes 8-15 from the zeroinitializer operand) and bitcast to i16.
; Both prefixes expect a 512-bit vcmpeqpd; they differ only in the mask move
; (kmovd for VLX, kmovw for NoVLX).
22679 define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
22680 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
22681 ; VLX: # %bb.0: # %entry
22682 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22683 ; VLX-NEXT: kmovd %k0, %eax
22684 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22685 ; VLX-NEXT: vzeroupper
22688 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
22689 ; NoVLX: # %bb.0: # %entry
22690 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22691 ; NoVLX-NEXT: kmovw %k0, %eax
22692 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22693 ; NoVLX-NEXT: vzeroupper
22696 %0 = bitcast <8 x i64> %__a to <8 x double>
22697 %1 = bitcast <8 x i64> %__b to <8 x double>
22698 %2 = fcmp oeq <8 x double> %0, %1
22699 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22700 %4 = bitcast <16 x i1> %3 to i16
; Unmasked fcmp oeq of %__a (viewed as <8 x double>) against an <8 x i64>
; vector loaded from %__b and viewed the same way. The <8 x i1> result is
; widened to <16 x i1> with zero lanes 8-15 and bitcast to i16.
; Both prefixes expect the load folded into a 512-bit vcmpeqpd; they differ
; only in the mask move (kmovd for VLX, kmovw for NoVLX).
22704 define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
22705 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
22706 ; VLX: # %bb.0: # %entry
22707 ; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
22708 ; VLX-NEXT: kmovd %k0, %eax
22709 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22710 ; VLX-NEXT: vzeroupper
22713 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
22714 ; NoVLX: # %bb.0: # %entry
22715 ; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
22716 ; NoVLX-NEXT: kmovw %k0, %eax
22717 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22718 ; NoVLX-NEXT: vzeroupper
22721 %0 = bitcast <8 x i64> %__a to <8 x double>
22722 %load = load <8 x i64>, <8 x i64>* %__b
22723 %1 = bitcast <8 x i64> %load to <8 x double>
22724 %2 = fcmp oeq <8 x double> %0, %1
22725 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22726 %4 = bitcast <16 x i1> %3 to i16
; Unmasked broadcast variant: one double is loaded from %__b, splatted to all
; eight lanes (insertelement + all-zero-index shufflevector) and compared with
; fcmp oeq against %__a viewed as <8 x double>. The <8 x i1> result is widened
; to <16 x i1> with zero lanes 8-15 and bitcast to i16.
; Both prefixes expect the splat folded as a {1to8} broadcast operand on a
; 512-bit vcmpeqpd.
22730 define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
22731 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
22732 ; VLX: # %bb.0: # %entry
22733 ; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22734 ; VLX-NEXT: kmovd %k0, %eax
22735 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22736 ; VLX-NEXT: vzeroupper
22739 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
22740 ; NoVLX: # %bb.0: # %entry
22741 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22742 ; NoVLX-NEXT: kmovw %k0, %eax
22743 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22744 ; NoVLX-NEXT: vzeroupper
22747 %0 = bitcast <8 x i64> %__a to <8 x double>
22748 %load = load double, double* %__b
22749 %vec = insertelement <8 x double> undef, double %load, i32 0
22750 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
22751 %2 = fcmp oeq <8 x double> %0, %1
22752 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22753 %4 = bitcast <16 x i1> %3 to i16
; Masked fcmp oeq of two register operands, both viewed as <8 x double>.
; The <8 x i1> result is AND-ed with the 8-bit mask %__u, widened to
; <16 x i1> (lanes 0-7 from the AND, lanes 8-15 from the zeroinitializer
; operand) and bitcast to i16.
; Both prefixes expect %edi moved into %k1 and used as the write-mask of a
; 512-bit vcmpeqpd.
22757 define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
22758 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask:
22759 ; VLX: # %bb.0: # %entry
22760 ; VLX-NEXT: kmovd %edi, %k1
22761 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22762 ; VLX-NEXT: kmovd %k0, %eax
22763 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22764 ; VLX-NEXT: vzeroupper
22767 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask:
22768 ; NoVLX: # %bb.0: # %entry
22769 ; NoVLX-NEXT: kmovw %edi, %k1
22770 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22771 ; NoVLX-NEXT: kmovw %k0, %eax
22772 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22773 ; NoVLX-NEXT: vzeroupper
22776 %0 = bitcast <8 x i64> %__a to <8 x double>
22777 %1 = bitcast <8 x i64> %__b to <8 x double>
22778 %2 = fcmp oeq <8 x double> %0, %1
22779 %3 = bitcast i8 %__u to <8 x i1>
22780 %4 = and <8 x i1> %2, %3
22781 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22782 %6 = bitcast <16 x i1> %5 to i16
; Masked fcmp oeq of %__a (viewed as <8 x double>) against an <8 x i64>
; vector loaded from %__b and viewed the same way. The <8 x i1> result is
; AND-ed with the 8-bit mask %__u, widened to <16 x i1> with zero lanes 8-15,
; and bitcast to i16.
; Both prefixes expect a masked 512-bit vcmpeqpd with the load folded in.
22786 define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
22787 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem:
22788 ; VLX: # %bb.0: # %entry
22789 ; VLX-NEXT: kmovd %edi, %k1
22790 ; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
22791 ; VLX-NEXT: kmovd %k0, %eax
22792 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22793 ; VLX-NEXT: vzeroupper
22796 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem:
22797 ; NoVLX: # %bb.0: # %entry
22798 ; NoVLX-NEXT: kmovw %edi, %k1
22799 ; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
22800 ; NoVLX-NEXT: kmovw %k0, %eax
22801 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22802 ; NoVLX-NEXT: vzeroupper
22805 %0 = bitcast <8 x i64> %__a to <8 x double>
22806 %load = load <8 x i64>, <8 x i64>* %__b
22807 %1 = bitcast <8 x i64> %load to <8 x double>
22808 %2 = fcmp oeq <8 x double> %0, %1
22809 %3 = bitcast i8 %__u to <8 x i1>
22810 %4 = and <8 x i1> %2, %3
22811 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22812 %6 = bitcast <16 x i1> %5 to i16
; Masked broadcast variant: one double is loaded from %__b, splatted to all
; eight lanes (insertelement + all-zero-index shufflevector) and compared with
; fcmp oeq against %__a viewed as <8 x double>. The result is AND-ed with the
; 8-bit mask %__u, widened to <16 x i1> with zero lanes 8-15, and bitcast to
; i16.
; Both prefixes expect a {1to8} broadcast operand on a masked 512-bit
; vcmpeqpd.
22816 define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr {
22817 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
22818 ; VLX: # %bb.0: # %entry
22819 ; VLX-NEXT: kmovd %edi, %k1
22820 ; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
22821 ; VLX-NEXT: kmovd %k0, %eax
22822 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22823 ; VLX-NEXT: vzeroupper
22826 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
22827 ; NoVLX: # %bb.0: # %entry
22828 ; NoVLX-NEXT: kmovw %edi, %k1
22829 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
22830 ; NoVLX-NEXT: kmovw %k0, %eax
22831 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22832 ; NoVLX-NEXT: vzeroupper
22835 %0 = bitcast <8 x i64> %__a to <8 x double>
22836 %load = load double, double* %__b
22837 %vec = insertelement <8 x double> undef, double %load, i32 0
22838 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
22839 %2 = fcmp oeq <8 x double> %0, %1
22840 %3 = bitcast i8 %__u to <8 x i1>
22841 %4 = and <8 x i1> %2, %3
22842 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22843 %6 = bitcast <16 x i1> %5 to i16
; SAE variant: instead of fcmp, this calls @llvm.x86.avx512.cmp.pd.512 on the
; two inputs (viewed as <8 x double>) with trailing immediates i32 2 and
; i32 8. The <8 x i1> result is bitcast to i8 and zero-extended to i16.
; Both prefixes expect vcmplepd with the {sae} modifier, then a move of the
; mask into %eax and a movzbl of %al.
; NOTE(review): the function name says "oeq" but the expected instruction is
; vcmplepd (immediate 2) - confirm whether the name or the immediate is the
; intended one.
22849 define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
22850 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask:
22851 ; VLX: # %bb.0: # %entry
22852 ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
22853 ; VLX-NEXT: kmovd %k0, %eax
22854 ; VLX-NEXT: movzbl %al, %eax
22855 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22856 ; VLX-NEXT: vzeroupper
22859 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask:
22860 ; NoVLX: # %bb.0: # %entry
22861 ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
22862 ; NoVLX-NEXT: kmovw %k0, %eax
22863 ; NoVLX-NEXT: movzbl %al, %eax
22864 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22865 ; NoVLX-NEXT: vzeroupper
22868 %0 = bitcast <8 x i64> %__a to <8 x double>
22869 %1 = bitcast <8 x i64> %__b to <8 x double>
22870 %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
22871 %3 = bitcast <8 x i1> %2 to i8
22872 %4 = zext i8 %3 to i16
; Masked SAE variant: calls @llvm.x86.avx512.cmp.pd.512 on the two inputs
; (viewed as <8 x double>) with trailing immediates i32 2 and i32 8, ANDs the
; <8 x i1> result with the 8-bit mask %__u, bitcasts to i8 and zero-extends
; to i16.
; Both prefixes expect an unmasked vcmplepd {sae}; the mask is applied
; afterwards with a scalar "andb %dil, %al" rather than a {%k1} write-mask.
; NOTE(review): the function name says "oeq" but the expected instruction is
; vcmplepd (immediate 2) - confirm whether the name or the immediate is the
; intended one.
22876 define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
22877 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask:
22878 ; VLX: # %bb.0: # %entry
22879 ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
22880 ; VLX-NEXT: kmovd %k0, %eax
22881 ; VLX-NEXT: andb %dil, %al
22882 ; VLX-NEXT: movzbl %al, %eax
22883 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22884 ; VLX-NEXT: vzeroupper
22887 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask:
22888 ; NoVLX: # %bb.0: # %entry
22889 ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
22890 ; NoVLX-NEXT: kmovw %k0, %eax
22891 ; NoVLX-NEXT: andb %dil, %al
22892 ; NoVLX-NEXT: movzbl %al, %eax
22893 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22894 ; NoVLX-NEXT: vzeroupper
22897 %0 = bitcast <8 x i64> %__a to <8 x double>
22898 %1 = bitcast <8 x i64> %__b to <8 x double>
22899 %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
22900 %3 = bitcast i8 %__u to <8 x i1>
22901 %4 = and <8 x i1> %2, %3
22902 %5 = bitcast <8 x i1> %4 to i8
22903 %6 = zext i8 %5 to i16
; Unmasked fcmp oeq of two register operands, both viewed as <8 x double>.
; The <8 x i1> result is widened to <32 x i1> (lanes 0-7 from the compare,
; every other lane from the zeroinitializer operand) and bitcast to i32.
; Both prefixes expect a 512-bit vcmpeqpd; they differ only in the mask move
; (kmovd for VLX, kmovw for NoVLX).
22909 define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
22910 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
22911 ; VLX: # %bb.0: # %entry
22912 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22913 ; VLX-NEXT: kmovd %k0, %eax
22914 ; VLX-NEXT: vzeroupper
22917 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
22918 ; NoVLX: # %bb.0: # %entry
22919 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22920 ; NoVLX-NEXT: kmovw %k0, %eax
22921 ; NoVLX-NEXT: vzeroupper
22924 %0 = bitcast <8 x i64> %__a to <8 x double>
22925 %1 = bitcast <8 x i64> %__b to <8 x double>
22926 %2 = fcmp oeq <8 x double> %0, %1
22927 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22928 %4 = bitcast <32 x i1> %3 to i32
; Unmasked fcmp oeq of %__a (viewed as <8 x double>) against an <8 x i64>
; vector loaded from %__b and viewed the same way. The <8 x i1> result is
; widened to <32 x i1> with zero lanes above lane 7 and bitcast to i32.
; Both prefixes expect the load folded into a 512-bit vcmpeqpd.
22932 define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
22933 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
22934 ; VLX: # %bb.0: # %entry
22935 ; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
22936 ; VLX-NEXT: kmovd %k0, %eax
22937 ; VLX-NEXT: vzeroupper
22940 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
22941 ; NoVLX: # %bb.0: # %entry
22942 ; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
22943 ; NoVLX-NEXT: kmovw %k0, %eax
22944 ; NoVLX-NEXT: vzeroupper
22947 %0 = bitcast <8 x i64> %__a to <8 x double>
22948 %load = load <8 x i64>, <8 x i64>* %__b
22949 %1 = bitcast <8 x i64> %load to <8 x double>
22950 %2 = fcmp oeq <8 x double> %0, %1
22951 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22952 %4 = bitcast <32 x i1> %3 to i32
; Unmasked broadcast variant: one double is loaded from %__b, splatted to all
; eight lanes (insertelement + all-zero-index shufflevector) and compared with
; fcmp oeq against %__a viewed as <8 x double>. The <8 x i1> result is widened
; to <32 x i1> with zero lanes above lane 7 and bitcast to i32.
; Both prefixes expect a {1to8} broadcast operand on a 512-bit vcmpeqpd.
22956 define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
22957 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
22958 ; VLX: # %bb.0: # %entry
22959 ; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22960 ; VLX-NEXT: kmovd %k0, %eax
22961 ; VLX-NEXT: vzeroupper
22964 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
22965 ; NoVLX: # %bb.0: # %entry
22966 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22967 ; NoVLX-NEXT: kmovw %k0, %eax
22968 ; NoVLX-NEXT: vzeroupper
22971 %0 = bitcast <8 x i64> %__a to <8 x double>
22972 %load = load double, double* %__b
22973 %vec = insertelement <8 x double> undef, double %load, i32 0
22974 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
22975 %2 = fcmp oeq <8 x double> %0, %1
22976 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22977 %4 = bitcast <32 x i1> %3 to i32
; Masked fcmp oeq of two register operands, both viewed as <8 x double>.
; The <8 x i1> result is AND-ed with the 8-bit mask %__u, widened to
; <32 x i1> (lanes 0-7 from the AND, every other lane from the
; zeroinitializer operand) and bitcast to i32.
; Both prefixes expect %edi moved into %k1 and used as the write-mask of a
; 512-bit vcmpeqpd.
22981 define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
22982 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask:
22983 ; VLX: # %bb.0: # %entry
22984 ; VLX-NEXT: kmovd %edi, %k1
22985 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22986 ; VLX-NEXT: kmovd %k0, %eax
22987 ; VLX-NEXT: vzeroupper
22990 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask:
22991 ; NoVLX: # %bb.0: # %entry
22992 ; NoVLX-NEXT: kmovw %edi, %k1
22993 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22994 ; NoVLX-NEXT: kmovw %k0, %eax
22995 ; NoVLX-NEXT: vzeroupper
22998 %0 = bitcast <8 x i64> %__a to <8 x double>
22999 %1 = bitcast <8 x i64> %__b to <8 x double>
23000 %2 = fcmp oeq <8 x double> %0, %1
23001 %3 = bitcast i8 %__u to <8 x i1>
23002 %4 = and <8 x i1> %2, %3
23003 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23004 %6 = bitcast <32 x i1> %5 to i32
; Masked fcmp oeq of %__a (viewed as <8 x double>) against an <8 x i64>
; vector loaded from %__b and viewed the same way. The <8 x i1> result is
; AND-ed with the 8-bit mask %__u, widened to <32 x i1> with zero lanes above
; lane 7, and bitcast to i32.
; Both prefixes expect a masked 512-bit vcmpeqpd with the load folded in.
23008 define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
23009 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem:
23010 ; VLX: # %bb.0: # %entry
23011 ; VLX-NEXT: kmovd %edi, %k1
23012 ; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23013 ; VLX-NEXT: kmovd %k0, %eax
23014 ; VLX-NEXT: vzeroupper
23017 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem:
23018 ; NoVLX: # %bb.0: # %entry
23019 ; NoVLX-NEXT: kmovw %edi, %k1
23020 ; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23021 ; NoVLX-NEXT: kmovw %k0, %eax
23022 ; NoVLX-NEXT: vzeroupper
23025 %0 = bitcast <8 x i64> %__a to <8 x double>
23026 %load = load <8 x i64>, <8 x i64>* %__b
23027 %1 = bitcast <8 x i64> %load to <8 x double>
23028 %2 = fcmp oeq <8 x double> %0, %1
23029 %3 = bitcast i8 %__u to <8 x i1>
23030 %4 = and <8 x i1> %2, %3
23031 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23032 %6 = bitcast <32 x i1> %5 to i32
; Masked broadcast variant: one double is loaded from %__b, splatted to all
; eight lanes (insertelement + all-zero-index shufflevector) and compared with
; fcmp oeq against %__a viewed as <8 x double>. The result is AND-ed with the
; 8-bit mask %__u, widened to <32 x i1> with zero lanes above lane 7, and
; bitcast to i32.
; Both prefixes expect a {1to8} broadcast operand on a masked 512-bit
; vcmpeqpd.
23036 define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr {
23037 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
23038 ; VLX: # %bb.0: # %entry
23039 ; VLX-NEXT: kmovd %edi, %k1
23040 ; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23041 ; VLX-NEXT: kmovd %k0, %eax
23042 ; VLX-NEXT: vzeroupper
23045 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
23046 ; NoVLX: # %bb.0: # %entry
23047 ; NoVLX-NEXT: kmovw %edi, %k1
23048 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23049 ; NoVLX-NEXT: kmovw %k0, %eax
23050 ; NoVLX-NEXT: vzeroupper
23053 %0 = bitcast <8 x i64> %__a to <8 x double>
23054 %load = load double, double* %__b
23055 %vec = insertelement <8 x double> undef, double %load, i32 0
23056 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
23057 %2 = fcmp oeq <8 x double> %0, %1
23058 %3 = bitcast i8 %__u to <8 x i1>
23059 %4 = and <8 x i1> %2, %3
23060 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23061 %6 = bitcast <32 x i1> %5 to i32
; SAE variant: calls @llvm.x86.avx512.cmp.pd.512 on the two inputs (viewed as
; <8 x double>) with trailing immediates i32 2 and i32 8. The <8 x i1> result
; is bitcast to i8 and zero-extended to i32.
; Both prefixes expect vcmplepd with the {sae} modifier. VLX then expects a
; single kmovb into %eax, while NoVLX expects kmovw followed by movzbl.
; NOTE(review): the function name says "oeq" but the expected instruction is
; vcmplepd (immediate 2) - confirm whether the name or the immediate is the
; intended one.
23067 define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23068 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask:
23069 ; VLX: # %bb.0: # %entry
23070 ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23071 ; VLX-NEXT: kmovb %k0, %eax
23072 ; VLX-NEXT: vzeroupper
23075 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask:
23076 ; NoVLX: # %bb.0: # %entry
23077 ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23078 ; NoVLX-NEXT: kmovw %k0, %eax
23079 ; NoVLX-NEXT: movzbl %al, %eax
23080 ; NoVLX-NEXT: vzeroupper
23083 %0 = bitcast <8 x i64> %__a to <8 x double>
23084 %1 = bitcast <8 x i64> %__b to <8 x double>
23085 %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
23086 %3 = bitcast <8 x i1> %2 to i8
23087 %4 = zext i8 %3 to i32
; Masked SAE variant: calls @llvm.x86.avx512.cmp.pd.512 on the two inputs
; (viewed as <8 x double>) with trailing immediates i32 2 and i32 8, ANDs the
; <8 x i1> result with the 8-bit mask %__u, bitcasts to i8 and zero-extends
; to i32.
; Both prefixes expect an unmasked vcmplepd {sae}; the mask is applied
; afterwards with a scalar "andb %dil, %al", followed by movzbl.
; NOTE(review): the function name says "oeq" but the expected instruction is
; vcmplepd (immediate 2) - confirm whether the name or the immediate is the
; intended one.
23091 define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23092 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask:
23093 ; VLX: # %bb.0: # %entry
23094 ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23095 ; VLX-NEXT: kmovd %k0, %eax
23096 ; VLX-NEXT: andb %dil, %al
23097 ; VLX-NEXT: movzbl %al, %eax
23098 ; VLX-NEXT: vzeroupper
23101 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask:
23102 ; NoVLX: # %bb.0: # %entry
23103 ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23104 ; NoVLX-NEXT: kmovw %k0, %eax
23105 ; NoVLX-NEXT: andb %dil, %al
23106 ; NoVLX-NEXT: movzbl %al, %eax
23107 ; NoVLX-NEXT: vzeroupper
23110 %0 = bitcast <8 x i64> %__a to <8 x double>
23111 %1 = bitcast <8 x i64> %__b to <8 x double>
23112 %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
23113 %3 = bitcast i8 %__u to <8 x i1>
23114 %4 = and <8 x i1> %2, %3
23115 %5 = bitcast <8 x i1> %4 to i8
23116 %6 = zext i8 %5 to i32
; Unmasked fcmp oeq of two register operands, both viewed as <8 x double>.
; The <8 x i1> result is widened to <64 x i1> (lanes 0-7 from the compare,
; every other lane from the zeroinitializer operand) and bitcast to i64.
; Both prefixes expect a 512-bit vcmpeqpd; VLX expects a kmovq into %rax,
; NoVLX a kmovw into %eax.
23122 define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23123 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
23124 ; VLX: # %bb.0: # %entry
23125 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
23126 ; VLX-NEXT: kmovq %k0, %rax
23127 ; VLX-NEXT: vzeroupper
23130 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
23131 ; NoVLX: # %bb.0: # %entry
23132 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
23133 ; NoVLX-NEXT: kmovw %k0, %eax
23134 ; NoVLX-NEXT: vzeroupper
23137 %0 = bitcast <8 x i64> %__a to <8 x double>
23138 %1 = bitcast <8 x i64> %__b to <8 x double>
23139 %2 = fcmp oeq <8 x double> %0, %1
23140 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23141 %4 = bitcast <64 x i1> %3 to i64
; Unmasked fcmp oeq of %__a (viewed as <8 x double>) against an <8 x i64>
; vector loaded from %__b and viewed the same way. The <8 x i1> result is
; widened to <64 x i1> with zero lanes above lane 7 and bitcast to i64.
; Both prefixes expect the load folded into a 512-bit vcmpeqpd; VLX expects a
; kmovq into %rax, NoVLX a kmovw into %eax.
23145 define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
23146 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
23147 ; VLX: # %bb.0: # %entry
23148 ; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
23149 ; VLX-NEXT: kmovq %k0, %rax
23150 ; VLX-NEXT: vzeroupper
23153 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
23154 ; NoVLX: # %bb.0: # %entry
23155 ; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
23156 ; NoVLX-NEXT: kmovw %k0, %eax
23157 ; NoVLX-NEXT: vzeroupper
23160 %0 = bitcast <8 x i64> %__a to <8 x double>
23161 %load = load <8 x i64>, <8 x i64>* %__b
23162 %1 = bitcast <8 x i64> %load to <8 x double>
23163 %2 = fcmp oeq <8 x double> %0, %1
23164 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23165 %4 = bitcast <64 x i1> %3 to i64
; Unmasked broadcast variant: one double is loaded from %__b, splatted to all
; eight lanes (insertelement + all-zero-index shufflevector) and compared with
; fcmp oeq against %__a viewed as <8 x double>. The <8 x i1> result is widened
; to <64 x i1> with zero lanes above lane 7 and bitcast to i64.
; Both prefixes expect a {1to8} broadcast operand on a 512-bit vcmpeqpd; VLX
; expects a kmovq into %rax, NoVLX a kmovw into %eax.
23169 define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
23170 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
23171 ; VLX: # %bb.0: # %entry
23172 ; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
23173 ; VLX-NEXT: kmovq %k0, %rax
23174 ; VLX-NEXT: vzeroupper
23177 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
23178 ; NoVLX: # %bb.0: # %entry
23179 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
23180 ; NoVLX-NEXT: kmovw %k0, %eax
23181 ; NoVLX-NEXT: vzeroupper
23184 %0 = bitcast <8 x i64> %__a to <8 x double>
23185 %load = load double, double* %__b
23186 %vec = insertelement <8 x double> undef, double %load, i32 0
23187 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
23188 %2 = fcmp oeq <8 x double> %0, %1
23189 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23190 %4 = bitcast <64 x i1> %3 to i64
; Masked register-register variant of the 8-lane double compare.
; %__a and %__b are reinterpreted as <8 x double> and compared with fcmp oeq;
; the result is ANDed with the i8 mask %__u (bitcast to <8 x i1>), widened to
; <64 x i1>, and bitcast to i64.
; The assertions expect the AND to be folded into the compare as a {%k1}
; write-mask after %edi is moved into k1, with no separate mask AND.
23194 define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23195 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask:
23196 ; VLX: # %bb.0: # %entry
23197 ; VLX-NEXT: kmovd %edi, %k1
23198 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23199 ; VLX-NEXT: kmovq %k0, %rax
23200 ; VLX-NEXT: vzeroupper
23203 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask:
23204 ; NoVLX: # %bb.0: # %entry
23205 ; NoVLX-NEXT: kmovw %edi, %k1
23206 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23207 ; NoVLX-NEXT: kmovw %k0, %eax
23208 ; NoVLX-NEXT: vzeroupper
23211 %0 = bitcast <8 x i64> %__a to <8 x double>
23212 %1 = bitcast <8 x i64> %__b to <8 x double>
23213 %2 = fcmp oeq <8 x double> %0, %1
; Apply the caller-supplied lane mask to the compare result.
23214 %3 = bitcast i8 %__u to <8 x i1>
23215 %4 = and <8 x i1> %2, %3
; Result lanes 0-7 take the masked compare bits; every shuffle index >= 8
; selects an element of the zeroinitializer operand, so lanes 8-63 are zero.
23216 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23217 %6 = bitcast <64 x i1> %5 to i64
; Masked full-vector-load variant of the 8-lane double compare.
; The second operand is loaded as <8 x i64> from %__b and reinterpreted as
; <8 x double>; the fcmp oeq result is ANDed with the i8 mask %__u, widened to
; <64 x i1>, and bitcast to i64.
; The assertions expect the load to be folded into the compare as a (%rsi)
; memory operand and the AND to become a {%k1} write-mask.
23221 define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
23222 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem:
23223 ; VLX: # %bb.0: # %entry
23224 ; VLX-NEXT: kmovd %edi, %k1
23225 ; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23226 ; VLX-NEXT: kmovq %k0, %rax
23227 ; VLX-NEXT: vzeroupper
23230 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem:
23231 ; NoVLX: # %bb.0: # %entry
23232 ; NoVLX-NEXT: kmovw %edi, %k1
23233 ; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23234 ; NoVLX-NEXT: kmovw %k0, %eax
23235 ; NoVLX-NEXT: vzeroupper
23238 %0 = bitcast <8 x i64> %__a to <8 x double>
23239 %load = load <8 x i64>, <8 x i64>* %__b
23240 %1 = bitcast <8 x i64> %load to <8 x double>
23241 %2 = fcmp oeq <8 x double> %0, %1
; Apply the caller-supplied lane mask to the compare result.
23242 %3 = bitcast i8 %__u to <8 x i1>
23243 %4 = and <8 x i1> %2, %3
; Result lanes 0-7 take the masked compare bits; every shuffle index >= 8
; selects an element of the zeroinitializer operand, so lanes 8-63 are zero.
23244 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23245 %6 = bitcast <64 x i1> %5 to i64
; Masked broadcast-from-memory variant of the 8-lane double compare.
; A single double is loaded from %__b and splatted to all eight lanes; the
; fcmp oeq result against %__a is ANDed with the i8 mask %__u, widened to
; <64 x i1>, and bitcast to i64.
; The assertions expect both folds at once: the scalar load becomes a
; (%rsi){1to8} broadcast operand and the AND becomes a {%k1} write-mask.
23249 define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr {
23250 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
23251 ; VLX: # %bb.0: # %entry
23252 ; VLX-NEXT: kmovd %edi, %k1
23253 ; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23254 ; VLX-NEXT: kmovq %k0, %rax
23255 ; VLX-NEXT: vzeroupper
23258 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
23259 ; NoVLX: # %bb.0: # %entry
23260 ; NoVLX-NEXT: kmovw %edi, %k1
23261 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23262 ; NoVLX-NEXT: kmovw %k0, %eax
23263 ; NoVLX-NEXT: vzeroupper
23266 %0 = bitcast <8 x i64> %__a to <8 x double>
; Scalar load + insertelement + all-zero shuffle mask = splat of the loaded value.
23267 %load = load double, double* %__b
23268 %vec = insertelement <8 x double> undef, double %load, i32 0
23269 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
23270 %2 = fcmp oeq <8 x double> %0, %1
; Apply the caller-supplied lane mask to the compare result.
23271 %3 = bitcast i8 %__u to <8 x i1>
23272 %4 = and <8 x i1> %2, %3
; Result lanes 0-7 take the masked compare bits; every shuffle index >= 8
; selects an element of the zeroinitializer operand, so lanes 8-63 are zero.
23273 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23274 %6 = bitcast <64 x i1> %5 to i64
; Unmasked {sae} variant, expressed through the target intrinsic rather than
; a generic fcmp. @llvm.x86.avx512.cmp.pd.512 is called with the immediates
; (i32 2, i32 8); the <8 x i1> result is bitcast to i8 and zero-extended to
; i64 instead of being widened with a shufflevector.
; The assertions expect a single "vcmplepd {sae}" followed by a mask move:
; kmovb with the full feature set, or kmovw plus an explicit movzbl when only
; AVX512F is enabled.
; NOTE(review): the function name says "oeq" but predicate immediate 2 is
; asserted as vcmplepd (LE) - presumably a naming leftover; confirm before
; renaming, since the generated labels depend on the name.
23280 define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23281 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
23282 ; VLX: # %bb.0: # %entry
23283 ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23284 ; VLX-NEXT: kmovb %k0, %eax
23285 ; VLX-NEXT: vzeroupper
23288 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
23289 ; NoVLX: # %bb.0: # %entry
23290 ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23291 ; NoVLX-NEXT: kmovw %k0, %eax
23292 ; NoVLX-NEXT: movzbl %al, %eax
23293 ; NoVLX-NEXT: vzeroupper
23296 %0 = bitcast <8 x i64> %__a to <8 x double>
23297 %1 = bitcast <8 x i64> %__b to <8 x double>
23298 %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
23299 %3 = bitcast <8 x i1> %2 to i8
23300 %4 = zext i8 %3 to i64
; Masked {sae} variant, expressed through the target intrinsic.
; @llvm.x86.avx512.cmp.pd.512 is called with the immediates (i32 2, i32 8);
; its <8 x i1> result is ANDed with the i8 mask %__u, bitcast to i8, and
; zero-extended to i64.
; Unlike the non-sae masked tests in this file, the assertions here do not
; expect a {%k1} write-mask: the mask is applied in a GPR after the mask move
; (andb %dil, %al), followed by movzbl for the zero extension.
; NOTE(review): the function name says "oeq" but predicate immediate 2 is
; asserted as vcmplepd (LE) - presumably a naming leftover; confirm before
; renaming, since the generated labels depend on the name.
23304 define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23305 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask:
23306 ; VLX: # %bb.0: # %entry
23307 ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23308 ; VLX-NEXT: kmovd %k0, %eax
23309 ; VLX-NEXT: andb %dil, %al
23310 ; VLX-NEXT: movzbl %al, %eax
23311 ; VLX-NEXT: vzeroupper
23314 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask:
23315 ; NoVLX: # %bb.0: # %entry
23316 ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23317 ; NoVLX-NEXT: kmovw %k0, %eax
23318 ; NoVLX-NEXT: andb %dil, %al
23319 ; NoVLX-NEXT: movzbl %al, %eax
23320 ; NoVLX-NEXT: vzeroupper
23323 %0 = bitcast <8 x i64> %__a to <8 x double>
23324 %1 = bitcast <8 x i64> %__b to <8 x double>
23325 %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
; Apply the caller-supplied lane mask to the intrinsic's result.
23326 %3 = bitcast i8 %__u to <8 x i1>
23327 %4 = and <8 x i1> %2, %3
23328 %5 = bitcast <8 x i1> %4 to i8
23329 %6 = zext i8 %5 to i64
23333 ; Test that we understand that cmpps with rounding zeros the upper bits of the mask register.
; The 16-lane compare is issued through @llvm.x86.avx512.cmp.ps.512 with the
; immediates (i32 2, i32 8), asserted as "vcmpleps {sae}". Its result is
; round-tripped through i16, then widened to <32 x i1> with zero upper lanes
; and bitcast to i32. The assertions expect only the compare and one mask move
; in both configurations, i.e. no extra instruction to clear bits 16-31.
23334 define i32 @test_cmpm_rnd_zero(<16 x float> %a, <16 x float> %b) {
23335 ; VLX-LABEL: test_cmpm_rnd_zero:
23337 ; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
23338 ; VLX-NEXT: kmovd %k0, %eax
23339 ; VLX-NEXT: vzeroupper
23342 ; NoVLX-LABEL: test_cmpm_rnd_zero:
23344 ; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
23345 ; NoVLX-NEXT: kmovw %k0, %eax
23346 ; NoVLX-NEXT: vzeroupper
23348 %res = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i32 8)
23349 %1 = bitcast <16 x i1> %res to i16
23350 %cast = bitcast i16 %1 to <16 x i1>
; Indices 0-15 select the compare lanes; indices 16-31 select elements of the
; zeroinitializer operand, so the upper half of the 32-bit mask is zero.
23351 %shuffle = shufflevector <16 x i1> %cast, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
23352 %cast2 = bitcast <32 x i1> %shuffle to i32
; Builds an 8-bit mask whose low four bits are zero and whose high four bits
; hold the per-lane result of "%a != 0" for a <4 x i32> input.
; The assertions expect a vptestmd to produce the compare mask followed by a
; left shift of the mask register by 4 (kshiftlb with the full feature set,
; kshiftlw with AVX512F only, where the input is first treated as a zmm
; register per the "kill" annotation).
23356 define i8 @mask_zero_lower(<4 x i32> %a) {
23357 ; VLX-LABEL: mask_zero_lower:
23359 ; VLX-NEXT: vptestmd %xmm0, %xmm0, %k0
23360 ; VLX-NEXT: kshiftlb $4, %k0, %k0
23361 ; VLX-NEXT: kmovd %k0, %eax
23362 ; VLX-NEXT: # kill: def $al killed $al killed $eax
23365 ; NoVLX-LABEL: mask_zero_lower:
23367 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23368 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
23369 ; NoVLX-NEXT: kshiftlw $4, %k0, %k0
23370 ; NoVLX-NEXT: kmovw %k0, %eax
23371 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
23372 ; NoVLX-NEXT: vzeroupper
23374 %cmp = icmp ne <4 x i32> %a, zeroinitializer
; Indices 4-7 select the zeroinitializer operand and come first, so result
; lanes 0-3 are zero; indices 0-3 place the compare bits in result lanes 4-7.
23375 %concat = shufflevector <4 x i1> %cmp, <4 x i1> zeroinitializer, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
23376 %cast = bitcast <8 x i1> %concat to i8