1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-peephole -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=VLX
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-peephole -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=NoVLX
; Byte-wise equality compare of %__a and %__b (both reinterpreted as <16 x i8>).
; The <16 x i1> result is widened to 32 lanes; shuffle indices 16-31 select
; lanes of the zeroinitializer operand, so the upper half is zero. The widened
; vector is bitcast to i32.
; Expected code with +avx512bw/+avx512vl: vpcmpeqb straight into %k0, then kmovd.
; Expected code with only +avx512f: xmm vpcmpeqb, vpmovsxbd to zmm, vptestmd.
5 define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6 ; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
7 ; VLX: # %bb.0: # %entry
8 ; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
9 ; VLX-NEXT: kmovd %k0, %eax
12 ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
13 ; NoVLX: # %bb.0: # %entry
14 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
15 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
16 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
17 ; NoVLX-NEXT: kmovw %k0, %eax
18 ; NoVLX-NEXT: vzeroupper
21 %0 = bitcast <2 x i64> %__a to <16 x i8>
22 %1 = bitcast <2 x i64> %__b to <16 x i8>
23 %2 = icmp eq <16 x i8> %0, %1
24 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
25 %4 = bitcast <32 x i1> %3 to i32
; Byte-wise equality compare of %__a against a <2 x i64> loaded from %__b
; (both reinterpreted as <16 x i8>). The <16 x i1> result is widened to 32
; lanes with zero upper lanes (indices 16-31 pick from zeroinitializer) and
; bitcast to i32.
; Both expected sequences fold the load into vpcmpeqb as a (%rdi) operand.
29 define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
30 ; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
31 ; VLX: # %bb.0: # %entry
32 ; VLX-NEXT: vpcmpeqb (%rdi), %xmm0, %k0
33 ; VLX-NEXT: kmovd %k0, %eax
36 ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
37 ; NoVLX: # %bb.0: # %entry
38 ; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
39 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
40 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
41 ; NoVLX-NEXT: kmovw %k0, %eax
42 ; NoVLX-NEXT: vzeroupper
45 %0 = bitcast <2 x i64> %__a to <16 x i8>
46 %load = load <2 x i64>, <2 x i64>* %__b
47 %1 = bitcast <2 x i64> %load to <16 x i8>
48 %2 = icmp eq <16 x i8> %0, %1
49 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
50 %4 = bitcast <32 x i1> %3 to i32
; Masked byte-wise equality compare: the <16 x i1> result of comparing %__a and
; %__b is ANDed with the i16 mask %__u (bitcast to <16 x i1>), widened to 32
; lanes with zero upper lanes, and bitcast to i32.
; Expected code with +avx512bw/+avx512vl: %__u is moved into %k1 and used as the
; write-mask of vpcmpeqb.
; Expected code with only +avx512f: the mask is applied with a scalar andl on
; the bits extracted by kmovw.
54 define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
55 ; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
56 ; VLX: # %bb.0: # %entry
57 ; VLX-NEXT: kmovd %edi, %k1
58 ; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
59 ; VLX-NEXT: kmovd %k0, %eax
62 ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
63 ; NoVLX: # %bb.0: # %entry
64 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
65 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
66 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
67 ; NoVLX-NEXT: kmovw %k0, %eax
68 ; NoVLX-NEXT: andl %edi, %eax
69 ; NoVLX-NEXT: vzeroupper
72 %0 = bitcast <2 x i64> %__a to <16 x i8>
73 %1 = bitcast <2 x i64> %__b to <16 x i8>
74 %2 = icmp eq <16 x i8> %0, %1
75 %3 = bitcast i16 %__u to <16 x i1>
76 %4 = and <16 x i1> %2, %3
77 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
78 %6 = bitcast <32 x i1> %5 to i32
; Masked byte-wise equality compare with a memory operand: %__a is compared
; against a <2 x i64> loaded from %__b (as <16 x i8>), the <16 x i1> result is
; ANDed with the i16 mask %__u, widened to 32 lanes with zero upper lanes, and
; bitcast to i32.
; Both expected sequences fold the load into vpcmpeqb as a (%rsi) operand; the
; mask is a %k1 write-mask with +avx512bw/+avx512vl and a scalar andl otherwise.
82 define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
83 ; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
84 ; VLX: # %bb.0: # %entry
85 ; VLX-NEXT: kmovd %edi, %k1
86 ; VLX-NEXT: vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
87 ; VLX-NEXT: kmovd %k0, %eax
90 ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
91 ; NoVLX: # %bb.0: # %entry
92 ; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
93 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
94 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
95 ; NoVLX-NEXT: kmovw %k0, %eax
96 ; NoVLX-NEXT: andl %edi, %eax
97 ; NoVLX-NEXT: vzeroupper
100 %0 = bitcast <2 x i64> %__a to <16 x i8>
101 %load = load <2 x i64>, <2 x i64>* %__b
102 %1 = bitcast <2 x i64> %load to <16 x i8>
103 %2 = icmp eq <16 x i8> %0, %1
104 %3 = bitcast i16 %__u to <16 x i1>
105 %4 = and <16 x i1> %2, %3
106 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
107 %6 = bitcast <32 x i1> %5 to i32
; Byte-wise equality compare of %__a and %__b (as <16 x i8>) whose <16 x i1>
; result is widened to 64 lanes and bitcast to i64. Only shuffle indices 0-15
; select compare results; every remaining index is in the 16-31 range, which
; selects lanes of the zeroinitializer operand, so lanes 16-63 are zero.
; Expected code with +avx512bw/+avx512vl: vpcmpeqb into %k0, then kmovq.
; Expected code with only +avx512f: xmm vpcmpeqb, vpmovsxbd to zmm, vptestmd,
; kmovw.
112 define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
113 ; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
114 ; VLX: # %bb.0: # %entry
115 ; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
116 ; VLX-NEXT: kmovq %k0, %rax
119 ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
120 ; NoVLX: # %bb.0: # %entry
121 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
122 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
123 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
124 ; NoVLX-NEXT: kmovw %k0, %eax
125 ; NoVLX-NEXT: vzeroupper
128 %0 = bitcast <2 x i64> %__a to <16 x i8>
129 %1 = bitcast <2 x i64> %__b to <16 x i8>
130 %2 = icmp eq <16 x i8> %0, %1
131 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
132 %4 = bitcast <64 x i1> %3 to i64
; Byte-wise equality compare of %__a against a <2 x i64> loaded from %__b (as
; <16 x i8>). The <16 x i1> result is widened to 64 lanes (indices above 15 all
; select zeroinitializer lanes, so lanes 16-63 are zero) and bitcast to i64.
; Both expected sequences fold the load into vpcmpeqb as a (%rdi) operand.
136 define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
137 ; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
138 ; VLX: # %bb.0: # %entry
139 ; VLX-NEXT: vpcmpeqb (%rdi), %xmm0, %k0
140 ; VLX-NEXT: kmovq %k0, %rax
143 ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
144 ; NoVLX: # %bb.0: # %entry
145 ; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
146 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
147 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
148 ; NoVLX-NEXT: kmovw %k0, %eax
149 ; NoVLX-NEXT: vzeroupper
152 %0 = bitcast <2 x i64> %__a to <16 x i8>
153 %load = load <2 x i64>, <2 x i64>* %__b
154 %1 = bitcast <2 x i64> %load to <16 x i8>
155 %2 = icmp eq <16 x i8> %0, %1
156 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
157 %4 = bitcast <64 x i1> %3 to i64
; Masked byte-wise equality compare: the <16 x i1> result of comparing %__a and
; %__b is ANDed with the i16 mask %__u, widened to 64 lanes (indices above 15
; all select zeroinitializer lanes, so lanes 16-63 are zero) and bitcast to i64.
; Expected code with +avx512bw/+avx512vl: %__u in %k1 as the vpcmpeqb
; write-mask, result read with kmovq.
; Expected code with only +avx512f: mask applied by a scalar andl after kmovw.
161 define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
162 ; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
163 ; VLX: # %bb.0: # %entry
164 ; VLX-NEXT: kmovd %edi, %k1
165 ; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
166 ; VLX-NEXT: kmovq %k0, %rax
169 ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
170 ; NoVLX: # %bb.0: # %entry
171 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
172 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
173 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
174 ; NoVLX-NEXT: kmovw %k0, %eax
175 ; NoVLX-NEXT: andl %edi, %eax
176 ; NoVLX-NEXT: vzeroupper
179 %0 = bitcast <2 x i64> %__a to <16 x i8>
180 %1 = bitcast <2 x i64> %__b to <16 x i8>
181 %2 = icmp eq <16 x i8> %0, %1
182 %3 = bitcast i16 %__u to <16 x i1>
183 %4 = and <16 x i1> %2, %3
184 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
185 %6 = bitcast <64 x i1> %5 to i64
; Masked byte-wise equality compare with a memory operand: %__a is compared
; against a <2 x i64> loaded from %__b (as <16 x i8>), the <16 x i1> result is
; ANDed with the i16 mask %__u, widened to 64 lanes (lanes 16-63 come from
; zeroinitializer) and bitcast to i64.
; Both expected sequences fold the load into vpcmpeqb as a (%rsi) operand; the
; mask is a %k1 write-mask with +avx512bw/+avx512vl and a scalar andl otherwise.
189 define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
190 ; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
191 ; VLX: # %bb.0: # %entry
192 ; VLX-NEXT: kmovd %edi, %k1
193 ; VLX-NEXT: vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
194 ; VLX-NEXT: kmovq %k0, %rax
197 ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
198 ; NoVLX: # %bb.0: # %entry
199 ; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
200 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
201 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
202 ; NoVLX-NEXT: kmovw %k0, %eax
203 ; NoVLX-NEXT: andl %edi, %eax
204 ; NoVLX-NEXT: vzeroupper
207 %0 = bitcast <2 x i64> %__a to <16 x i8>
208 %load = load <2 x i64>, <2 x i64>* %__b
209 %1 = bitcast <2 x i64> %load to <16 x i8>
210 %2 = icmp eq <16 x i8> %0, %1
211 %3 = bitcast i16 %__u to <16 x i1>
212 %4 = and <16 x i1> %2, %3
213 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
214 %6 = bitcast <64 x i1> %5 to i64
; Byte-wise equality compare of two 256-bit inputs (%__a, %__b as <32 x i8>).
; The <32 x i1> result is widened to 64 lanes; shuffle indices 32-63 select
; lanes of the zeroinitializer operand, so the upper half is zero. The widened
; vector is bitcast to i64.
; Expected code with +avx512bw/+avx512vl: ymm vpcmpeqb into %k0, then kmovq.
; Expected code with only +avx512f: ymm vpcmpeqb, then each 128-bit half is
; sign-extended (vpmovsxbd) and tested (vptestmd) separately and the two 16-bit
; masks are merged with shll $16 / orl.
219 define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
220 ; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
221 ; VLX: # %bb.0: # %entry
222 ; VLX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0
223 ; VLX-NEXT: kmovq %k0, %rax
224 ; VLX-NEXT: vzeroupper
227 ; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
228 ; NoVLX: # %bb.0: # %entry
229 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
230 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
231 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
232 ; NoVLX-NEXT: kmovw %k0, %ecx
233 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
234 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
235 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
236 ; NoVLX-NEXT: kmovw %k0, %eax
237 ; NoVLX-NEXT: shll $16, %eax
238 ; NoVLX-NEXT: orl %ecx, %eax
239 ; NoVLX-NEXT: vzeroupper
242 %0 = bitcast <4 x i64> %__a to <32 x i8>
243 %1 = bitcast <4 x i64> %__b to <32 x i8>
244 %2 = icmp eq <32 x i8> %0, %1
245 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
246 %4 = bitcast <64 x i1> %3 to i64
; Byte-wise equality compare of %__a against a <4 x i64> loaded from %__b (both
; as <32 x i8>). The <32 x i1> result is widened to 64 lanes with zero upper
; lanes (indices 32-63 pick from zeroinitializer) and bitcast to i64.
; Both expected sequences fold the load into vpcmpeqb as a (%rdi) operand; with
; only +avx512f the two 128-bit halves are converted to masks separately and
; merged with shll $16 / orl.
250 define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
251 ; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
252 ; VLX: # %bb.0: # %entry
253 ; VLX-NEXT: vpcmpeqb (%rdi), %ymm0, %k0
254 ; VLX-NEXT: kmovq %k0, %rax
255 ; VLX-NEXT: vzeroupper
258 ; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
259 ; NoVLX: # %bb.0: # %entry
260 ; NoVLX-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0
261 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
262 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
263 ; NoVLX-NEXT: kmovw %k0, %ecx
264 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
265 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
266 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
267 ; NoVLX-NEXT: kmovw %k0, %eax
268 ; NoVLX-NEXT: shll $16, %eax
269 ; NoVLX-NEXT: orl %ecx, %eax
270 ; NoVLX-NEXT: vzeroupper
273 %0 = bitcast <4 x i64> %__a to <32 x i8>
274 %load = load <4 x i64>, <4 x i64>* %__b
275 %1 = bitcast <4 x i64> %load to <32 x i8>
276 %2 = icmp eq <32 x i8> %0, %1
277 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
278 %4 = bitcast <64 x i1> %3 to i64
; Masked byte-wise equality compare of two 256-bit inputs: the <32 x i1> result
; of comparing %__a and %__b is ANDed with the i32 mask %__u (bitcast to
; <32 x i1>), widened to 64 lanes with zero upper lanes, and bitcast to i64.
; Expected code with +avx512bw/+avx512vl: %__u in %k1 as the vpcmpeqb
; write-mask.
; Expected code with only +avx512f: each 128-bit half is turned into a 16-bit
; mask and ANDed with the matching half of %__u (the upper half is obtained
; with shrl $16, %edi), then the halves are merged with shll $16 / orl.
282 define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
283 ; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
284 ; VLX: # %bb.0: # %entry
285 ; VLX-NEXT: kmovd %edi, %k1
286 ; VLX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1}
287 ; VLX-NEXT: kmovq %k0, %rax
288 ; VLX-NEXT: vzeroupper
291 ; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
292 ; NoVLX: # %bb.0: # %entry
293 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
294 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
295 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
296 ; NoVLX-NEXT: kmovw %k0, %eax
297 ; NoVLX-NEXT: andl %edi, %eax
298 ; NoVLX-NEXT: shrl $16, %edi
299 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
300 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
301 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
302 ; NoVLX-NEXT: kmovw %k0, %ecx
303 ; NoVLX-NEXT: andl %edi, %ecx
304 ; NoVLX-NEXT: shll $16, %ecx
305 ; NoVLX-NEXT: movzwl %ax, %eax
306 ; NoVLX-NEXT: orl %ecx, %eax
307 ; NoVLX-NEXT: vzeroupper
310 %0 = bitcast <4 x i64> %__a to <32 x i8>
311 %1 = bitcast <4 x i64> %__b to <32 x i8>
312 %2 = icmp eq <32 x i8> %0, %1
313 %3 = bitcast i32 %__u to <32 x i1>
314 %4 = and <32 x i1> %2, %3
315 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
316 %6 = bitcast <64 x i1> %5 to i64
; Masked byte-wise equality compare with a memory operand: %__a is compared
; against a <4 x i64> loaded from %__b (as <32 x i8>), the <32 x i1> result is
; ANDed with the i32 mask %__u, widened to 64 lanes with zero upper lanes, and
; bitcast to i64.
; Both expected sequences fold the load into vpcmpeqb as a (%rsi) operand. With
; only +avx512f each 128-bit half is masked with the matching 16 bits of %__u
; and the halves are merged with shll $16 / orl.
320 define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
321 ; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
322 ; VLX: # %bb.0: # %entry
323 ; VLX-NEXT: kmovd %edi, %k1
324 ; VLX-NEXT: vpcmpeqb (%rsi), %ymm0, %k0 {%k1}
325 ; VLX-NEXT: kmovq %k0, %rax
326 ; VLX-NEXT: vzeroupper
329 ; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
330 ; NoVLX: # %bb.0: # %entry
331 ; NoVLX-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
332 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
333 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
334 ; NoVLX-NEXT: kmovw %k0, %eax
335 ; NoVLX-NEXT: andl %edi, %eax
336 ; NoVLX-NEXT: shrl $16, %edi
337 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
338 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
339 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
340 ; NoVLX-NEXT: kmovw %k0, %ecx
341 ; NoVLX-NEXT: andl %edi, %ecx
342 ; NoVLX-NEXT: shll $16, %ecx
343 ; NoVLX-NEXT: movzwl %ax, %eax
344 ; NoVLX-NEXT: orl %ecx, %eax
345 ; NoVLX-NEXT: vzeroupper
348 %0 = bitcast <4 x i64> %__a to <32 x i8>
349 %load = load <4 x i64>, <4 x i64>* %__b
350 %1 = bitcast <4 x i64> %load to <32 x i8>
351 %2 = icmp eq <32 x i8> %0, %1
352 %3 = bitcast i32 %__u to <32 x i1>
353 %4 = and <32 x i1> %2, %3
354 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
355 %6 = bitcast <64 x i1> %5 to i64
; Word-wise equality compare of %__a and %__b (both reinterpreted as
; <8 x i16>). The <8 x i1> result is widened to 16 lanes; shuffle indices 8-15
; select lanes of the zeroinitializer operand, so the upper half is zero. The
; widened vector is bitcast to i16.
; Expected code with +avx512bw/+avx512vl: vpcmpeqw straight into %k0, then kmovd.
; Expected code with only +avx512f: xmm vpcmpeqw, vpmovsxwq to zmm, vptestmq.
360 define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
361 ; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
362 ; VLX: # %bb.0: # %entry
363 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
364 ; VLX-NEXT: kmovd %k0, %eax
365 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
368 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
369 ; NoVLX: # %bb.0: # %entry
370 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
371 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
372 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
373 ; NoVLX-NEXT: kmovw %k0, %eax
374 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
375 ; NoVLX-NEXT: vzeroupper
378 %0 = bitcast <2 x i64> %__a to <8 x i16>
379 %1 = bitcast <2 x i64> %__b to <8 x i16>
380 %2 = icmp eq <8 x i16> %0, %1
381 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
382 %4 = bitcast <16 x i1> %3 to i16
; Word-wise equality compare of %__a against a <2 x i64> loaded from %__b (both
; as <8 x i16>). The <8 x i1> result is widened to 16 lanes with zero upper
; lanes (indices 8-15 pick from zeroinitializer) and bitcast to i16.
; Both expected sequences fold the load into vpcmpeqw as a (%rdi) operand.
386 define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
387 ; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
388 ; VLX: # %bb.0: # %entry
389 ; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
390 ; VLX-NEXT: kmovd %k0, %eax
391 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
394 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
395 ; NoVLX: # %bb.0: # %entry
396 ; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
397 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
398 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
399 ; NoVLX-NEXT: kmovw %k0, %eax
400 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
401 ; NoVLX-NEXT: vzeroupper
404 %0 = bitcast <2 x i64> %__a to <8 x i16>
405 %load = load <2 x i64>, <2 x i64>* %__b
406 %1 = bitcast <2 x i64> %load to <8 x i16>
407 %2 = icmp eq <8 x i16> %0, %1
408 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
409 %4 = bitcast <16 x i1> %3 to i16
; Masked word-wise equality compare: the <8 x i1> result of comparing %__a and
; %__b is ANDed with the i8 mask %__u (bitcast to <8 x i1>), widened to 16
; lanes with zero upper lanes, and bitcast to i16.
; Expected code with +avx512bw/+avx512vl: %__u in %k1 as the vpcmpeqw
; write-mask.
; Expected code with only +avx512f: %__u is moved into %k1 with kmovw and used
; as the write-mask of vptestmq.
413 define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
414 ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
415 ; VLX: # %bb.0: # %entry
416 ; VLX-NEXT: kmovd %edi, %k1
417 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
418 ; VLX-NEXT: kmovd %k0, %eax
419 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
422 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
423 ; NoVLX: # %bb.0: # %entry
424 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
425 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
426 ; NoVLX-NEXT: kmovw %edi, %k1
427 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
428 ; NoVLX-NEXT: kmovw %k0, %eax
429 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
430 ; NoVLX-NEXT: vzeroupper
433 %0 = bitcast <2 x i64> %__a to <8 x i16>
434 %1 = bitcast <2 x i64> %__b to <8 x i16>
435 %2 = icmp eq <8 x i16> %0, %1
436 %3 = bitcast i8 %__u to <8 x i1>
437 %4 = and <8 x i1> %2, %3
438 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
439 %6 = bitcast <16 x i1> %5 to i16
; Masked word-wise equality compare with a memory operand: %__a is compared
; against a <2 x i64> loaded from %__b (as <8 x i16>), the <8 x i1> result is
; ANDed with the i8 mask %__u, widened to 16 lanes with zero upper lanes, and
; bitcast to i16.
; Both expected sequences fold the load into vpcmpeqw as a (%rsi) operand and
; apply %__u through %k1 (on vpcmpeqw with +avx512bw/+avx512vl, on vptestmq
; otherwise).
443 define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
444 ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
445 ; VLX: # %bb.0: # %entry
446 ; VLX-NEXT: kmovd %edi, %k1
447 ; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
448 ; VLX-NEXT: kmovd %k0, %eax
449 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
452 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
453 ; NoVLX: # %bb.0: # %entry
454 ; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
455 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
456 ; NoVLX-NEXT: kmovw %edi, %k1
457 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
458 ; NoVLX-NEXT: kmovw %k0, %eax
459 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
460 ; NoVLX-NEXT: vzeroupper
463 %0 = bitcast <2 x i64> %__a to <8 x i16>
464 %load = load <2 x i64>, <2 x i64>* %__b
465 %1 = bitcast <2 x i64> %load to <8 x i16>
466 %2 = icmp eq <8 x i16> %0, %1
467 %3 = bitcast i8 %__u to <8 x i1>
468 %4 = and <8 x i1> %2, %3
469 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
470 %6 = bitcast <16 x i1> %5 to i16
; Word-wise equality compare of %__a and %__b (as <8 x i16>) whose <8 x i1>
; result is widened to 32 lanes and bitcast to i32. Only shuffle indices 0-7
; select compare results; every remaining index is in the 8-15 range, which
; selects lanes of the zeroinitializer operand, so lanes 8-31 are zero.
; Expected code with +avx512bw/+avx512vl: vpcmpeqw into %k0, then kmovd.
; Expected code with only +avx512f: xmm vpcmpeqw, vpmovsxwq to zmm, vptestmq.
475 define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
476 ; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
477 ; VLX: # %bb.0: # %entry
478 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
479 ; VLX-NEXT: kmovd %k0, %eax
482 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
483 ; NoVLX: # %bb.0: # %entry
484 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
485 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
486 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
487 ; NoVLX-NEXT: kmovw %k0, %eax
488 ; NoVLX-NEXT: vzeroupper
491 %0 = bitcast <2 x i64> %__a to <8 x i16>
492 %1 = bitcast <2 x i64> %__b to <8 x i16>
493 %2 = icmp eq <8 x i16> %0, %1
494 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
495 %4 = bitcast <32 x i1> %3 to i32
; Word-wise equality compare of %__a against a <2 x i64> loaded from %__b (as
; <8 x i16>). The <8 x i1> result is widened to 32 lanes (indices above 7 all
; select zeroinitializer lanes, so lanes 8-31 are zero) and bitcast to i32.
; Both expected sequences fold the load into vpcmpeqw as a (%rdi) operand.
499 define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
500 ; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
501 ; VLX: # %bb.0: # %entry
502 ; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
503 ; VLX-NEXT: kmovd %k0, %eax
506 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
507 ; NoVLX: # %bb.0: # %entry
508 ; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
509 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
510 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
511 ; NoVLX-NEXT: kmovw %k0, %eax
512 ; NoVLX-NEXT: vzeroupper
515 %0 = bitcast <2 x i64> %__a to <8 x i16>
516 %load = load <2 x i64>, <2 x i64>* %__b
517 %1 = bitcast <2 x i64> %load to <8 x i16>
518 %2 = icmp eq <8 x i16> %0, %1
519 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
520 %4 = bitcast <32 x i1> %3 to i32
; Masked word-wise equality compare: the <8 x i1> result of comparing %__a and
; %__b is ANDed with the i8 mask %__u, widened to 32 lanes (indices above 7 all
; select zeroinitializer lanes, so lanes 8-31 are zero) and bitcast to i32.
; Expected code with +avx512bw/+avx512vl: %__u in %k1 as the vpcmpeqw
; write-mask.
; Expected code with only +avx512f: %__u in %k1 as the vptestmq write-mask.
524 define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
525 ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
526 ; VLX: # %bb.0: # %entry
527 ; VLX-NEXT: kmovd %edi, %k1
528 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
529 ; VLX-NEXT: kmovd %k0, %eax
532 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
533 ; NoVLX: # %bb.0: # %entry
534 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
535 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
536 ; NoVLX-NEXT: kmovw %edi, %k1
537 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
538 ; NoVLX-NEXT: kmovw %k0, %eax
539 ; NoVLX-NEXT: vzeroupper
542 %0 = bitcast <2 x i64> %__a to <8 x i16>
543 %1 = bitcast <2 x i64> %__b to <8 x i16>
544 %2 = icmp eq <8 x i16> %0, %1
545 %3 = bitcast i8 %__u to <8 x i1>
546 %4 = and <8 x i1> %2, %3
547 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
548 %6 = bitcast <32 x i1> %5 to i32
; Masked word-wise equality compare with a memory operand: %__a is compared
; against a <2 x i64> loaded from %__b (as <8 x i16>), the <8 x i1> result is
; ANDed with the i8 mask %__u, widened to 32 lanes (lanes 8-31 come from
; zeroinitializer) and bitcast to i32.
; Both expected sequences fold the load into vpcmpeqw as a (%rsi) operand and
; apply %__u through %k1.
552 define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
553 ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
554 ; VLX: # %bb.0: # %entry
555 ; VLX-NEXT: kmovd %edi, %k1
556 ; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
557 ; VLX-NEXT: kmovd %k0, %eax
560 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
561 ; NoVLX: # %bb.0: # %entry
562 ; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
563 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
564 ; NoVLX-NEXT: kmovw %edi, %k1
565 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
566 ; NoVLX-NEXT: kmovw %k0, %eax
567 ; NoVLX-NEXT: vzeroupper
570 %0 = bitcast <2 x i64> %__a to <8 x i16>
571 %load = load <2 x i64>, <2 x i64>* %__b
572 %1 = bitcast <2 x i64> %load to <8 x i16>
573 %2 = icmp eq <8 x i16> %0, %1
574 %3 = bitcast i8 %__u to <8 x i1>
575 %4 = and <8 x i1> %2, %3
576 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
577 %6 = bitcast <32 x i1> %5 to i32
; Word-wise equality compare of %__a and %__b (as <8 x i16>) whose <8 x i1>
; result is widened to 64 lanes and bitcast to i64. Only shuffle indices 0-7
; select compare results; every remaining index is in the 8-15 range, which
; selects lanes of the zeroinitializer operand, so lanes 8-63 are zero.
; Expected code with +avx512bw/+avx512vl: vpcmpeqw into %k0, then kmovq.
; Expected code with only +avx512f: xmm vpcmpeqw, vpmovsxwq to zmm, vptestmq,
; kmovw.
582 define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
583 ; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
584 ; VLX: # %bb.0: # %entry
585 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
586 ; VLX-NEXT: kmovq %k0, %rax
589 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
590 ; NoVLX: # %bb.0: # %entry
591 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
592 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
593 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
594 ; NoVLX-NEXT: kmovw %k0, %eax
595 ; NoVLX-NEXT: vzeroupper
598 %0 = bitcast <2 x i64> %__a to <8 x i16>
599 %1 = bitcast <2 x i64> %__b to <8 x i16>
600 %2 = icmp eq <8 x i16> %0, %1
601 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
602 %4 = bitcast <64 x i1> %3 to i64
; Word-wise equality compare of %__a against a <2 x i64> loaded from %__b (as
; <8 x i16>). The <8 x i1> result is widened to 64 lanes (indices above 7 all
; select zeroinitializer lanes, so lanes 8-63 are zero) and bitcast to i64.
; Both expected sequences fold the load into vpcmpeqw as a (%rdi) operand.
606 define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
607 ; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
608 ; VLX: # %bb.0: # %entry
609 ; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
610 ; VLX-NEXT: kmovq %k0, %rax
613 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
614 ; NoVLX: # %bb.0: # %entry
615 ; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
616 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
617 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
618 ; NoVLX-NEXT: kmovw %k0, %eax
619 ; NoVLX-NEXT: vzeroupper
622 %0 = bitcast <2 x i64> %__a to <8 x i16>
623 %load = load <2 x i64>, <2 x i64>* %__b
624 %1 = bitcast <2 x i64> %load to <8 x i16>
625 %2 = icmp eq <8 x i16> %0, %1
626 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
627 %4 = bitcast <64 x i1> %3 to i64
; Masked word-wise equality compare: the <8 x i1> result of comparing %__a and
; %__b is ANDed with the i8 mask %__u, widened to 64 lanes (indices above 7 all
; select zeroinitializer lanes, so lanes 8-63 are zero) and bitcast to i64.
; Expected code with +avx512bw/+avx512vl: %__u in %k1 as the vpcmpeqw
; write-mask, result read with kmovq.
; Expected code with only +avx512f: %__u in %k1 as the vptestmq write-mask.
631 define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
632 ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
633 ; VLX: # %bb.0: # %entry
634 ; VLX-NEXT: kmovd %edi, %k1
635 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
636 ; VLX-NEXT: kmovq %k0, %rax
639 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
640 ; NoVLX: # %bb.0: # %entry
641 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
642 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
643 ; NoVLX-NEXT: kmovw %edi, %k1
644 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
645 ; NoVLX-NEXT: kmovw %k0, %eax
646 ; NoVLX-NEXT: vzeroupper
649 %0 = bitcast <2 x i64> %__a to <8 x i16>
650 %1 = bitcast <2 x i64> %__b to <8 x i16>
651 %2 = icmp eq <8 x i16> %0, %1
652 %3 = bitcast i8 %__u to <8 x i1>
653 %4 = and <8 x i1> %2, %3
654 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
655 %6 = bitcast <64 x i1> %5 to i64
; Masked word-wise equality compare with a memory operand: %__a is compared
; against a <2 x i64> loaded from %__b (as <8 x i16>), the <8 x i1> result is
; ANDed with the i8 mask %__u, widened to 64 lanes (lanes 8-63 come from
; zeroinitializer) and bitcast to i64.
; Both expected sequences fold the load into vpcmpeqw as a (%rsi) operand and
; apply %__u through %k1.
659 define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
660 ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
661 ; VLX: # %bb.0: # %entry
662 ; VLX-NEXT: kmovd %edi, %k1
663 ; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
664 ; VLX-NEXT: kmovq %k0, %rax
667 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
668 ; NoVLX: # %bb.0: # %entry
669 ; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
670 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
671 ; NoVLX-NEXT: kmovw %edi, %k1
672 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
673 ; NoVLX-NEXT: kmovw %k0, %eax
674 ; NoVLX-NEXT: vzeroupper
677 %0 = bitcast <2 x i64> %__a to <8 x i16>
678 %load = load <2 x i64>, <2 x i64>* %__b
679 %1 = bitcast <2 x i64> %load to <8 x i16>
680 %2 = icmp eq <8 x i16> %0, %1
681 %3 = bitcast i8 %__u to <8 x i1>
682 %4 = and <8 x i1> %2, %3
683 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
684 %6 = bitcast <64 x i1> %5 to i64
; Word-wise equality compare of two 256-bit inputs (%__a, %__b as <16 x i16>).
; The <16 x i1> result is widened to 32 lanes; shuffle indices 16-31 select
; lanes of the zeroinitializer operand, so the upper half is zero. The widened
; vector is bitcast to i32.
; Expected code with +avx512bw/+avx512vl: ymm vpcmpeqw into %k0, then kmovd.
; Expected code with only +avx512f: ymm vpcmpeqw, vpmovsxwd to zmm, vptestmd.
689 define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
690 ; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
691 ; VLX: # %bb.0: # %entry
692 ; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
693 ; VLX-NEXT: kmovd %k0, %eax
694 ; VLX-NEXT: vzeroupper
697 ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
698 ; NoVLX: # %bb.0: # %entry
699 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
700 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
701 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
702 ; NoVLX-NEXT: kmovw %k0, %eax
703 ; NoVLX-NEXT: vzeroupper
706 %0 = bitcast <4 x i64> %__a to <16 x i16>
707 %1 = bitcast <4 x i64> %__b to <16 x i16>
708 %2 = icmp eq <16 x i16> %0, %1
709 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
710 %4 = bitcast <32 x i1> %3 to i32
; Equality compare of sixteen i16 lanes of %__a against a vector loaded from
; %__b. The 16-lane result is widened to 32 lanes (shuffle indices 16-31
; select lanes of the zeroinitializer operand) and bitcast to i32.
714 define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
715 ; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
716 ; VLX: # %bb.0: # %entry
717 ; VLX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0
718 ; VLX-NEXT: kmovd %k0, %eax
719 ; VLX-NEXT: vzeroupper
722 ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
723 ; NoVLX: # %bb.0: # %entry
724 ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
725 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
726 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
727 ; NoVLX-NEXT: kmovw %k0, %eax
728 ; NoVLX-NEXT: vzeroupper
731 %0 = bitcast <4 x i64> %__a to <16 x i16>
732 %load = load <4 x i64>, <4 x i64>* %__b
733 %1 = bitcast <4 x i64> %load to <16 x i16>
734 %2 = icmp eq <16 x i16> %0, %1
735 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
736 %4 = bitcast <32 x i1> %3 to i32
; Masked equality compare of sixteen i16 lanes of %__a and %__b, ANDed with
; the bits of %__u. The 16-lane result is widened to 32 lanes (shuffle indices
; 16-31 select lanes of the zeroinitializer operand) and bitcast to i32.
; The NoVLX checks expect the mask to be applied with a scalar andl after the
; compare result has been moved to a general-purpose register.
740 define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
741 ; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
742 ; VLX: # %bb.0: # %entry
743 ; VLX-NEXT: kmovd %edi, %k1
744 ; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
745 ; VLX-NEXT: kmovd %k0, %eax
746 ; VLX-NEXT: vzeroupper
749 ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
750 ; NoVLX: # %bb.0: # %entry
751 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
752 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
753 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
754 ; NoVLX-NEXT: kmovw %k0, %eax
755 ; NoVLX-NEXT: andl %edi, %eax
756 ; NoVLX-NEXT: vzeroupper
759 %0 = bitcast <4 x i64> %__a to <16 x i16>
760 %1 = bitcast <4 x i64> %__b to <16 x i16>
761 %2 = icmp eq <16 x i16> %0, %1
762 %3 = bitcast i16 %__u to <16 x i1>
763 %4 = and <16 x i1> %2, %3
764 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
765 %6 = bitcast <32 x i1> %5 to i32
; Masked equality compare of sixteen i16 lanes: %__a against a vector loaded
; from %__b, ANDed with the bits of %__u. The 16-lane result is widened to 32
; lanes (shuffle indices 16-31 select lanes of the zeroinitializer operand)
; and bitcast to i32.
769 define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
770 ; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
771 ; VLX: # %bb.0: # %entry
772 ; VLX-NEXT: kmovd %edi, %k1
773 ; VLX-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
774 ; VLX-NEXT: kmovd %k0, %eax
775 ; VLX-NEXT: vzeroupper
778 ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
779 ; NoVLX: # %bb.0: # %entry
780 ; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
781 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
782 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
783 ; NoVLX-NEXT: kmovw %k0, %eax
784 ; NoVLX-NEXT: andl %edi, %eax
785 ; NoVLX-NEXT: vzeroupper
788 %0 = bitcast <4 x i64> %__a to <16 x i16>
789 %load = load <4 x i64>, <4 x i64>* %__b
790 %1 = bitcast <4 x i64> %load to <16 x i16>
791 %2 = icmp eq <16 x i16> %0, %1
792 %3 = bitcast i16 %__u to <16 x i1>
793 %4 = and <16 x i1> %2, %3
794 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
795 %6 = bitcast <32 x i1> %5 to i32
; Equality compare of sixteen i16 lanes of %__a and %__b. The 16-lane result
; is widened to 64 lanes (shuffle indices 16-31 select lanes of the
; zeroinitializer operand, so the upper 48 lanes are zero) and bitcast to i64.
800 define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
801 ; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
802 ; VLX: # %bb.0: # %entry
803 ; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
804 ; VLX-NEXT: kmovq %k0, %rax
805 ; VLX-NEXT: vzeroupper
808 ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
809 ; NoVLX: # %bb.0: # %entry
810 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
811 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
812 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
813 ; NoVLX-NEXT: kmovw %k0, %eax
814 ; NoVLX-NEXT: vzeroupper
817 %0 = bitcast <4 x i64> %__a to <16 x i16>
818 %1 = bitcast <4 x i64> %__b to <16 x i16>
819 %2 = icmp eq <16 x i16> %0, %1
820 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
821 %4 = bitcast <64 x i1> %3 to i64
; Equality compare of sixteen i16 lanes of %__a against a vector loaded from
; %__b. The 16-lane result is widened to 64 lanes (shuffle indices 16-31
; select lanes of the zeroinitializer operand) and bitcast to i64.
825 define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
826 ; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
827 ; VLX: # %bb.0: # %entry
828 ; VLX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0
829 ; VLX-NEXT: kmovq %k0, %rax
830 ; VLX-NEXT: vzeroupper
833 ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
834 ; NoVLX: # %bb.0: # %entry
835 ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
836 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
837 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
838 ; NoVLX-NEXT: kmovw %k0, %eax
839 ; NoVLX-NEXT: vzeroupper
842 %0 = bitcast <4 x i64> %__a to <16 x i16>
843 %load = load <4 x i64>, <4 x i64>* %__b
844 %1 = bitcast <4 x i64> %load to <16 x i16>
845 %2 = icmp eq <16 x i16> %0, %1
846 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
847 %4 = bitcast <64 x i1> %3 to i64
; Masked equality compare of sixteen i16 lanes of %__a and %__b, ANDed with
; the bits of %__u. The 16-lane result is widened to 64 lanes (shuffle indices
; 16-31 select lanes of the zeroinitializer operand) and bitcast to i64.
851 define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
852 ; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
853 ; VLX: # %bb.0: # %entry
854 ; VLX-NEXT: kmovd %edi, %k1
855 ; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
856 ; VLX-NEXT: kmovq %k0, %rax
857 ; VLX-NEXT: vzeroupper
860 ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
861 ; NoVLX: # %bb.0: # %entry
862 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
863 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
864 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
865 ; NoVLX-NEXT: kmovw %k0, %eax
866 ; NoVLX-NEXT: andl %edi, %eax
867 ; NoVLX-NEXT: vzeroupper
870 %0 = bitcast <4 x i64> %__a to <16 x i16>
871 %1 = bitcast <4 x i64> %__b to <16 x i16>
872 %2 = icmp eq <16 x i16> %0, %1
873 %3 = bitcast i16 %__u to <16 x i1>
874 %4 = and <16 x i1> %2, %3
875 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
876 %6 = bitcast <64 x i1> %5 to i64
; Masked equality compare of sixteen i16 lanes: %__a against a vector loaded
; from %__b, ANDed with the bits of %__u. The 16-lane result is widened to 64
; lanes (shuffle indices 16-31 select lanes of the zeroinitializer operand)
; and bitcast to i64.
880 define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
881 ; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
882 ; VLX: # %bb.0: # %entry
883 ; VLX-NEXT: kmovd %edi, %k1
884 ; VLX-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
885 ; VLX-NEXT: kmovq %k0, %rax
886 ; VLX-NEXT: vzeroupper
889 ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
890 ; NoVLX: # %bb.0: # %entry
891 ; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
892 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
893 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
894 ; NoVLX-NEXT: kmovw %k0, %eax
895 ; NoVLX-NEXT: andl %edi, %eax
896 ; NoVLX-NEXT: vzeroupper
899 %0 = bitcast <4 x i64> %__a to <16 x i16>
900 %load = load <4 x i64>, <4 x i64>* %__b
901 %1 = bitcast <4 x i64> %load to <16 x i16>
902 %2 = icmp eq <16 x i16> %0, %1
903 %3 = bitcast i16 %__u to <16 x i1>
904 %4 = and <16 x i1> %2, %3
905 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
906 %6 = bitcast <64 x i1> %5 to i64
; Equality compare of thirty-two i16 lanes of %__a and %__b. The 32-lane
; result is widened to 64 lanes (shuffle indices 32-63 select lanes of the
; zeroinitializer operand, so the upper half is zero) and bitcast to i64.
; The NoVLX checks expect the 512-bit inputs to be split with vextracti64x4,
; each 256-bit half compared separately, and the two 16-bit results merged
; with shll $16 / orl.
911 define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
912 ; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
913 ; VLX: # %bb.0: # %entry
914 ; VLX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
915 ; VLX-NEXT: kmovq %k0, %rax
916 ; VLX-NEXT: vzeroupper
919 ; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
920 ; NoVLX: # %bb.0: # %entry
921 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
922 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3
923 ; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2
924 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
925 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
926 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
927 ; NoVLX-NEXT: kmovw %k0, %ecx
928 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm0
929 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
930 ; NoVLX-NEXT: kmovw %k0, %eax
931 ; NoVLX-NEXT: shll $16, %eax
932 ; NoVLX-NEXT: orl %ecx, %eax
933 ; NoVLX-NEXT: vzeroupper
936 %0 = bitcast <8 x i64> %__a to <32 x i16>
937 %1 = bitcast <8 x i64> %__b to <32 x i16>
938 %2 = icmp eq <32 x i16> %0, %1
939 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
940 %4 = bitcast <64 x i1> %3 to i64
; Equality compare of thirty-two i16 lanes of %__a against a vector loaded
; from %__b. The 32-lane result is widened to 64 lanes (shuffle indices 32-63
; select lanes of the zeroinitializer operand) and bitcast to i64.
; The NoVLX checks expect two 256-bit compares, the second one reading its
; memory operand at offset 32 from the pointer.
944 define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
945 ; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
946 ; VLX: # %bb.0: # %entry
947 ; VLX-NEXT: vpcmpeqw (%rdi), %zmm0, %k0
948 ; VLX-NEXT: kmovq %k0, %rax
949 ; VLX-NEXT: vzeroupper
952 ; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
953 ; NoVLX: # %bb.0: # %entry
954 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1
955 ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
956 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
957 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
958 ; NoVLX-NEXT: kmovw %k0, %ecx
959 ; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm1, %ymm0
960 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
961 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
962 ; NoVLX-NEXT: kmovw %k0, %eax
963 ; NoVLX-NEXT: shll $16, %eax
964 ; NoVLX-NEXT: orl %ecx, %eax
965 ; NoVLX-NEXT: vzeroupper
968 %0 = bitcast <8 x i64> %__a to <32 x i16>
969 %load = load <8 x i64>, <8 x i64>* %__b
970 %1 = bitcast <8 x i64> %load to <32 x i16>
971 %2 = icmp eq <32 x i16> %0, %1
972 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
973 %4 = bitcast <64 x i1> %3 to i64
; Masked equality compare of thirty-two i16 lanes of %__a and %__b, ANDed with
; the bits of %__u. The 32-lane result is widened to 64 lanes (shuffle indices
; 32-63 select lanes of the zeroinitializer operand) and bitcast to i64.
; The NoVLX checks expect each 256-bit half to be compared separately, with
; the low and high 16 bits of the mask applied by scalar andl (the high half
; after shrl $16 of the mask register).
977 define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
978 ; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
979 ; VLX: # %bb.0: # %entry
980 ; VLX-NEXT: kmovd %edi, %k1
981 ; VLX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
982 ; VLX-NEXT: kmovq %k0, %rax
983 ; VLX-NEXT: vzeroupper
986 ; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
987 ; NoVLX: # %bb.0: # %entry
988 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm2
989 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
990 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
991 ; NoVLX-NEXT: kmovw %k0, %eax
992 ; NoVLX-NEXT: andl %edi, %eax
993 ; NoVLX-NEXT: shrl $16, %edi
994 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
995 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
996 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
997 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
998 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
999 ; NoVLX-NEXT: kmovw %k0, %ecx
1000 ; NoVLX-NEXT: andl %edi, %ecx
1001 ; NoVLX-NEXT: shll $16, %ecx
1002 ; NoVLX-NEXT: movzwl %ax, %eax
1003 ; NoVLX-NEXT: orl %ecx, %eax
1004 ; NoVLX-NEXT: vzeroupper
1007 %0 = bitcast <8 x i64> %__a to <32 x i16>
1008 %1 = bitcast <8 x i64> %__b to <32 x i16>
1009 %2 = icmp eq <32 x i16> %0, %1
1010 %3 = bitcast i32 %__u to <32 x i1>
1011 %4 = and <32 x i1> %2, %3
1012 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
1013 %6 = bitcast <64 x i1> %5 to i64
; Masked equality compare of thirty-two i16 lanes: %__a against a vector
; loaded from %__b, ANDed with the bits of %__u. The 32-lane result is widened
; to 64 lanes (shuffle indices 32-63 select lanes of the zeroinitializer
; operand) and bitcast to i64.
1017 define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
1018 ; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
1019 ; VLX: # %bb.0: # %entry
1020 ; VLX-NEXT: kmovd %edi, %k1
1021 ; VLX-NEXT: vpcmpeqw (%rsi), %zmm0, %k0 {%k1}
1022 ; VLX-NEXT: kmovq %k0, %rax
1023 ; VLX-NEXT: vzeroupper
1026 ; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
1027 ; NoVLX: # %bb.0: # %entry
1028 ; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm1
1029 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
1030 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
1031 ; NoVLX-NEXT: kmovw %k0, %eax
1032 ; NoVLX-NEXT: andl %edi, %eax
1033 ; NoVLX-NEXT: shrl $16, %edi
1034 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
1035 ; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm0, %ymm0
1036 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
1037 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1038 ; NoVLX-NEXT: kmovw %k0, %ecx
1039 ; NoVLX-NEXT: andl %edi, %ecx
1040 ; NoVLX-NEXT: shll $16, %ecx
1041 ; NoVLX-NEXT: movzwl %ax, %eax
1042 ; NoVLX-NEXT: orl %ecx, %eax
1043 ; NoVLX-NEXT: vzeroupper
1046 %0 = bitcast <8 x i64> %__a to <32 x i16>
1047 %load = load <8 x i64>, <8 x i64>* %__b
1048 %1 = bitcast <8 x i64> %load to <32 x i16>
1049 %2 = icmp eq <32 x i16> %0, %1
1050 %3 = bitcast i32 %__u to <32 x i1>
1051 %4 = and <32 x i1> %2, %3
1052 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
1053 %6 = bitcast <64 x i1> %5 to i64
; Equality compare of four i32 lanes of %__a and %__b. The 4-lane result is
; widened to 8 lanes (shuffle indices 4-7 select lanes of the zeroinitializer
; operand, so the upper four lanes are zero) and bitcast to i8.
; The NoVLX checks expect a 512-bit compare followed by kshiftlw/kshiftrw by
; 12, which keeps only the low four bits of the 16-bit mask.
1058 define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1059 ; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
1060 ; VLX: # %bb.0: # %entry
1061 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
1062 ; VLX-NEXT: kmovd %k0, %eax
1063 ; VLX-NEXT: # kill: def $al killed $al killed $eax
1066 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
1067 ; NoVLX: # %bb.0: # %entry
1068 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1069 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1070 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1071 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1072 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1073 ; NoVLX-NEXT: kmovw %k0, %eax
1074 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
1075 ; NoVLX-NEXT: vzeroupper
1078 %0 = bitcast <2 x i64> %__a to <4 x i32>
1079 %1 = bitcast <2 x i64> %__b to <4 x i32>
1080 %2 = icmp eq <4 x i32> %0, %1
1081 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1082 %4 = bitcast <8 x i1> %3 to i8
; Equality compare of four i32 lanes of %__a against a vector loaded from
; %__b. The 4-lane result is widened to 8 lanes (shuffle indices 4-7 select
; lanes of the zeroinitializer operand) and bitcast to i8.
1086 define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1087 ; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
1088 ; VLX: # %bb.0: # %entry
1089 ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
1090 ; VLX-NEXT: kmovd %k0, %eax
1091 ; VLX-NEXT: # kill: def $al killed $al killed $eax
1094 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
1095 ; NoVLX: # %bb.0: # %entry
1096 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1097 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
1098 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1099 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1100 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1101 ; NoVLX-NEXT: kmovw %k0, %eax
1102 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
1103 ; NoVLX-NEXT: vzeroupper
1106 %0 = bitcast <2 x i64> %__a to <4 x i32>
1107 %load = load <2 x i64>, <2 x i64>* %__b
1108 %1 = bitcast <2 x i64> %load to <4 x i32>
1109 %2 = icmp eq <4 x i32> %0, %1
1110 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1111 %4 = bitcast <8 x i1> %3 to i8
; Masked equality compare of four i32 lanes of %__a and %__b. Only the low
; four bits of the i8 mask %__u are used (%extract.i takes lanes 0-3 of the
; 8-lane mask vector). The 4-lane result is widened to 8 lanes (shuffle
; indices 4-7 select lanes of the zeroinitializer operand) and bitcast to i8.
1115 define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1116 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
1117 ; VLX: # %bb.0: # %entry
1118 ; VLX-NEXT: kmovd %edi, %k1
1119 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1120 ; VLX-NEXT: kmovd %k0, %eax
1121 ; VLX-NEXT: # kill: def $al killed $al killed $eax
1124 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
1125 ; NoVLX: # %bb.0: # %entry
1126 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1127 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1128 ; NoVLX-NEXT: kmovw %edi, %k1
1129 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1130 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1131 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1132 ; NoVLX-NEXT: kmovw %k0, %eax
1133 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
1134 ; NoVLX-NEXT: vzeroupper
1137 %0 = bitcast <2 x i64> %__a to <4 x i32>
1138 %1 = bitcast <2 x i64> %__b to <4 x i32>
1139 %2 = icmp eq <4 x i32> %0, %1
1140 %3 = bitcast i8 %__u to <8 x i1>
1141 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1142 %4 = and <4 x i1> %2, %extract.i
1143 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1144 %6 = bitcast <8 x i1> %5 to i8
; Masked equality compare of four i32 lanes: %__a against a vector loaded from
; %__b, ANDed with the low four bits of %__u (%extract.i takes lanes 0-3 of
; the 8-lane mask vector). The 4-lane result is widened to 8 lanes (shuffle
; indices 4-7 select lanes of the zeroinitializer operand) and bitcast to i8.
1148 define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1149 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem:
1150 ; VLX: # %bb.0: # %entry
1151 ; VLX-NEXT: kmovd %edi, %k1
1152 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
1153 ; VLX-NEXT: kmovd %k0, %eax
1154 ; VLX-NEXT: # kill: def $al killed $al killed $eax
1157 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem:
1158 ; NoVLX: # %bb.0: # %entry
1159 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1160 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
1161 ; NoVLX-NEXT: kmovw %edi, %k1
1162 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1163 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1164 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1165 ; NoVLX-NEXT: kmovw %k0, %eax
1166 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
1167 ; NoVLX-NEXT: vzeroupper
1170 %0 = bitcast <2 x i64> %__a to <4 x i32>
1171 %load = load <2 x i64>, <2 x i64>* %__b
1172 %1 = bitcast <2 x i64> %load to <4 x i32>
1173 %2 = icmp eq <4 x i32> %0, %1
1174 %3 = bitcast i8 %__u to <8 x i1>
1175 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1176 %4 = and <4 x i1> %2, %extract.i
1177 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1178 %6 = bitcast <8 x i1> %5 to i8
; Equality compare of four i32 lanes of %__a against a scalar i32 loaded from
; %__b and splatted to all four lanes (insertelement into lane 0 followed by a
; shuffle with an all-zero index mask). The 4-lane result is widened to 8
; lanes (shuffle indices 4-7 select lanes of the zeroinitializer operand) and
; bitcast to i8. The VLX checks expect the splat to fold into a {1to4}
; broadcast memory operand; the NoVLX checks expect a separate vpbroadcastd.
1183 define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
1184 ; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b:
1185 ; VLX: # %bb.0: # %entry
1186 ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
1187 ; VLX-NEXT: kmovd %k0, %eax
1188 ; VLX-NEXT: # kill: def $al killed $al killed $eax
1191 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b:
1192 ; NoVLX: # %bb.0: # %entry
1193 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1194 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
1195 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1196 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1197 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1198 ; NoVLX-NEXT: kmovw %k0, %eax
1199 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
1200 ; NoVLX-NEXT: vzeroupper
1203 %0 = bitcast <2 x i64> %__a to <4 x i32>
1204 %load = load i32, i32* %__b
1205 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1206 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1207 %2 = icmp eq <4 x i32> %0, %1
1208 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1209 %4 = bitcast <8 x i1> %3 to i8
; Masked equality compare of four i32 lanes of %__a against a scalar i32
; loaded from %__b and splatted to all four lanes, ANDed with the low four
; bits of %__u (%extract.i takes lanes 0-3 of the 8-lane mask vector). The
; 4-lane result is widened to 8 lanes (shuffle indices 4-7 select lanes of the
; zeroinitializer operand) and bitcast to i8.
1213 define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
1214 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
1215 ; VLX: # %bb.0: # %entry
1216 ; VLX-NEXT: kmovd %edi, %k1
1217 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
1218 ; VLX-NEXT: kmovd %k0, %eax
1219 ; VLX-NEXT: # kill: def $al killed $al killed $eax
1222 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
1223 ; NoVLX: # %bb.0: # %entry
1224 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1225 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
1226 ; NoVLX-NEXT: kmovw %edi, %k1
1227 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1228 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1229 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1230 ; NoVLX-NEXT: kmovw %k0, %eax
1231 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
1232 ; NoVLX-NEXT: vzeroupper
1235 %0 = bitcast <2 x i64> %__a to <4 x i32>
1236 %load = load i32, i32* %__b
1237 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1238 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1239 %2 = icmp eq <4 x i32> %0, %1
1240 %3 = bitcast i8 %__u to <8 x i1>
1241 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1242 %4 = and <4 x i1> %extract.i, %2
1243 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1244 %6 = bitcast <8 x i1> %5 to i8
; Equality compare of four i32 lanes of %__a and %__b. The 4-lane result is
; widened to 16 lanes (shuffle indices 4-7 select lanes of the zeroinitializer
; operand, so the upper twelve lanes are zero) and bitcast to i16.
1249 define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1250 ; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask:
1251 ; VLX: # %bb.0: # %entry
1252 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
1253 ; VLX-NEXT: kmovd %k0, %eax
1254 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1257 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask:
1258 ; NoVLX: # %bb.0: # %entry
1259 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1260 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1261 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1262 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1263 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1264 ; NoVLX-NEXT: kmovw %k0, %eax
1265 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1266 ; NoVLX-NEXT: vzeroupper
1269 %0 = bitcast <2 x i64> %__a to <4 x i32>
1270 %1 = bitcast <2 x i64> %__b to <4 x i32>
1271 %2 = icmp eq <4 x i32> %0, %1
1272 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1273 %4 = bitcast <16 x i1> %3 to i16
; Equality compare of four i32 lanes of %__a against a vector loaded from
; %__b. The 4-lane result is widened to 16 lanes (shuffle indices 4-7 select
; lanes of the zeroinitializer operand) and bitcast to i16.
1277 define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1278 ; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem:
1279 ; VLX: # %bb.0: # %entry
1280 ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
1281 ; VLX-NEXT: kmovd %k0, %eax
1282 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1285 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem:
1286 ; NoVLX: # %bb.0: # %entry
1287 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1288 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
1289 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1290 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1291 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1292 ; NoVLX-NEXT: kmovw %k0, %eax
1293 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1294 ; NoVLX-NEXT: vzeroupper
1297 %0 = bitcast <2 x i64> %__a to <4 x i32>
1298 %load = load <2 x i64>, <2 x i64>* %__b
1299 %1 = bitcast <2 x i64> %load to <4 x i32>
1300 %2 = icmp eq <4 x i32> %0, %1
1301 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1302 %4 = bitcast <16 x i1> %3 to i16
; Masked equality compare of four i32 lanes of %__a and %__b, ANDed with the
; low four bits of %__u (%extract.i takes lanes 0-3 of the 8-lane mask
; vector). The 4-lane result is widened to 16 lanes (shuffle indices 4-7
; select lanes of the zeroinitializer operand) and bitcast to i16.
1306 define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1307 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
1308 ; VLX: # %bb.0: # %entry
1309 ; VLX-NEXT: kmovd %edi, %k1
1310 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1311 ; VLX-NEXT: kmovd %k0, %eax
1312 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1315 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
1316 ; NoVLX: # %bb.0: # %entry
1317 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1318 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1319 ; NoVLX-NEXT: kmovw %edi, %k1
1320 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1321 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1322 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1323 ; NoVLX-NEXT: kmovw %k0, %eax
1324 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1325 ; NoVLX-NEXT: vzeroupper
1328 %0 = bitcast <2 x i64> %__a to <4 x i32>
1329 %1 = bitcast <2 x i64> %__b to <4 x i32>
1330 %2 = icmp eq <4 x i32> %0, %1
1331 %3 = bitcast i8 %__u to <8 x i1>
1332 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1333 %4 = and <4 x i1> %2, %extract.i
1334 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1335 %6 = bitcast <16 x i1> %5 to i16
; Masked equality compare of four i32 lanes: %__a against a vector loaded from
; %__b, ANDed with the low four bits of %__u (%extract.i takes lanes 0-3 of
; the 8-lane mask vector). The 4-lane result is widened to 16 lanes (shuffle
; indices 4-7 select lanes of the zeroinitializer operand) and bitcast to i16.
1339 define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1340 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem:
1341 ; VLX: # %bb.0: # %entry
1342 ; VLX-NEXT: kmovd %edi, %k1
1343 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
1344 ; VLX-NEXT: kmovd %k0, %eax
1345 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1348 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem:
1349 ; NoVLX: # %bb.0: # %entry
1350 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1351 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
1352 ; NoVLX-NEXT: kmovw %edi, %k1
1353 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1354 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1355 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1356 ; NoVLX-NEXT: kmovw %k0, %eax
1357 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1358 ; NoVLX-NEXT: vzeroupper
1361 %0 = bitcast <2 x i64> %__a to <4 x i32>
1362 %load = load <2 x i64>, <2 x i64>* %__b
1363 %1 = bitcast <2 x i64> %load to <4 x i32>
1364 %2 = icmp eq <4 x i32> %0, %1
1365 %3 = bitcast i8 %__u to <8 x i1>
1366 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1367 %4 = and <4 x i1> %2, %extract.i
1368 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1369 %6 = bitcast <16 x i1> %5 to i16
; Equality compare of four i32 lanes of %__a against a scalar i32 loaded from
; %__b and splatted to all four lanes (insertelement into lane 0 followed by a
; shuffle with an all-zero index mask). The 4-lane result is widened to 16
; lanes (shuffle indices 4-7 select lanes of the zeroinitializer operand) and
; bitcast to i16.
1374 define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
1375 ; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b:
1376 ; VLX: # %bb.0: # %entry
1377 ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
1378 ; VLX-NEXT: kmovd %k0, %eax
1379 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1382 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b:
1383 ; NoVLX: # %bb.0: # %entry
1384 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1385 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
1386 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1387 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1388 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1389 ; NoVLX-NEXT: kmovw %k0, %eax
1390 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1391 ; NoVLX-NEXT: vzeroupper
1394 %0 = bitcast <2 x i64> %__a to <4 x i32>
1395 %load = load i32, i32* %__b
1396 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1397 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1398 %2 = icmp eq <4 x i32> %0, %1
1399 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1400 %4 = bitcast <16 x i1> %3 to i16
; Masked equality compare of four i32 lanes of %__a against a scalar i32
; loaded from %__b and splatted to all four lanes, ANDed with the low four
; bits of %__u (%extract.i takes lanes 0-3 of the 8-lane mask vector). The
; 4-lane result is widened to 16 lanes (shuffle indices 4-7 select lanes of
; the zeroinitializer operand) and bitcast to i16.
1404 define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
1405 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
1406 ; VLX: # %bb.0: # %entry
1407 ; VLX-NEXT: kmovd %edi, %k1
1408 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
1409 ; VLX-NEXT: kmovd %k0, %eax
1410 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1413 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
1414 ; NoVLX: # %bb.0: # %entry
1415 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1416 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
1417 ; NoVLX-NEXT: kmovw %edi, %k1
1418 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1419 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1420 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1421 ; NoVLX-NEXT: kmovw %k0, %eax
1422 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1423 ; NoVLX-NEXT: vzeroupper
1426 %0 = bitcast <2 x i64> %__a to <4 x i32>
1427 %load = load i32, i32* %__b
1428 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1429 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1430 %2 = icmp eq <4 x i32> %0, %1
1431 %3 = bitcast i8 %__u to <8 x i1>
1432 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1433 %4 = and <4 x i1> %extract.i, %2
1434 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1435 %6 = bitcast <16 x i1> %5 to i16
; Register-register equality compare of two <4 x i32> values; the 4-bit result is
; widened to 32 bits and bitcast to i32.
; The VLX run expects a single xmm vpcmpeqd into %k0. The NoVLX run expects the
; operands to be treated as zmm, with kshiftlw/kshiftrw $12 keeping the low 4 bits.
1440 define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1441 ; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
1442 ; VLX: # %bb.0: # %entry
1443 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
1444 ; VLX-NEXT: kmovd %k0, %eax
1447 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
1448 ; NoVLX: # %bb.0: # %entry
1449 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1450 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1451 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1452 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1453 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1454 ; NoVLX-NEXT: kmovw %k0, %eax
1455 ; NoVLX-NEXT: vzeroupper
1458 %0 = bitcast <2 x i64> %__a to <4 x i32>
1459 %1 = bitcast <2 x i64> %__b to <4 x i32>
1460 %2 = icmp eq <4 x i32> %0, %1
; Shuffle indices 4-7 select elements of the zeroinitializer operand, so bits 4-31 are zero.
1461 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1462 %4 = bitcast <32 x i1> %3 to i32
; Equality compare of <4 x i32> %__a against a full vector loaded from %__b; the
; 4-bit result is widened to 32 bits and bitcast to i32.
; The VLX run expects the load to fold into the memory operand of vpcmpeqd. The
; NoVLX run expects a separate vmovdqa followed by a zmm compare.
1466 define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1467 ; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
1468 ; VLX: # %bb.0: # %entry
1469 ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
1470 ; VLX-NEXT: kmovd %k0, %eax
1473 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
1474 ; NoVLX: # %bb.0: # %entry
1475 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1476 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
1477 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1478 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1479 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1480 ; NoVLX-NEXT: kmovw %k0, %eax
1481 ; NoVLX-NEXT: vzeroupper
1484 %0 = bitcast <2 x i64> %__a to <4 x i32>
1485 %load = load <2 x i64>, <2 x i64>* %__b
1486 %1 = bitcast <2 x i64> %load to <4 x i32>
1487 %2 = icmp eq <4 x i32> %0, %1
; Shuffle indices 4-7 select elements of the zeroinitializer operand, so bits 4-31 are zero.
1488 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1489 %4 = bitcast <32 x i1> %3 to i32
; Masked register-register equality compare on <4 x i32>: the compare result is
; ANDed with the low 4 bits of %__u, widened to 32 bits and bitcast to i32.
; Both runs expect %__u to be moved into %k1 and used as the write-mask of vpcmpeqd.
1493 define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1494 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
1495 ; VLX: # %bb.0: # %entry
1496 ; VLX-NEXT: kmovd %edi, %k1
1497 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1498 ; VLX-NEXT: kmovd %k0, %eax
1501 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
1502 ; NoVLX: # %bb.0: # %entry
1503 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1504 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1505 ; NoVLX-NEXT: kmovw %edi, %k1
1506 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1507 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1508 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1509 ; NoVLX-NEXT: kmovw %k0, %eax
1510 ; NoVLX-NEXT: vzeroupper
1513 %0 = bitcast <2 x i64> %__a to <4 x i32>
1514 %1 = bitcast <2 x i64> %__b to <4 x i32>
1515 %2 = icmp eq <4 x i32> %0, %1
; Only elements 0-3 of the 8-bit mask take part in the AND below.
1516 %3 = bitcast i8 %__u to <8 x i1>
1517 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1518 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select elements of the zeroinitializer operand, so bits 4-31 are zero.
1519 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1520 %6 = bitcast <32 x i1> %5 to i32
; Masked equality compare of <4 x i32> %__a against a full vector loaded from %__b:
; the result is ANDed with the low 4 bits of %__u, widened to 32 bits and bitcast
; to i32.
; The VLX run expects the load folded into a write-masked vpcmpeqd. The NoVLX run
; expects a separate vmovdqa and a write-masked zmm compare.
1524 define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1525 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
1526 ; VLX: # %bb.0: # %entry
1527 ; VLX-NEXT: kmovd %edi, %k1
1528 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
1529 ; VLX-NEXT: kmovd %k0, %eax
1532 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
1533 ; NoVLX: # %bb.0: # %entry
1534 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1535 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
1536 ; NoVLX-NEXT: kmovw %edi, %k1
1537 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1538 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1539 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1540 ; NoVLX-NEXT: kmovw %k0, %eax
1541 ; NoVLX-NEXT: vzeroupper
1544 %0 = bitcast <2 x i64> %__a to <4 x i32>
1545 %load = load <2 x i64>, <2 x i64>* %__b
1546 %1 = bitcast <2 x i64> %load to <4 x i32>
1547 %2 = icmp eq <4 x i32> %0, %1
; Only elements 0-3 of the 8-bit mask take part in the AND below.
1548 %3 = bitcast i8 %__u to <8 x i1>
1549 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1550 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select elements of the zeroinitializer operand, so bits 4-31 are zero.
1551 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1552 %6 = bitcast <32 x i1> %5 to i32
; Equality compare of <4 x i32> %__a against a scalar i32 loaded from %__b and
; splatted to all lanes; the 4-bit result is widened to 32 bits and bitcast to i32.
; The VLX run expects the load to fold into a {1to4} operand of vpcmpeqd. The NoVLX
; run expects vpbroadcastd followed by a zmm compare.
1557 define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
1558 ; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
1559 ; VLX: # %bb.0: # %entry
1560 ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
1561 ; VLX-NEXT: kmovd %k0, %eax
1564 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
1565 ; NoVLX: # %bb.0: # %entry
1566 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1567 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
1568 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1569 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1570 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1571 ; NoVLX-NEXT: kmovw %k0, %eax
1572 ; NoVLX-NEXT: vzeroupper
1575 %0 = bitcast <2 x i64> %__a to <4 x i32>
1576 %load = load i32, i32* %__b
; Splat the loaded scalar into every lane of a <4 x i32>.
1577 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1578 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1579 %2 = icmp eq <4 x i32> %0, %1
; Shuffle indices 4-7 select elements of the zeroinitializer operand, so bits 4-31 are zero.
1580 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1581 %4 = bitcast <32 x i1> %3 to i32
; Masked, broadcast-from-memory equality compare on <4 x i32>: the compare result is
; ANDed with the low 4 bits of %__u, widened to 32 bits and bitcast to i32.
; Both runs expect %__u in %k1 as the write-mask of vpcmpeqd. The VLX run
; additionally expects the scalar load folded into a {1to4} operand.
1585 define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
1586 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
1587 ; VLX: # %bb.0: # %entry
1588 ; VLX-NEXT: kmovd %edi, %k1
1589 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
1590 ; VLX-NEXT: kmovd %k0, %eax
1593 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
1594 ; NoVLX: # %bb.0: # %entry
1595 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1596 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
1597 ; NoVLX-NEXT: kmovw %edi, %k1
1598 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1599 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1600 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1601 ; NoVLX-NEXT: kmovw %k0, %eax
1602 ; NoVLX-NEXT: vzeroupper
1605 %0 = bitcast <2 x i64> %__a to <4 x i32>
1606 %load = load i32, i32* %__b
; Splat the loaded scalar into every lane of a <4 x i32>.
1607 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1608 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1609 %2 = icmp eq <4 x i32> %0, %1
; Only elements 0-3 of the 8-bit mask take part in the AND below.
1610 %3 = bitcast i8 %__u to <8 x i1>
1611 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1612 %4 = and <4 x i1> %extract.i, %2
; Shuffle indices 4-7 select elements of the zeroinitializer operand, so bits 4-31 are zero.
1613 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1614 %6 = bitcast <32 x i1> %5 to i32
; Register-register equality compare of two <4 x i32> values; the 4-bit result is
; widened to 64 bits and bitcast to i64.
; The VLX run expects the mask to be read with kmovq into %rax. The NoVLX run
; expects a zmm compare, kshiftlw/kshiftrw $12, and a kmovw into %eax.
1619 define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1620 ; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
1621 ; VLX: # %bb.0: # %entry
1622 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
1623 ; VLX-NEXT: kmovq %k0, %rax
1626 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
1627 ; NoVLX: # %bb.0: # %entry
1628 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1629 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1630 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1631 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1632 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1633 ; NoVLX-NEXT: kmovw %k0, %eax
1634 ; NoVLX-NEXT: vzeroupper
1637 %0 = bitcast <2 x i64> %__a to <4 x i32>
1638 %1 = bitcast <2 x i64> %__b to <4 x i32>
1639 %2 = icmp eq <4 x i32> %0, %1
; Shuffle indices 4-7 select elements of the zeroinitializer operand, so bits 4-63 are zero.
1640 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1641 %4 = bitcast <64 x i1> %3 to i64
; Equality compare of <4 x i32> %__a against a full vector loaded from %__b; the
; 4-bit result is widened to 64 bits and bitcast to i64.
; The VLX run expects the load folded into vpcmpeqd and a kmovq into %rax. The
; NoVLX run expects a separate vmovdqa followed by a zmm compare.
1645 define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1646 ; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
1647 ; VLX: # %bb.0: # %entry
1648 ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
1649 ; VLX-NEXT: kmovq %k0, %rax
1652 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
1653 ; NoVLX: # %bb.0: # %entry
1654 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1655 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
1656 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1657 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1658 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1659 ; NoVLX-NEXT: kmovw %k0, %eax
1660 ; NoVLX-NEXT: vzeroupper
1663 %0 = bitcast <2 x i64> %__a to <4 x i32>
1664 %load = load <2 x i64>, <2 x i64>* %__b
1665 %1 = bitcast <2 x i64> %load to <4 x i32>
1666 %2 = icmp eq <4 x i32> %0, %1
; Shuffle indices 4-7 select elements of the zeroinitializer operand, so bits 4-63 are zero.
1667 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1668 %4 = bitcast <64 x i1> %3 to i64
; Masked register-register equality compare on <4 x i32>: the compare result is
; ANDed with the low 4 bits of %__u, widened to 64 bits and bitcast to i64.
; Both runs expect %__u to be moved into %k1 and used as the write-mask of vpcmpeqd.
1672 define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1673 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
1674 ; VLX: # %bb.0: # %entry
1675 ; VLX-NEXT: kmovd %edi, %k1
1676 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1677 ; VLX-NEXT: kmovq %k0, %rax
1680 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
1681 ; NoVLX: # %bb.0: # %entry
1682 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1683 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1684 ; NoVLX-NEXT: kmovw %edi, %k1
1685 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1686 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1687 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1688 ; NoVLX-NEXT: kmovw %k0, %eax
1689 ; NoVLX-NEXT: vzeroupper
1692 %0 = bitcast <2 x i64> %__a to <4 x i32>
1693 %1 = bitcast <2 x i64> %__b to <4 x i32>
1694 %2 = icmp eq <4 x i32> %0, %1
; Only elements 0-3 of the 8-bit mask take part in the AND below.
1695 %3 = bitcast i8 %__u to <8 x i1>
1696 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1697 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select elements of the zeroinitializer operand, so bits 4-63 are zero.
1698 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1699 %6 = bitcast <64 x i1> %5 to i64
; Masked equality compare of <4 x i32> %__a against a full vector loaded from %__b:
; the result is ANDed with the low 4 bits of %__u, widened to 64 bits and bitcast
; to i64.
; The VLX run expects the load folded into a write-masked vpcmpeqd. The NoVLX run
; expects a separate vmovdqa and a write-masked zmm compare.
1703 define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1704 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem:
1705 ; VLX: # %bb.0: # %entry
1706 ; VLX-NEXT: kmovd %edi, %k1
1707 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
1708 ; VLX-NEXT: kmovq %k0, %rax
1711 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem:
1712 ; NoVLX: # %bb.0: # %entry
1713 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1714 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
1715 ; NoVLX-NEXT: kmovw %edi, %k1
1716 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1717 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1718 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1719 ; NoVLX-NEXT: kmovw %k0, %eax
1720 ; NoVLX-NEXT: vzeroupper
1723 %0 = bitcast <2 x i64> %__a to <4 x i32>
1724 %load = load <2 x i64>, <2 x i64>* %__b
1725 %1 = bitcast <2 x i64> %load to <4 x i32>
1726 %2 = icmp eq <4 x i32> %0, %1
; Only elements 0-3 of the 8-bit mask take part in the AND below.
1727 %3 = bitcast i8 %__u to <8 x i1>
1728 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1729 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select elements of the zeroinitializer operand, so bits 4-63 are zero.
1730 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1731 %6 = bitcast <64 x i1> %5 to i64
; Equality compare of <4 x i32> %__a against a scalar i32 loaded from %__b and
; splatted to all lanes; the 4-bit result is widened to 64 bits and bitcast to i64.
; The VLX run expects the load to fold into a {1to4} operand of vpcmpeqd. The NoVLX
; run expects vpbroadcastd followed by a zmm compare.
1736 define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
1737 ; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b:
1738 ; VLX: # %bb.0: # %entry
1739 ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
1740 ; VLX-NEXT: kmovq %k0, %rax
1743 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b:
1744 ; NoVLX: # %bb.0: # %entry
1745 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1746 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
1747 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1748 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1749 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1750 ; NoVLX-NEXT: kmovw %k0, %eax
1751 ; NoVLX-NEXT: vzeroupper
1754 %0 = bitcast <2 x i64> %__a to <4 x i32>
1755 %load = load i32, i32* %__b
; Splat the loaded scalar into every lane of a <4 x i32>.
1756 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1757 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1758 %2 = icmp eq <4 x i32> %0, %1
; Shuffle indices 4-7 select elements of the zeroinitializer operand, so bits 4-63 are zero.
1759 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1760 %4 = bitcast <64 x i1> %3 to i64
; Masked, broadcast-from-memory equality compare on <4 x i32>: the compare result is
; ANDed with the low 4 bits of %__u, widened to 64 bits and bitcast to i64.
; Both runs expect %__u in %k1 as the write-mask of vpcmpeqd. The VLX run
; additionally expects the scalar load folded into a {1to4} operand.
1764 define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
1765 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b:
1766 ; VLX: # %bb.0: # %entry
1767 ; VLX-NEXT: kmovd %edi, %k1
1768 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
1769 ; VLX-NEXT: kmovq %k0, %rax
1772 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b:
1773 ; NoVLX: # %bb.0: # %entry
1774 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1775 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
1776 ; NoVLX-NEXT: kmovw %edi, %k1
1777 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1778 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1779 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1780 ; NoVLX-NEXT: kmovw %k0, %eax
1781 ; NoVLX-NEXT: vzeroupper
1784 %0 = bitcast <2 x i64> %__a to <4 x i32>
1785 %load = load i32, i32* %__b
; Splat the loaded scalar into every lane of a <4 x i32>.
1786 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1787 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1788 %2 = icmp eq <4 x i32> %0, %1
; Only elements 0-3 of the 8-bit mask take part in the AND below.
1789 %3 = bitcast i8 %__u to <8 x i1>
1790 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1791 %4 = and <4 x i1> %extract.i, %2
; Shuffle indices 4-7 select elements of the zeroinitializer operand, so bits 4-63 are zero.
1792 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1793 %6 = bitcast <64 x i1> %5 to i64
; Register-register equality compare of two <8 x i32> values; the 8-bit result is
; widened to 16 bits and bitcast to i16.
; The VLX run expects a single ymm vpcmpeqd into %k0. The NoVLX run expects the
; operands to be treated as zmm, with kshiftlw/kshiftrw $8 keeping the low 8 bits.
1798 define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
1799 ; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask:
1800 ; VLX: # %bb.0: # %entry
1801 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
1802 ; VLX-NEXT: kmovd %k0, %eax
1803 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1804 ; VLX-NEXT: vzeroupper
1807 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask:
1808 ; NoVLX: # %bb.0: # %entry
1809 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
1810 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1811 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1812 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
1813 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
1814 ; NoVLX-NEXT: kmovw %k0, %eax
1815 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1816 ; NoVLX-NEXT: vzeroupper
1819 %0 = bitcast <4 x i64> %__a to <8 x i32>
1820 %1 = bitcast <4 x i64> %__b to <8 x i32>
1821 %2 = icmp eq <8 x i32> %0, %1
; Shuffle indices 8-15 select elements of the zeroinitializer operand, so bits 8-15 are zero.
1822 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1823 %4 = bitcast <16 x i1> %3 to i16
; Equality compare of <8 x i32> %__a against a full vector loaded from %__b; the
; 8-bit result is widened to 16 bits and bitcast to i16.
; The VLX run expects the load to fold into the memory operand of vpcmpeqd. The
; NoVLX run expects a separate vmovdqa followed by a zmm compare.
1827 define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
1828 ; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem:
1829 ; VLX: # %bb.0: # %entry
1830 ; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
1831 ; VLX-NEXT: kmovd %k0, %eax
1832 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1833 ; VLX-NEXT: vzeroupper
1836 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem:
1837 ; NoVLX: # %bb.0: # %entry
1838 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1839 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
1840 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1841 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
1842 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
1843 ; NoVLX-NEXT: kmovw %k0, %eax
1844 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1845 ; NoVLX-NEXT: vzeroupper
1848 %0 = bitcast <4 x i64> %__a to <8 x i32>
1849 %load = load <4 x i64>, <4 x i64>* %__b
1850 %1 = bitcast <4 x i64> %load to <8 x i32>
1851 %2 = icmp eq <8 x i32> %0, %1
; Shuffle indices 8-15 select elements of the zeroinitializer operand, so bits 8-15 are zero.
1852 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1853 %4 = bitcast <16 x i1> %3 to i16
; Masked register-register equality compare on <8 x i32>: the compare result is
; ANDed with all 8 bits of %__u, widened to 16 bits and bitcast to i16.
; Both runs expect %__u to be moved into %k1 and used as the write-mask of vpcmpeqd.
1857 define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
1858 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask:
1859 ; VLX: # %bb.0: # %entry
1860 ; VLX-NEXT: kmovd %edi, %k1
1861 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
1862 ; VLX-NEXT: kmovd %k0, %eax
1863 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1864 ; VLX-NEXT: vzeroupper
1867 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask:
1868 ; NoVLX: # %bb.0: # %entry
1869 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
1870 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1871 ; NoVLX-NEXT: kmovw %edi, %k1
1872 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1873 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
1874 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
1875 ; NoVLX-NEXT: kmovw %k0, %eax
1876 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1877 ; NoVLX-NEXT: vzeroupper
1880 %0 = bitcast <4 x i64> %__a to <8 x i32>
1881 %1 = bitcast <4 x i64> %__b to <8 x i32>
1882 %2 = icmp eq <8 x i32> %0, %1
; The i8 mask maps one-to-one onto the 8 compare lanes, so no extract shuffle is needed.
1883 %3 = bitcast i8 %__u to <8 x i1>
1884 %4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select elements of the zeroinitializer operand, so bits 8-15 are zero.
1885 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1886 %6 = bitcast <16 x i1> %5 to i16
; Masked equality compare of <8 x i32> %__a against a full vector loaded from %__b:
; the result is ANDed with all 8 bits of %__u, widened to 16 bits and bitcast to i16.
; The VLX run expects the load folded into a write-masked vpcmpeqd. The NoVLX run
; expects a separate vmovdqa and a write-masked zmm compare.
1890 define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
1891 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem:
1892 ; VLX: # %bb.0: # %entry
1893 ; VLX-NEXT: kmovd %edi, %k1
1894 ; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
1895 ; VLX-NEXT: kmovd %k0, %eax
1896 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1897 ; VLX-NEXT: vzeroupper
1900 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem:
1901 ; NoVLX: # %bb.0: # %entry
1902 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1903 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
1904 ; NoVLX-NEXT: kmovw %edi, %k1
1905 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1906 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
1907 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
1908 ; NoVLX-NEXT: kmovw %k0, %eax
1909 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1910 ; NoVLX-NEXT: vzeroupper
1913 %0 = bitcast <4 x i64> %__a to <8 x i32>
1914 %load = load <4 x i64>, <4 x i64>* %__b
1915 %1 = bitcast <4 x i64> %load to <8 x i32>
1916 %2 = icmp eq <8 x i32> %0, %1
; The i8 mask maps one-to-one onto the 8 compare lanes, so no extract shuffle is needed.
1917 %3 = bitcast i8 %__u to <8 x i1>
1918 %4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select elements of the zeroinitializer operand, so bits 8-15 are zero.
1919 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1920 %6 = bitcast <16 x i1> %5 to i16
; Equality compare of <8 x i32> %__a against a scalar i32 loaded from %__b and
; splatted to all lanes; the 8-bit result is widened to 16 bits and bitcast to i16.
; The VLX run expects the load to fold into a {1to8} operand of vpcmpeqd. The NoVLX
; run expects vpbroadcastd into ymm followed by a zmm compare.
1925 define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
1926 ; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b:
1927 ; VLX: # %bb.0: # %entry
1928 ; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
1929 ; VLX-NEXT: kmovd %k0, %eax
1930 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1931 ; VLX-NEXT: vzeroupper
1934 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b:
1935 ; NoVLX: # %bb.0: # %entry
1936 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1937 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
1938 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1939 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
1940 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
1941 ; NoVLX-NEXT: kmovw %k0, %eax
1942 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1943 ; NoVLX-NEXT: vzeroupper
1946 %0 = bitcast <4 x i64> %__a to <8 x i32>
1947 %load = load i32, i32* %__b
; Splat the loaded scalar into every lane of an <8 x i32>.
1948 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
1949 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1950 %2 = icmp eq <8 x i32> %0, %1
; Shuffle indices 8-15 select elements of the zeroinitializer operand, so bits 8-15 are zero.
1951 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1952 %4 = bitcast <16 x i1> %3 to i16
; Masked, broadcast-from-memory equality compare on <8 x i32>: the compare result is
; ANDed with all 8 bits of %__u, widened to 16 bits and bitcast to i16.
; Both runs expect %__u in %k1 as the write-mask of vpcmpeqd. The VLX run
; additionally expects the scalar load folded into a {1to8} operand.
1956 define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
1957 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b:
1958 ; VLX: # %bb.0: # %entry
1959 ; VLX-NEXT: kmovd %edi, %k1
1960 ; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
1961 ; VLX-NEXT: kmovd %k0, %eax
1962 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1963 ; VLX-NEXT: vzeroupper
1966 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b:
1967 ; NoVLX: # %bb.0: # %entry
1968 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1969 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
1970 ; NoVLX-NEXT: kmovw %edi, %k1
1971 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1972 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
1973 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
1974 ; NoVLX-NEXT: kmovw %k0, %eax
1975 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1976 ; NoVLX-NEXT: vzeroupper
1979 %0 = bitcast <4 x i64> %__a to <8 x i32>
1980 %load = load i32, i32* %__b
; Splat the loaded scalar into every lane of an <8 x i32>.
1981 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
1982 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1983 %2 = icmp eq <8 x i32> %0, %1
; The i8 mask maps one-to-one onto the 8 compare lanes, so no extract shuffle is needed.
1984 %3 = bitcast i8 %__u to <8 x i1>
1985 %4 = and <8 x i1> %3, %2
; Shuffle indices 8-15 select elements of the zeroinitializer operand, so bits 8-15 are zero.
1986 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1987 %6 = bitcast <16 x i1> %5 to i16
; Register-register equality compare of two <8 x i32> values; the 8-bit result is
; widened to 32 bits and bitcast to i32.
; The VLX run expects a single ymm vpcmpeqd into %k0. The NoVLX run expects a zmm
; compare with kshiftlw/kshiftrw $8 keeping the low 8 bits.
1992 define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
1993 ; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask:
1994 ; VLX: # %bb.0: # %entry
1995 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
1996 ; VLX-NEXT: kmovd %k0, %eax
1997 ; VLX-NEXT: vzeroupper
2000 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask:
2001 ; NoVLX: # %bb.0: # %entry
2002 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
2003 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2004 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2005 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2006 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2007 ; NoVLX-NEXT: kmovw %k0, %eax
2008 ; NoVLX-NEXT: vzeroupper
2011 %0 = bitcast <4 x i64> %__a to <8 x i32>
2012 %1 = bitcast <4 x i64> %__b to <8 x i32>
2013 %2 = icmp eq <8 x i32> %0, %1
; Shuffle indices 8-15 select elements of the zeroinitializer operand, so bits 8-31 are zero.
2014 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2015 %4 = bitcast <32 x i1> %3 to i32
; Equality compare of <8 x i32> %__a against a full vector loaded from %__b; the
; 8-bit result is widened to 32 bits and bitcast to i32.
; The VLX run expects the load to fold into the memory operand of vpcmpeqd. The
; NoVLX run expects a separate vmovdqa followed by a zmm compare.
2019 define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
2020 ; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem:
2021 ; VLX: # %bb.0: # %entry
2022 ; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
2023 ; VLX-NEXT: kmovd %k0, %eax
2024 ; VLX-NEXT: vzeroupper
2027 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem:
2028 ; NoVLX: # %bb.0: # %entry
2029 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2030 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
2031 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2032 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2033 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2034 ; NoVLX-NEXT: kmovw %k0, %eax
2035 ; NoVLX-NEXT: vzeroupper
2038 %0 = bitcast <4 x i64> %__a to <8 x i32>
2039 %load = load <4 x i64>, <4 x i64>* %__b
2040 %1 = bitcast <4 x i64> %load to <8 x i32>
2041 %2 = icmp eq <8 x i32> %0, %1
; Shuffle indices 8-15 select elements of the zeroinitializer operand, so bits 8-31 are zero.
2042 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2043 %4 = bitcast <32 x i1> %3 to i32
; Masked register-register equality compare on <8 x i32>: the compare result is
; ANDed with all 8 bits of %__u, widened to 32 bits and bitcast to i32.
; Both runs expect %__u to be moved into %k1 and used as the write-mask of vpcmpeqd.
2047 define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
2048 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask:
2049 ; VLX: # %bb.0: # %entry
2050 ; VLX-NEXT: kmovd %edi, %k1
2051 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
2052 ; VLX-NEXT: kmovd %k0, %eax
2053 ; VLX-NEXT: vzeroupper
2056 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask:
2057 ; NoVLX: # %bb.0: # %entry
2058 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
2059 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2060 ; NoVLX-NEXT: kmovw %edi, %k1
2061 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2062 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2063 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2064 ; NoVLX-NEXT: kmovw %k0, %eax
2065 ; NoVLX-NEXT: vzeroupper
2068 %0 = bitcast <4 x i64> %__a to <8 x i32>
2069 %1 = bitcast <4 x i64> %__b to <8 x i32>
2070 %2 = icmp eq <8 x i32> %0, %1
; The i8 mask maps one-to-one onto the 8 compare lanes, so no extract shuffle is needed.
2071 %3 = bitcast i8 %__u to <8 x i1>
2072 %4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select elements of the zeroinitializer operand, so bits 8-31 are zero.
2073 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2074 %6 = bitcast <32 x i1> %5 to i32
; Masked equality compare of <8 x i32> %__a against a full vector loaded from %__b:
; the result is ANDed with all 8 bits of %__u, widened to 32 bits and bitcast to i32.
; The VLX run expects the load folded into a write-masked vpcmpeqd. The NoVLX run
; expects a separate vmovdqa and a write-masked zmm compare.
2078 define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
2079 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem:
2080 ; VLX: # %bb.0: # %entry
2081 ; VLX-NEXT: kmovd %edi, %k1
2082 ; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
2083 ; VLX-NEXT: kmovd %k0, %eax
2084 ; VLX-NEXT: vzeroupper
2087 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem:
2088 ; NoVLX: # %bb.0: # %entry
2089 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2090 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
2091 ; NoVLX-NEXT: kmovw %edi, %k1
2092 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2093 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2094 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2095 ; NoVLX-NEXT: kmovw %k0, %eax
2096 ; NoVLX-NEXT: vzeroupper
2099 %0 = bitcast <4 x i64> %__a to <8 x i32>
2100 %load = load <4 x i64>, <4 x i64>* %__b
2101 %1 = bitcast <4 x i64> %load to <8 x i32>
2102 %2 = icmp eq <8 x i32> %0, %1
; The i8 mask maps one-to-one onto the 8 compare lanes, so no extract shuffle is needed.
2103 %3 = bitcast i8 %__u to <8 x i1>
2104 %4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select elements of the zeroinitializer operand, so bits 8-31 are zero.
2105 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2106 %6 = bitcast <32 x i1> %5 to i32
; Equality compare of <8 x i32> %__a against a scalar i32 loaded from %__b and
; splatted to all lanes; the 8-bit result is widened to 32 bits and bitcast to i32.
; The VLX run expects the load to fold into a {1to8} operand of vpcmpeqd. The NoVLX
; run expects vpbroadcastd into ymm followed by a zmm compare.
2111 define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
2112 ; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b:
2113 ; VLX: # %bb.0: # %entry
2114 ; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
2115 ; VLX-NEXT: kmovd %k0, %eax
2116 ; VLX-NEXT: vzeroupper
2119 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b:
2120 ; NoVLX: # %bb.0: # %entry
2121 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2122 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
2123 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2124 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2125 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2126 ; NoVLX-NEXT: kmovw %k0, %eax
2127 ; NoVLX-NEXT: vzeroupper
2130 %0 = bitcast <4 x i64> %__a to <8 x i32>
2131 %load = load i32, i32* %__b
; Splat the loaded scalar into every lane of an <8 x i32>.
2132 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2133 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2134 %2 = icmp eq <8 x i32> %0, %1
; Shuffle indices 8-15 select elements of the zeroinitializer operand, so bits 8-31 are zero.
2135 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2136 %4 = bitcast <32 x i1> %3 to i32
; Masked 8 x i32 equality compare of %__a against a scalar i32 loaded from
; %__b and splatted to all 8 lanes. The result is ANDed with the i8 mask
; %__u, widened to <32 x i1> with zero upper lanes, and bitcast to i32.
; The VLX checks expect a write-masked vpcmpeqd with a {1to8} memory operand.
; The NoVLX checks expect vpbroadcastd into ymm1, a zmm compare under %k1,
; and a kshiftlw/kshiftrw $8 pair that zeroes mask bits 8-15.
2140 define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
2141 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b:
2142 ; VLX: # %bb.0: # %entry
2143 ; VLX-NEXT: kmovd %edi, %k1
2144 ; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
2145 ; VLX-NEXT: kmovd %k0, %eax
2146 ; VLX-NEXT: vzeroupper
2149 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b:
2150 ; NoVLX: # %bb.0: # %entry
2151 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2152 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
2153 ; NoVLX-NEXT: kmovw %edi, %k1
2154 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2155 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2156 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2157 ; NoVLX-NEXT: kmovw %k0, %eax
2158 ; NoVLX-NEXT: vzeroupper
2161 %0 = bitcast <4 x i64> %__a to <8 x i32>
2162 %load = load i32, i32* %__b
; Insert the scalar into lane 0, then splat lane 0 with an all-zero shuffle mask.
2163 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2164 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2165 %2 = icmp eq <8 x i32> %0, %1
2166 %3 = bitcast i8 %__u to <8 x i1>
2167 %4 = and <8 x i1> %3, %2
; Indices 0-7 select lanes of %4; indices 8-15 select the zeroinitializer operand.
2168 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2169 %6 = bitcast <32 x i1> %5 to i32
; 8 x i32 equality compare of two register operands. The <8 x i1> result is
; widened to <64 x i1> with zero upper lanes and bitcast to i64.
; The VLX checks expect a ymm vpcmpeqd into %k0 read back with kmovq.
; The NoVLX checks expect both operands treated as zmm, a zmm compare, a
; kshiftlw/kshiftrw $8 pair that zeroes mask bits 8-15, and a kmovw.
2174 define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
2175 ; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask:
2176 ; VLX: # %bb.0: # %entry
2177 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
2178 ; VLX-NEXT: kmovq %k0, %rax
2179 ; VLX-NEXT: vzeroupper
2182 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask:
2183 ; NoVLX: # %bb.0: # %entry
2184 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
2185 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2186 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2187 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2188 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2189 ; NoVLX-NEXT: kmovw %k0, %eax
2190 ; NoVLX-NEXT: vzeroupper
2193 %0 = bitcast <4 x i64> %__a to <8 x i32>
2194 %1 = bitcast <4 x i64> %__b to <8 x i32>
2195 %2 = icmp eq <8 x i32> %0, %1
; Indices 0-7 select lanes of %2; indices 8-15 select the zeroinitializer operand.
2196 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2197 %4 = bitcast <64 x i1> %3 to i64
; 8 x i32 equality compare whose second operand is loaded from %__b. The
; <8 x i1> result is widened to <64 x i1> with zero upper lanes and bitcast
; to i64.
; The VLX checks expect the load folded into a ymm vpcmpeqd, then kmovq.
; The NoVLX checks expect a separate vmovdqa, a zmm compare, a
; kshiftlw/kshiftrw $8 pair that zeroes mask bits 8-15, and a kmovw.
2201 define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
2202 ; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem:
2203 ; VLX: # %bb.0: # %entry
2204 ; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
2205 ; VLX-NEXT: kmovq %k0, %rax
2206 ; VLX-NEXT: vzeroupper
2209 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem:
2210 ; NoVLX: # %bb.0: # %entry
2211 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2212 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
2213 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2214 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2215 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2216 ; NoVLX-NEXT: kmovw %k0, %eax
2217 ; NoVLX-NEXT: vzeroupper
2220 %0 = bitcast <4 x i64> %__a to <8 x i32>
2221 %load = load <4 x i64>, <4 x i64>* %__b
2222 %1 = bitcast <4 x i64> %load to <8 x i32>
2223 %2 = icmp eq <8 x i32> %0, %1
; Indices 0-7 select lanes of %2; indices 8-15 select the zeroinitializer operand.
2224 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2225 %4 = bitcast <64 x i1> %3 to i64
; Masked 8 x i32 equality compare of two register operands. The <8 x i1>
; result is ANDed with the i8 mask %__u, widened to <64 x i1> with zero
; upper lanes, and bitcast to i64.
; The VLX checks expect a write-masked ymm vpcmpeqd read back with kmovq.
; The NoVLX checks expect a zmm compare under %k1, a kshiftlw/kshiftrw $8
; pair that zeroes mask bits 8-15, and a kmovw.
2229 define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
2230 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask:
2231 ; VLX: # %bb.0: # %entry
2232 ; VLX-NEXT: kmovd %edi, %k1
2233 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
2234 ; VLX-NEXT: kmovq %k0, %rax
2235 ; VLX-NEXT: vzeroupper
2238 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask:
2239 ; NoVLX: # %bb.0: # %entry
2240 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
2241 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2242 ; NoVLX-NEXT: kmovw %edi, %k1
2243 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2244 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2245 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2246 ; NoVLX-NEXT: kmovw %k0, %eax
2247 ; NoVLX-NEXT: vzeroupper
2250 %0 = bitcast <4 x i64> %__a to <8 x i32>
2251 %1 = bitcast <4 x i64> %__b to <8 x i32>
2252 %2 = icmp eq <8 x i32> %0, %1
2253 %3 = bitcast i8 %__u to <8 x i1>
2254 %4 = and <8 x i1> %2, %3
; Indices 0-7 select lanes of %4; indices 8-15 select the zeroinitializer operand.
2255 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2256 %6 = bitcast <64 x i1> %5 to i64
; Masked 8 x i32 equality compare whose second operand is loaded from %__b.
; The <8 x i1> result is ANDed with the i8 mask %__u, widened to <64 x i1>
; with zero upper lanes, and bitcast to i64.
; The VLX checks expect the load folded into a write-masked ymm vpcmpeqd,
; then kmovq.
; The NoVLX checks expect a separate vmovdqa, a zmm compare under %k1, a
; kshiftlw/kshiftrw $8 pair that zeroes mask bits 8-15, and a kmovw.
2260 define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
2261 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem:
2262 ; VLX: # %bb.0: # %entry
2263 ; VLX-NEXT: kmovd %edi, %k1
2264 ; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
2265 ; VLX-NEXT: kmovq %k0, %rax
2266 ; VLX-NEXT: vzeroupper
2269 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem:
2270 ; NoVLX: # %bb.0: # %entry
2271 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2272 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
2273 ; NoVLX-NEXT: kmovw %edi, %k1
2274 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2275 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2276 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2277 ; NoVLX-NEXT: kmovw %k0, %eax
2278 ; NoVLX-NEXT: vzeroupper
2281 %0 = bitcast <4 x i64> %__a to <8 x i32>
2282 %load = load <4 x i64>, <4 x i64>* %__b
2283 %1 = bitcast <4 x i64> %load to <8 x i32>
2284 %2 = icmp eq <8 x i32> %0, %1
2285 %3 = bitcast i8 %__u to <8 x i1>
2286 %4 = and <8 x i1> %2, %3
; Indices 0-7 select lanes of %4; indices 8-15 select the zeroinitializer operand.
2287 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2288 %6 = bitcast <64 x i1> %5 to i64
; 8 x i32 equality compare of %__a against a scalar i32 loaded from %__b and
; splatted to all 8 lanes. The <8 x i1> result is widened to <64 x i1> with
; zero upper lanes and bitcast to i64.
; The VLX checks expect the splat folded as a {1to8} memory operand, then
; kmovq.
; The NoVLX checks expect vpbroadcastd into ymm1, a zmm compare, a
; kshiftlw/kshiftrw $8 pair that zeroes mask bits 8-15, and a kmovw.
2293 define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
2294 ; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b:
2295 ; VLX: # %bb.0: # %entry
2296 ; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
2297 ; VLX-NEXT: kmovq %k0, %rax
2298 ; VLX-NEXT: vzeroupper
2301 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b:
2302 ; NoVLX: # %bb.0: # %entry
2303 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2304 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
2305 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2306 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2307 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2308 ; NoVLX-NEXT: kmovw %k0, %eax
2309 ; NoVLX-NEXT: vzeroupper
2312 %0 = bitcast <4 x i64> %__a to <8 x i32>
2313 %load = load i32, i32* %__b
; Insert the scalar into lane 0, then splat lane 0 with an all-zero shuffle mask.
2314 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2315 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2316 %2 = icmp eq <8 x i32> %0, %1
; Indices 0-7 select lanes of %2; indices 8-15 select the zeroinitializer operand.
2317 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2318 %4 = bitcast <64 x i1> %3 to i64
; Masked 8 x i32 equality compare of %__a against a scalar i32 loaded from
; %__b and splatted to all 8 lanes. The result is ANDed with the i8 mask
; %__u, widened to <64 x i1> with zero upper lanes, and bitcast to i64.
; The VLX checks expect a write-masked vpcmpeqd with a {1to8} memory
; operand, then kmovq.
; The NoVLX checks expect vpbroadcastd into ymm1, a zmm compare under %k1, a
; kshiftlw/kshiftrw $8 pair that zeroes mask bits 8-15, and a kmovw.
2322 define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
2323 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b:
2324 ; VLX: # %bb.0: # %entry
2325 ; VLX-NEXT: kmovd %edi, %k1
2326 ; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
2327 ; VLX-NEXT: kmovq %k0, %rax
2328 ; VLX-NEXT: vzeroupper
2331 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b:
2332 ; NoVLX: # %bb.0: # %entry
2333 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2334 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
2335 ; NoVLX-NEXT: kmovw %edi, %k1
2336 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2337 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2338 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2339 ; NoVLX-NEXT: kmovw %k0, %eax
2340 ; NoVLX-NEXT: vzeroupper
2343 %0 = bitcast <4 x i64> %__a to <8 x i32>
2344 %load = load i32, i32* %__b
; Insert the scalar into lane 0, then splat lane 0 with an all-zero shuffle mask.
2345 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2346 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2347 %2 = icmp eq <8 x i32> %0, %1
2348 %3 = bitcast i8 %__u to <8 x i1>
2349 %4 = and <8 x i1> %3, %2
; Indices 0-7 select lanes of %4; indices 8-15 select the zeroinitializer operand.
2350 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2351 %6 = bitcast <64 x i1> %5 to i64
; 16 x i32 equality compare of two register operands. The <16 x i1> result
; is widened to <32 x i1> with zero upper lanes and bitcast to i32.
; Both runs expect a zmm vpcmpeqd into %k0; the VLX checks read the mask
; with kmovd, the NoVLX checks with kmovw, and neither expects a kshift pair.
2356 define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
2357 ; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask:
2358 ; VLX: # %bb.0: # %entry
2359 ; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2360 ; VLX-NEXT: kmovd %k0, %eax
2361 ; VLX-NEXT: vzeroupper
2364 ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask:
2365 ; NoVLX: # %bb.0: # %entry
2366 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2367 ; NoVLX-NEXT: kmovw %k0, %eax
2368 ; NoVLX-NEXT: vzeroupper
2371 %0 = bitcast <8 x i64> %__a to <16 x i32>
2372 %1 = bitcast <8 x i64> %__b to <16 x i32>
2373 %2 = icmp eq <16 x i32> %0, %1
; Indices 0-15 select lanes of %2; indices 16-31 select the zeroinitializer operand.
2374 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2375 %4 = bitcast <32 x i1> %3 to i32
; 16 x i32 equality compare whose second operand is loaded from %__b. The
; <16 x i1> result is widened to <32 x i1> with zero upper lanes and bitcast
; to i32.
; Both runs expect the load folded into a zmm vpcmpeqd; the VLX checks read
; the mask with kmovd, the NoVLX checks with kmovw.
2379 define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
2380 ; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem:
2381 ; VLX: # %bb.0: # %entry
2382 ; VLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
2383 ; VLX-NEXT: kmovd %k0, %eax
2384 ; VLX-NEXT: vzeroupper
2387 ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem:
2388 ; NoVLX: # %bb.0: # %entry
2389 ; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
2390 ; NoVLX-NEXT: kmovw %k0, %eax
2391 ; NoVLX-NEXT: vzeroupper
2394 %0 = bitcast <8 x i64> %__a to <16 x i32>
2395 %load = load <8 x i64>, <8 x i64>* %__b
2396 %1 = bitcast <8 x i64> %load to <16 x i32>
2397 %2 = icmp eq <16 x i32> %0, %1
; Indices 0-15 select lanes of %2; indices 16-31 select the zeroinitializer operand.
2398 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2399 %4 = bitcast <32 x i1> %3 to i32
; Masked 16 x i32 equality compare of two register operands. The <16 x i1>
; result is ANDed with the i16 mask %__u, widened to <32 x i1> with zero
; upper lanes, and bitcast to i32.
; The VLX checks expect %__u moved into %k1 and used as a write mask.
; The NoVLX checks expect an unmasked zmm compare, with the mask applied
; afterwards in a general-purpose register via andl %edi, %eax.
2403 define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
2404 ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
2405 ; VLX: # %bb.0: # %entry
2406 ; VLX-NEXT: kmovd %edi, %k1
2407 ; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2408 ; VLX-NEXT: kmovd %k0, %eax
2409 ; VLX-NEXT: vzeroupper
2412 ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
2413 ; NoVLX: # %bb.0: # %entry
2414 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2415 ; NoVLX-NEXT: kmovw %k0, %eax
2416 ; NoVLX-NEXT: andl %edi, %eax
2417 ; NoVLX-NEXT: vzeroupper
2420 %0 = bitcast <8 x i64> %__a to <16 x i32>
2421 %1 = bitcast <8 x i64> %__b to <16 x i32>
2422 %2 = icmp eq <16 x i32> %0, %1
2423 %3 = bitcast i16 %__u to <16 x i1>
2424 %4 = and <16 x i1> %2, %3
; Indices 0-15 select lanes of %4; indices 16-31 select the zeroinitializer operand.
2425 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2426 %6 = bitcast <32 x i1> %5 to i32
; Masked 16 x i32 equality compare whose second operand is loaded from
; %__b. The <16 x i1> result is ANDed with the i16 mask %__u, widened to
; <32 x i1> with zero upper lanes, and bitcast to i32.
; The VLX checks expect the load folded into a write-masked zmm vpcmpeqd.
; The NoVLX checks expect an unmasked compare with the load folded, with the
; mask applied afterwards via andl %edi, %eax.
2430 define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
2431 ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
2432 ; VLX: # %bb.0: # %entry
2433 ; VLX-NEXT: kmovd %edi, %k1
2434 ; VLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
2435 ; VLX-NEXT: kmovd %k0, %eax
2436 ; VLX-NEXT: vzeroupper
2439 ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
2440 ; NoVLX: # %bb.0: # %entry
2441 ; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0
2442 ; NoVLX-NEXT: kmovw %k0, %eax
2443 ; NoVLX-NEXT: andl %edi, %eax
2444 ; NoVLX-NEXT: vzeroupper
2447 %0 = bitcast <8 x i64> %__a to <16 x i32>
2448 %load = load <8 x i64>, <8 x i64>* %__b
2449 %1 = bitcast <8 x i64> %load to <16 x i32>
2450 %2 = icmp eq <16 x i32> %0, %1
2451 %3 = bitcast i16 %__u to <16 x i1>
2452 %4 = and <16 x i1> %2, %3
; Indices 0-15 select lanes of %4; indices 16-31 select the zeroinitializer operand.
2453 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2454 %6 = bitcast <32 x i1> %5 to i32
; 16 x i32 equality compare of %__a against a scalar i32 loaded from %__b
; and splatted to all 16 lanes. The <16 x i1> result is widened to
; <32 x i1> with zero upper lanes and bitcast to i32.
; Both runs expect the splat folded as a {1to16} memory operand; the VLX
; checks read the mask with kmovd, the NoVLX checks with kmovw.
2459 define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
2460 ; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b:
2461 ; VLX: # %bb.0: # %entry
2462 ; VLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2463 ; VLX-NEXT: kmovd %k0, %eax
2464 ; VLX-NEXT: vzeroupper
2467 ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b:
2468 ; NoVLX: # %bb.0: # %entry
2469 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2470 ; NoVLX-NEXT: kmovw %k0, %eax
2471 ; NoVLX-NEXT: vzeroupper
2474 %0 = bitcast <8 x i64> %__a to <16 x i32>
2475 %load = load i32, i32* %__b
; Insert the scalar into lane 0, then splat lane 0 with an all-zero shuffle mask.
2476 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
2477 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2478 %2 = icmp eq <16 x i32> %0, %1
; Indices 0-15 select lanes of %2; indices 16-31 select the zeroinitializer operand.
2479 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2480 %4 = bitcast <32 x i1> %3 to i32
; Masked 16 x i32 equality compare of %__a against a scalar i32 loaded from
; %__b and splatted to all 16 lanes. The result is ANDed with the i16 mask
; %__u, widened to <32 x i1> with zero upper lanes, and bitcast to i32.
; The VLX checks expect a write-masked vpcmpeqd with a {1to16} operand.
; The NoVLX checks expect the same {1to16} compare unmasked, with the mask
; applied afterwards via andl %edi, %eax.
2484 define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
2485 ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
2486 ; VLX: # %bb.0: # %entry
2487 ; VLX-NEXT: kmovd %edi, %k1
2488 ; VLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
2489 ; VLX-NEXT: kmovd %k0, %eax
2490 ; VLX-NEXT: vzeroupper
2493 ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
2494 ; NoVLX: # %bb.0: # %entry
2495 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0
2496 ; NoVLX-NEXT: kmovw %k0, %eax
2497 ; NoVLX-NEXT: andl %edi, %eax
2498 ; NoVLX-NEXT: vzeroupper
2501 %0 = bitcast <8 x i64> %__a to <16 x i32>
2502 %load = load i32, i32* %__b
; Insert the scalar into lane 0, then splat lane 0 with an all-zero shuffle mask.
2503 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
2504 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2505 %2 = icmp eq <16 x i32> %0, %1
2506 %3 = bitcast i16 %__u to <16 x i1>
2507 %4 = and <16 x i1> %3, %2
; Indices 0-15 select lanes of %4; indices 16-31 select the zeroinitializer operand.
2508 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2509 %6 = bitcast <32 x i1> %5 to i32
; 16 x i32 equality compare of two register operands. The <16 x i1> result
; is widened to <64 x i1> with zero upper lanes and bitcast to i64.
; Both runs expect a zmm vpcmpeqd into %k0; the VLX checks read the mask
; with kmovq into %rax, the NoVLX checks with kmovw into %eax.
2514 define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
2515 ; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask:
2516 ; VLX: # %bb.0: # %entry
2517 ; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2518 ; VLX-NEXT: kmovq %k0, %rax
2519 ; VLX-NEXT: vzeroupper
2522 ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask:
2523 ; NoVLX: # %bb.0: # %entry
2524 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2525 ; NoVLX-NEXT: kmovw %k0, %eax
2526 ; NoVLX-NEXT: vzeroupper
2529 %0 = bitcast <8 x i64> %__a to <16 x i32>
2530 %1 = bitcast <8 x i64> %__b to <16 x i32>
2531 %2 = icmp eq <16 x i32> %0, %1
; Indices 0-15 select lanes of %2; indices 16-31 select the zeroinitializer operand.
2532 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2533 %4 = bitcast <64 x i1> %3 to i64
; 16 x i32 equality compare whose second operand is loaded from %__b. The
; <16 x i1> result is widened to <64 x i1> with zero upper lanes and bitcast
; to i64.
; Both runs expect the load folded into a zmm vpcmpeqd; the VLX checks read
; the mask with kmovq into %rax, the NoVLX checks with kmovw into %eax.
2537 define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
2538 ; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem:
2539 ; VLX: # %bb.0: # %entry
2540 ; VLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
2541 ; VLX-NEXT: kmovq %k0, %rax
2542 ; VLX-NEXT: vzeroupper
2545 ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem:
2546 ; NoVLX: # %bb.0: # %entry
2547 ; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
2548 ; NoVLX-NEXT: kmovw %k0, %eax
2549 ; NoVLX-NEXT: vzeroupper
2552 %0 = bitcast <8 x i64> %__a to <16 x i32>
2553 %load = load <8 x i64>, <8 x i64>* %__b
2554 %1 = bitcast <8 x i64> %load to <16 x i32>
2555 %2 = icmp eq <16 x i32> %0, %1
; Indices 0-15 select lanes of %2; indices 16-31 select the zeroinitializer operand.
2556 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2557 %4 = bitcast <64 x i1> %3 to i64
; Masked 16 x i32 equality compare of two register operands. The <16 x i1>
; result is ANDed with the i16 mask %__u, widened to <64 x i1> with zero
; upper lanes, and bitcast to i64.
; The VLX checks expect %__u moved into %k1 and used as a write mask, with
; the result read by kmovq.
; The NoVLX checks expect an unmasked zmm compare, with the mask applied
; afterwards via andl %edi, %eax.
2561 define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
2562 ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
2563 ; VLX: # %bb.0: # %entry
2564 ; VLX-NEXT: kmovd %edi, %k1
2565 ; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2566 ; VLX-NEXT: kmovq %k0, %rax
2567 ; VLX-NEXT: vzeroupper
2570 ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
2571 ; NoVLX: # %bb.0: # %entry
2572 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2573 ; NoVLX-NEXT: kmovw %k0, %eax
2574 ; NoVLX-NEXT: andl %edi, %eax
2575 ; NoVLX-NEXT: vzeroupper
2578 %0 = bitcast <8 x i64> %__a to <16 x i32>
2579 %1 = bitcast <8 x i64> %__b to <16 x i32>
2580 %2 = icmp eq <16 x i32> %0, %1
2581 %3 = bitcast i16 %__u to <16 x i1>
2582 %4 = and <16 x i1> %2, %3
; Indices 0-15 select lanes of %4; indices 16-31 select the zeroinitializer operand.
2583 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2584 %6 = bitcast <64 x i1> %5 to i64
; Masked 16 x i32 equality compare whose second operand is loaded from
; %__b. The <16 x i1> result is ANDed with the i16 mask %__u, widened to
; <64 x i1> with zero upper lanes, and bitcast to i64.
; The VLX checks expect the load folded into a write-masked zmm vpcmpeqd,
; then kmovq.
; The NoVLX checks expect an unmasked compare with the load folded, with the
; mask applied afterwards via andl %edi, %eax.
2588 define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
2589 ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
2590 ; VLX: # %bb.0: # %entry
2591 ; VLX-NEXT: kmovd %edi, %k1
2592 ; VLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
2593 ; VLX-NEXT: kmovq %k0, %rax
2594 ; VLX-NEXT: vzeroupper
2597 ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
2598 ; NoVLX: # %bb.0: # %entry
2599 ; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0
2600 ; NoVLX-NEXT: kmovw %k0, %eax
2601 ; NoVLX-NEXT: andl %edi, %eax
2602 ; NoVLX-NEXT: vzeroupper
2605 %0 = bitcast <8 x i64> %__a to <16 x i32>
2606 %load = load <8 x i64>, <8 x i64>* %__b
2607 %1 = bitcast <8 x i64> %load to <16 x i32>
2608 %2 = icmp eq <16 x i32> %0, %1
2609 %3 = bitcast i16 %__u to <16 x i1>
2610 %4 = and <16 x i1> %2, %3
; Indices 0-15 select lanes of %4; indices 16-31 select the zeroinitializer operand.
2611 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2612 %6 = bitcast <64 x i1> %5 to i64
; 16 x i32 equality compare of %__a against a scalar i32 loaded from %__b
; and splatted to all 16 lanes. The <16 x i1> result is widened to
; <64 x i1> with zero upper lanes and bitcast to i64.
; Both runs expect the splat folded as a {1to16} memory operand; the VLX
; checks read the mask with kmovq into %rax, the NoVLX checks with kmovw
; into %eax.
2617 define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
2618 ; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b:
2619 ; VLX: # %bb.0: # %entry
2620 ; VLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2621 ; VLX-NEXT: kmovq %k0, %rax
2622 ; VLX-NEXT: vzeroupper
2625 ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b:
2626 ; NoVLX: # %bb.0: # %entry
2627 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2628 ; NoVLX-NEXT: kmovw %k0, %eax
2629 ; NoVLX-NEXT: vzeroupper
2632 %0 = bitcast <8 x i64> %__a to <16 x i32>
2633 %load = load i32, i32* %__b
; Insert the scalar into lane 0, then splat lane 0 with an all-zero shuffle mask.
2634 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
2635 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2636 %2 = icmp eq <16 x i32> %0, %1
; Indices 0-15 select lanes of %2; indices 16-31 select the zeroinitializer operand.
2637 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2638 %4 = bitcast <64 x i1> %3 to i64
; Masked 16 x i32 equality compare of %__a against a scalar i32 loaded from
; %__b and splatted to all 16 lanes. The result is ANDed with the i16 mask
; %__u, widened to <64 x i1> with zero upper lanes, and bitcast to i64.
; The VLX checks expect a write-masked vpcmpeqd with a {1to16} operand,
; then kmovq.
; The NoVLX checks expect the same {1to16} compare unmasked, with the mask
; applied afterwards via andl %edi, %eax.
2642 define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
2643 ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
2644 ; VLX: # %bb.0: # %entry
2645 ; VLX-NEXT: kmovd %edi, %k1
2646 ; VLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
2647 ; VLX-NEXT: kmovq %k0, %rax
2648 ; VLX-NEXT: vzeroupper
2651 ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
2652 ; NoVLX: # %bb.0: # %entry
2653 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0
2654 ; NoVLX-NEXT: kmovw %k0, %eax
2655 ; NoVLX-NEXT: andl %edi, %eax
2656 ; NoVLX-NEXT: vzeroupper
2659 %0 = bitcast <8 x i64> %__a to <16 x i32>
2660 %load = load i32, i32* %__b
; Insert the scalar into lane 0, then splat lane 0 with an all-zero shuffle mask.
2661 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
2662 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2663 %2 = icmp eq <16 x i32> %0, %1
2664 %3 = bitcast i16 %__u to <16 x i1>
2665 %4 = and <16 x i1> %3, %2
; Indices 0-15 select lanes of %4; indices 16-31 select the zeroinitializer operand.
2666 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2667 %6 = bitcast <64 x i1> %5 to i64
; 2 x i64 equality compare of two register operands. The <2 x i1> result is
; widened to <4 x i1> with zero upper lanes and bitcast to i4.
; The VLX checks expect an xmm vpcmpeqq into %k0 read back with kmovb.
; The NoVLX checks expect both operands treated as zmm, a zmm compare, and a
; kshiftlw/kshiftrw $14 pair that keeps only mask bits 0-1 before the kmovw.
2672 define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2673 ; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask:
2674 ; VLX: # %bb.0: # %entry
2675 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
2676 ; VLX-NEXT: kmovb %k0, %eax
2679 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask:
2680 ; NoVLX: # %bb.0: # %entry
2681 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
2682 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2683 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2684 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2685 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2686 ; NoVLX-NEXT: kmovw %k0, %eax
2687 ; NoVLX-NEXT: vzeroupper
; The two bitcasts below have identical source and destination types.
2690 %0 = bitcast <2 x i64> %__a to <2 x i64>
2691 %1 = bitcast <2 x i64> %__b to <2 x i64>
2692 %2 = icmp eq <2 x i64> %0, %1
; Indices 0-1 select lanes of %2; indices 2-3 select the zeroinitializer operand.
2693 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2694 %4 = bitcast <4 x i1> %3 to i4
; 2 x i64 equality compare whose second operand is loaded from %__b. The
; <2 x i1> result is widened to <4 x i1> with zero upper lanes and bitcast
; to i4.
; The VLX checks expect the load folded into an xmm vpcmpeqq, then kmovb.
; The NoVLX checks expect a separate vmovdqa, a zmm compare, and a
; kshiftlw/kshiftrw $14 pair that keeps only mask bits 0-1 before the kmovw.
2698 define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
2699 ; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem:
2700 ; VLX: # %bb.0: # %entry
2701 ; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
2702 ; VLX-NEXT: kmovb %k0, %eax
2705 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem:
2706 ; NoVLX: # %bb.0: # %entry
2707 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2708 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
2709 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2710 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2711 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2712 ; NoVLX-NEXT: kmovw %k0, %eax
2713 ; NoVLX-NEXT: vzeroupper
; The bitcasts of %__a and %load have identical source and destination types.
2716 %0 = bitcast <2 x i64> %__a to <2 x i64>
2717 %load = load <2 x i64>, <2 x i64>* %__b
2718 %1 = bitcast <2 x i64> %load to <2 x i64>
2719 %2 = icmp eq <2 x i64> %0, %1
; Indices 0-1 select lanes of %2; indices 2-3 select the zeroinitializer operand.
2720 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2721 %4 = bitcast <4 x i1> %3 to i4
; Masked 2 x i64 equality compare of two register operands. The <2 x i1>
; result is ANDed with lanes 0-1 of the i8 mask %__u, widened to <4 x i1>
; with zero upper lanes, and bitcast to i4.
; The VLX checks expect a write-masked xmm vpcmpeqq read back with kmovb.
; The NoVLX checks expect a zmm compare under %k1 and a kshiftlw/kshiftrw
; $14 pair that keeps only mask bits 0-1 before the kmovw.
2725 define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2726 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask:
2727 ; VLX: # %bb.0: # %entry
2728 ; VLX-NEXT: kmovd %edi, %k1
2729 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
2730 ; VLX-NEXT: kmovb %k0, %eax
2733 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask:
2734 ; NoVLX: # %bb.0: # %entry
2735 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
2736 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2737 ; NoVLX-NEXT: kmovw %edi, %k1
2738 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2739 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2740 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2741 ; NoVLX-NEXT: kmovw %k0, %eax
2742 ; NoVLX-NEXT: vzeroupper
; The two bitcasts below have identical source and destination types.
2745 %0 = bitcast <2 x i64> %__a to <2 x i64>
2746 %1 = bitcast <2 x i64> %__b to <2 x i64>
2747 %2 = icmp eq <2 x i64> %0, %1
2748 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0 and 1 of the 8-lane mask so it matches the 2-lane compare.
2749 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2750 %4 = and <2 x i1> %2, %extract.i
; Indices 0-1 select lanes of %4; indices 2-3 select the zeroinitializer operand.
2751 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2752 %6 = bitcast <4 x i1> %5 to i4
; Masked 2 x i64 equality compare whose second operand is loaded from %__b.
; The <2 x i1> result is ANDed with lanes 0-1 of the i8 mask %__u, widened
; to <4 x i1> with zero upper lanes, and bitcast to i4.
; The VLX checks expect the load folded into a write-masked xmm vpcmpeqq,
; then kmovb.
; The NoVLX checks expect a separate vmovdqa, a zmm compare under %k1, and a
; kshiftlw/kshiftrw $14 pair that keeps only mask bits 0-1 before the kmovw.
2756 define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
2757 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem:
2758 ; VLX: # %bb.0: # %entry
2759 ; VLX-NEXT: kmovd %edi, %k1
2760 ; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
2761 ; VLX-NEXT: kmovb %k0, %eax
2764 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem:
2765 ; NoVLX: # %bb.0: # %entry
2766 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2767 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
2768 ; NoVLX-NEXT: kmovw %edi, %k1
2769 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2770 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2771 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2772 ; NoVLX-NEXT: kmovw %k0, %eax
2773 ; NoVLX-NEXT: vzeroupper
; The bitcasts of %__a and %load have identical source and destination types.
2776 %0 = bitcast <2 x i64> %__a to <2 x i64>
2777 %load = load <2 x i64>, <2 x i64>* %__b
2778 %1 = bitcast <2 x i64> %load to <2 x i64>
2779 %2 = icmp eq <2 x i64> %0, %1
2780 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0 and 1 of the 8-lane mask so it matches the 2-lane compare.
2781 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2782 %4 = and <2 x i1> %2, %extract.i
; Indices 0-1 select lanes of %4; indices 2-3 select the zeroinitializer operand.
2783 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2784 %6 = bitcast <4 x i1> %5 to i4
; 2 x i64 equality compare of %__a against a scalar i64 loaded from %__b and
; splatted to both lanes. The <2 x i1> result is widened to <4 x i1> with
; zero upper lanes and bitcast to i4.
; The VLX checks expect the splat folded as a {1to2} memory operand, then
; kmovb.
; The NoVLX checks expect vpbroadcastq into xmm1, a zmm compare, and a
; kshiftlw/kshiftrw $14 pair that keeps only mask bits 0-1 before the kmovw.
2789 define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
2790 ; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b:
2791 ; VLX: # %bb.0: # %entry
2792 ; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
2793 ; VLX-NEXT: kmovb %k0, %eax
2796 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b:
2797 ; NoVLX: # %bb.0: # %entry
2798 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2799 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
2800 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2801 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2802 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2803 ; NoVLX-NEXT: kmovw %k0, %eax
2804 ; NoVLX-NEXT: vzeroupper
; This bitcast has identical source and destination types.
2807 %0 = bitcast <2 x i64> %__a to <2 x i64>
2808 %load = load i64, i64* %__b
; Insert the scalar into lane 0, then splat lane 0 with an all-zero shuffle mask.
2809 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
2810 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
2811 %2 = icmp eq <2 x i64> %0, %1
; Indices 0-1 select lanes of %2; indices 2-3 select the zeroinitializer operand.
2812 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2813 %4 = bitcast <4 x i1> %3 to i4
; Masked equality compare of %__a against a scalar i64 loaded from %__b and
; splatted to both lanes (insertelement plus a <0, 0> shuffle). The <2 x i1>
; result is ANDed with the low two bits of %__u, padded with zero lanes to
; <4 x i1> (shuffle indices 2 and 3 select from zeroinitializer) and returned
; as an i4 mask.
; The VLX run expects one write-masked xmm vpcmpeqq with a {1to2} broadcast
; memory operand; the NoVLX run expects a vpbroadcastq, a write-masked zmm
; compare, and a kshiftlw/kshiftrw pair by 14 that keeps only the low two
; mask bits.
2817 define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
2818 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b:
2819 ; VLX: # %bb.0: # %entry
2820 ; VLX-NEXT: kmovd %edi, %k1
2821 ; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
2822 ; VLX-NEXT: kmovb %k0, %eax
2825 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b:
2826 ; NoVLX: # %bb.0: # %entry
2827 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2828 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
2829 ; NoVLX-NEXT: kmovw %edi, %k1
2830 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2831 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2832 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2833 ; NoVLX-NEXT: kmovw %k0, %eax
2834 ; NoVLX-NEXT: vzeroupper
2837 %0 = bitcast <2 x i64> %__a to <2 x i64>
2838 %load = load i64, i64* %__b
2839 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
2840 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
2841 %2 = icmp eq <2 x i64> %0, %1
2842 %3 = bitcast i8 %__u to <8 x i1>
2843 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2844 %4 = and <2 x i1> %extract.i, %2
2845 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2846 %6 = bitcast <4 x i1> %5 to i4
; Compares %__a and %__b for lane-wise equality (the same-type bitcasts are
; no-ops), pads the <2 x i1> result with zero lanes to <8 x i1> (every
; shuffle index of 2 or 3 selects from zeroinitializer) and returns it as an
; i8 mask.
; The VLX run expects a single xmm vpcmpeqq writing a k-register; the NoVLX
; run expects both operands to be treated as zmm, followed by a
; kshiftlw/kshiftrw pair by 14 that keeps only the low two mask bits.
2851 define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2852 ; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask:
2853 ; VLX: # %bb.0: # %entry
2854 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
2855 ; VLX-NEXT: kmovd %k0, %eax
2856 ; VLX-NEXT: # kill: def $al killed $al killed $eax
2859 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask:
2860 ; NoVLX: # %bb.0: # %entry
2861 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
2862 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2863 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2864 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2865 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2866 ; NoVLX-NEXT: kmovw %k0, %eax
2867 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
2868 ; NoVLX-NEXT: vzeroupper
2871 %0 = bitcast <2 x i64> %__a to <2 x i64>
2872 %1 = bitcast <2 x i64> %__b to <2 x i64>
2873 %2 = icmp eq <2 x i64> %0, %1
2874 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
2875 %4 = bitcast <8 x i1> %3 to i8
; Equality compare of %__a against a <2 x i64> loaded from %__b (the
; same-type bitcasts are no-ops). The <2 x i1> result is padded with zero
; lanes to <8 x i1> (every shuffle index of 2 or 3 selects from
; zeroinitializer) and returned as an i8 mask.
; The VLX run expects a single xmm vpcmpeqq with the load folded as a memory
; operand; the NoVLX run expects a separate vmovdqa load, a zmm compare, and
; a kshiftlw/kshiftrw pair by 14 that keeps only the low two mask bits.
2879 define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
2880 ; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem:
2881 ; VLX: # %bb.0: # %entry
2882 ; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
2883 ; VLX-NEXT: kmovd %k0, %eax
2884 ; VLX-NEXT: # kill: def $al killed $al killed $eax
2887 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem:
2888 ; NoVLX: # %bb.0: # %entry
2889 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2890 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
2891 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2892 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2893 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2894 ; NoVLX-NEXT: kmovw %k0, %eax
2895 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
2896 ; NoVLX-NEXT: vzeroupper
2899 %0 = bitcast <2 x i64> %__a to <2 x i64>
2900 %load = load <2 x i64>, <2 x i64>* %__b
2901 %1 = bitcast <2 x i64> %load to <2 x i64>
2902 %2 = icmp eq <2 x i64> %0, %1
2903 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
2904 %4 = bitcast <8 x i1> %3 to i8
; Masked lane-wise equality compare of %__a and %__b (the same-type bitcasts
; are no-ops). The <2 x i1> result is ANDed with the low two bits of %__u,
; padded with zero lanes to <8 x i1> (every shuffle index of 2 or 3 selects
; from zeroinitializer) and returned as an i8 mask.
; The VLX run expects one write-masked xmm vpcmpeqq; the NoVLX run expects a
; write-masked zmm compare followed by a kshiftlw/kshiftrw pair by 14 that
; keeps only the low two mask bits.
2908 define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2909 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask:
2910 ; VLX: # %bb.0: # %entry
2911 ; VLX-NEXT: kmovd %edi, %k1
2912 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
2913 ; VLX-NEXT: kmovd %k0, %eax
2914 ; VLX-NEXT: # kill: def $al killed $al killed $eax
2917 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask:
2918 ; NoVLX: # %bb.0: # %entry
2919 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
2920 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2921 ; NoVLX-NEXT: kmovw %edi, %k1
2922 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2923 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2924 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2925 ; NoVLX-NEXT: kmovw %k0, %eax
2926 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
2927 ; NoVLX-NEXT: vzeroupper
2930 %0 = bitcast <2 x i64> %__a to <2 x i64>
2931 %1 = bitcast <2 x i64> %__b to <2 x i64>
2932 %2 = icmp eq <2 x i64> %0, %1
2933 %3 = bitcast i8 %__u to <8 x i1>
2934 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2935 %4 = and <2 x i1> %2, %extract.i
2936 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
2937 %6 = bitcast <8 x i1> %5 to i8
; Masked equality compare of %__a against a <2 x i64> loaded from %__b (the
; same-type bitcasts are no-ops). The <2 x i1> result is ANDed with the low
; two bits of %__u, padded with zero lanes to <8 x i1> (every shuffle index
; of 2 or 3 selects from zeroinitializer) and returned as an i8 mask.
; The VLX run expects one write-masked xmm vpcmpeqq with the load folded as a
; memory operand; the NoVLX run expects a separate vmovdqa load, a
; write-masked zmm compare, and a kshiftlw/kshiftrw pair by 14 that keeps
; only the low two mask bits.
2941 define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
2942 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem:
2943 ; VLX: # %bb.0: # %entry
2944 ; VLX-NEXT: kmovd %edi, %k1
2945 ; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
2946 ; VLX-NEXT: kmovd %k0, %eax
2947 ; VLX-NEXT: # kill: def $al killed $al killed $eax
2950 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem:
2951 ; NoVLX: # %bb.0: # %entry
2952 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2953 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
2954 ; NoVLX-NEXT: kmovw %edi, %k1
2955 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2956 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2957 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2958 ; NoVLX-NEXT: kmovw %k0, %eax
2959 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
2960 ; NoVLX-NEXT: vzeroupper
2963 %0 = bitcast <2 x i64> %__a to <2 x i64>
2964 %load = load <2 x i64>, <2 x i64>* %__b
2965 %1 = bitcast <2 x i64> %load to <2 x i64>
2966 %2 = icmp eq <2 x i64> %0, %1
2967 %3 = bitcast i8 %__u to <8 x i1>
2968 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2969 %4 = and <2 x i1> %2, %extract.i
2970 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
2971 %6 = bitcast <8 x i1> %5 to i8
; Equality compare of %__a against a scalar i64 loaded from %__b and splatted
; to both lanes (insertelement plus a <0, 0> shuffle). The <2 x i1> result is
; padded with zero lanes to <8 x i1> (every shuffle index of 2 or 3 selects
; from zeroinitializer) and returned as an i8 mask.
; The VLX run expects a single xmm vpcmpeqq with a {1to2} broadcast memory
; operand; the NoVLX run expects a vpbroadcastq, a zmm compare, and a
; kshiftlw/kshiftrw pair by 14 that keeps only the low two mask bits.
2976 define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
2977 ; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b:
2978 ; VLX: # %bb.0: # %entry
2979 ; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
2980 ; VLX-NEXT: kmovd %k0, %eax
2981 ; VLX-NEXT: # kill: def $al killed $al killed $eax
2984 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b:
2985 ; NoVLX: # %bb.0: # %entry
2986 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2987 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
2988 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2989 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2990 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2991 ; NoVLX-NEXT: kmovw %k0, %eax
2992 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
2993 ; NoVLX-NEXT: vzeroupper
2996 %0 = bitcast <2 x i64> %__a to <2 x i64>
2997 %load = load i64, i64* %__b
2998 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
2999 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3000 %2 = icmp eq <2 x i64> %0, %1
3001 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3002 %4 = bitcast <8 x i1> %3 to i8
; Masked equality compare of %__a against a scalar i64 loaded from %__b and
; splatted to both lanes (insertelement plus a <0, 0> shuffle). The <2 x i1>
; result is ANDed with the low two bits of %__u, padded with zero lanes to
; <8 x i1> (every shuffle index of 2 or 3 selects from zeroinitializer) and
; returned as an i8 mask.
; The VLX run expects one write-masked xmm vpcmpeqq with a {1to2} broadcast
; memory operand; the NoVLX run expects a vpbroadcastq, a write-masked zmm
; compare, and a kshiftlw/kshiftrw pair by 14 that keeps only the low two
; mask bits.
3006 define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
3007 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b:
3008 ; VLX: # %bb.0: # %entry
3009 ; VLX-NEXT: kmovd %edi, %k1
3010 ; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
3011 ; VLX-NEXT: kmovd %k0, %eax
3012 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3015 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b:
3016 ; NoVLX: # %bb.0: # %entry
3017 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3018 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
3019 ; NoVLX-NEXT: kmovw %edi, %k1
3020 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3021 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3022 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3023 ; NoVLX-NEXT: kmovw %k0, %eax
3024 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3025 ; NoVLX-NEXT: vzeroupper
3028 %0 = bitcast <2 x i64> %__a to <2 x i64>
3029 %load = load i64, i64* %__b
3030 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3031 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3032 %2 = icmp eq <2 x i64> %0, %1
3033 %3 = bitcast i8 %__u to <8 x i1>
3034 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3035 %4 = and <2 x i1> %extract.i, %2
3036 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3037 %6 = bitcast <8 x i1> %5 to i8
; Compares %__a and %__b for lane-wise equality (the same-type bitcasts are
; no-ops), pads the <2 x i1> result with zero lanes to <16 x i1> (every
; shuffle index of 2 or 3 selects from zeroinitializer) and returns it as an
; i16 mask.
; The VLX run expects a single xmm vpcmpeqq writing a k-register; the NoVLX
; run expects both operands to be treated as zmm, followed by a
; kshiftlw/kshiftrw pair by 14 that keeps only the low two mask bits.
3042 define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3043 ; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask:
3044 ; VLX: # %bb.0: # %entry
3045 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
3046 ; VLX-NEXT: kmovd %k0, %eax
3047 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3050 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask:
3051 ; NoVLX: # %bb.0: # %entry
3052 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3053 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3054 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3055 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3056 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3057 ; NoVLX-NEXT: kmovw %k0, %eax
3058 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3059 ; NoVLX-NEXT: vzeroupper
3062 %0 = bitcast <2 x i64> %__a to <2 x i64>
3063 %1 = bitcast <2 x i64> %__b to <2 x i64>
3064 %2 = icmp eq <2 x i64> %0, %1
3065 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3066 %4 = bitcast <16 x i1> %3 to i16
; Equality compare of %__a against a <2 x i64> loaded from %__b (the
; same-type bitcasts are no-ops). The <2 x i1> result is padded with zero
; lanes to <16 x i1> (every shuffle index of 2 or 3 selects from
; zeroinitializer) and returned as an i16 mask.
; The VLX run expects a single xmm vpcmpeqq with the load folded as a memory
; operand; the NoVLX run expects a separate vmovdqa load, a zmm compare, and
; a kshiftlw/kshiftrw pair by 14 that keeps only the low two mask bits.
3070 define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
3071 ; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem:
3072 ; VLX: # %bb.0: # %entry
3073 ; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
3074 ; VLX-NEXT: kmovd %k0, %eax
3075 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3078 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem:
3079 ; NoVLX: # %bb.0: # %entry
3080 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3081 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
3082 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3083 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3084 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3085 ; NoVLX-NEXT: kmovw %k0, %eax
3086 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3087 ; NoVLX-NEXT: vzeroupper
3090 %0 = bitcast <2 x i64> %__a to <2 x i64>
3091 %load = load <2 x i64>, <2 x i64>* %__b
3092 %1 = bitcast <2 x i64> %load to <2 x i64>
3093 %2 = icmp eq <2 x i64> %0, %1
3094 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3095 %4 = bitcast <16 x i1> %3 to i16
; Masked lane-wise equality compare of %__a and %__b (the same-type bitcasts
; are no-ops). The <2 x i1> result is ANDed with the low two bits of %__u,
; padded with zero lanes to <16 x i1> (every shuffle index of 2 or 3 selects
; from zeroinitializer) and returned as an i16 mask.
; The VLX run expects one write-masked xmm vpcmpeqq; the NoVLX run expects a
; write-masked zmm compare followed by a kshiftlw/kshiftrw pair by 14 that
; keeps only the low two mask bits.
3099 define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3100 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask:
3101 ; VLX: # %bb.0: # %entry
3102 ; VLX-NEXT: kmovd %edi, %k1
3103 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
3104 ; VLX-NEXT: kmovd %k0, %eax
3105 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3108 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask:
3109 ; NoVLX: # %bb.0: # %entry
3110 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3111 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3112 ; NoVLX-NEXT: kmovw %edi, %k1
3113 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3114 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3115 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3116 ; NoVLX-NEXT: kmovw %k0, %eax
3117 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3118 ; NoVLX-NEXT: vzeroupper
3121 %0 = bitcast <2 x i64> %__a to <2 x i64>
3122 %1 = bitcast <2 x i64> %__b to <2 x i64>
3123 %2 = icmp eq <2 x i64> %0, %1
3124 %3 = bitcast i8 %__u to <8 x i1>
3125 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3126 %4 = and <2 x i1> %2, %extract.i
3127 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3128 %6 = bitcast <16 x i1> %5 to i16
; Masked equality compare of %__a against a <2 x i64> loaded from %__b (the
; same-type bitcasts are no-ops). The <2 x i1> result is ANDed with the low
; two bits of %__u, padded with zero lanes to <16 x i1> (every shuffle index
; of 2 or 3 selects from zeroinitializer) and returned as an i16 mask.
; The VLX run expects one write-masked xmm vpcmpeqq with the load folded as a
; memory operand; the NoVLX run expects a separate vmovdqa load, a
; write-masked zmm compare, and a kshiftlw/kshiftrw pair by 14 that keeps
; only the low two mask bits.
3132 define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
3133 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem:
3134 ; VLX: # %bb.0: # %entry
3135 ; VLX-NEXT: kmovd %edi, %k1
3136 ; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
3137 ; VLX-NEXT: kmovd %k0, %eax
3138 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3141 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem:
3142 ; NoVLX: # %bb.0: # %entry
3143 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3144 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
3145 ; NoVLX-NEXT: kmovw %edi, %k1
3146 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3147 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3148 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3149 ; NoVLX-NEXT: kmovw %k0, %eax
3150 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3151 ; NoVLX-NEXT: vzeroupper
3154 %0 = bitcast <2 x i64> %__a to <2 x i64>
3155 %load = load <2 x i64>, <2 x i64>* %__b
3156 %1 = bitcast <2 x i64> %load to <2 x i64>
3157 %2 = icmp eq <2 x i64> %0, %1
3158 %3 = bitcast i8 %__u to <8 x i1>
3159 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3160 %4 = and <2 x i1> %2, %extract.i
3161 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3162 %6 = bitcast <16 x i1> %5 to i16
; Equality compare of %__a against a scalar i64 loaded from %__b and splatted
; to both lanes (insertelement plus a <0, 0> shuffle). The <2 x i1> result is
; padded with zero lanes to <16 x i1> (every shuffle index of 2 or 3 selects
; from zeroinitializer) and returned as an i16 mask.
; The VLX run expects a single xmm vpcmpeqq with a {1to2} broadcast memory
; operand; the NoVLX run expects a vpbroadcastq, a zmm compare, and a
; kshiftlw/kshiftrw pair by 14 that keeps only the low two mask bits.
3167 define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
3168 ; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b:
3169 ; VLX: # %bb.0: # %entry
3170 ; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
3171 ; VLX-NEXT: kmovd %k0, %eax
3172 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3175 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b:
3176 ; NoVLX: # %bb.0: # %entry
3177 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3178 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
3179 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3180 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3181 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3182 ; NoVLX-NEXT: kmovw %k0, %eax
3183 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3184 ; NoVLX-NEXT: vzeroupper
3187 %0 = bitcast <2 x i64> %__a to <2 x i64>
3188 %load = load i64, i64* %__b
3189 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3190 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3191 %2 = icmp eq <2 x i64> %0, %1
3192 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3193 %4 = bitcast <16 x i1> %3 to i16
; Masked equality compare of %__a against a scalar i64 loaded from %__b and
; splatted to both lanes (insertelement plus a <0, 0> shuffle). The <2 x i1>
; result is ANDed with the low two bits of %__u, padded with zero lanes to
; <16 x i1> (every shuffle index of 2 or 3 selects from zeroinitializer) and
; returned as an i16 mask.
; The VLX run expects one write-masked xmm vpcmpeqq with a {1to2} broadcast
; memory operand; the NoVLX run expects a vpbroadcastq, a write-masked zmm
; compare, and a kshiftlw/kshiftrw pair by 14 that keeps only the low two
; mask bits.
3197 define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
3198 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b:
3199 ; VLX: # %bb.0: # %entry
3200 ; VLX-NEXT: kmovd %edi, %k1
3201 ; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
3202 ; VLX-NEXT: kmovd %k0, %eax
3203 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3206 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b:
3207 ; NoVLX: # %bb.0: # %entry
3208 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3209 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
3210 ; NoVLX-NEXT: kmovw %edi, %k1
3211 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3212 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3213 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3214 ; NoVLX-NEXT: kmovw %k0, %eax
3215 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3216 ; NoVLX-NEXT: vzeroupper
3219 %0 = bitcast <2 x i64> %__a to <2 x i64>
3220 %load = load i64, i64* %__b
3221 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3222 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3223 %2 = icmp eq <2 x i64> %0, %1
3224 %3 = bitcast i8 %__u to <8 x i1>
3225 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3226 %4 = and <2 x i1> %extract.i, %2
3227 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3228 %6 = bitcast <16 x i1> %5 to i16
; Compares %__a and %__b for lane-wise equality (the same-type bitcasts are
; no-ops), pads the <2 x i1> result with zero lanes to <32 x i1> (every
; shuffle index of 2 or 3 selects from zeroinitializer) and returns it as an
; i32 mask.
; The VLX run expects a single xmm vpcmpeqq writing a k-register; the NoVLX
; run expects both operands to be treated as zmm, followed by a
; kshiftlw/kshiftrw pair by 14 that keeps only the low two mask bits.
3233 define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3234 ; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
3235 ; VLX: # %bb.0: # %entry
3236 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
3237 ; VLX-NEXT: kmovd %k0, %eax
3240 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
3241 ; NoVLX: # %bb.0: # %entry
3242 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3243 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3244 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3245 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3246 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3247 ; NoVLX-NEXT: kmovw %k0, %eax
3248 ; NoVLX-NEXT: vzeroupper
3251 %0 = bitcast <2 x i64> %__a to <2 x i64>
3252 %1 = bitcast <2 x i64> %__b to <2 x i64>
3253 %2 = icmp eq <2 x i64> %0, %1
3254 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3255 %4 = bitcast <32 x i1> %3 to i32
; Equality compare of %__a against a <2 x i64> loaded from %__b (the
; same-type bitcasts are no-ops). The <2 x i1> result is padded with zero
; lanes to <32 x i1> (every shuffle index of 2 or 3 selects from
; zeroinitializer) and returned as an i32 mask.
; The VLX run expects a single xmm vpcmpeqq with the load folded as a memory
; operand; the NoVLX run expects a separate vmovdqa load, a zmm compare, and
; a kshiftlw/kshiftrw pair by 14 that keeps only the low two mask bits.
3259 define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
3260 ; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
3261 ; VLX: # %bb.0: # %entry
3262 ; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
3263 ; VLX-NEXT: kmovd %k0, %eax
3266 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
3267 ; NoVLX: # %bb.0: # %entry
3268 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3269 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
3270 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3271 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3272 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3273 ; NoVLX-NEXT: kmovw %k0, %eax
3274 ; NoVLX-NEXT: vzeroupper
3277 %0 = bitcast <2 x i64> %__a to <2 x i64>
3278 %load = load <2 x i64>, <2 x i64>* %__b
3279 %1 = bitcast <2 x i64> %load to <2 x i64>
3280 %2 = icmp eq <2 x i64> %0, %1
3281 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3282 %4 = bitcast <32 x i1> %3 to i32
; Masked lane-wise equality compare of %__a and %__b (the same-type bitcasts
; are no-ops). The <2 x i1> result is ANDed with the low two bits of %__u,
; padded with zero lanes to <32 x i1> (every shuffle index of 2 or 3 selects
; from zeroinitializer) and returned as an i32 mask.
; The VLX run expects one write-masked xmm vpcmpeqq; the NoVLX run expects a
; write-masked zmm compare followed by a kshiftlw/kshiftrw pair by 14 that
; keeps only the low two mask bits.
3286 define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3287 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
3288 ; VLX: # %bb.0: # %entry
3289 ; VLX-NEXT: kmovd %edi, %k1
3290 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
3291 ; VLX-NEXT: kmovd %k0, %eax
3294 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
3295 ; NoVLX: # %bb.0: # %entry
3296 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3297 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3298 ; NoVLX-NEXT: kmovw %edi, %k1
3299 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3300 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3301 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3302 ; NoVLX-NEXT: kmovw %k0, %eax
3303 ; NoVLX-NEXT: vzeroupper
3306 %0 = bitcast <2 x i64> %__a to <2 x i64>
3307 %1 = bitcast <2 x i64> %__b to <2 x i64>
3308 %2 = icmp eq <2 x i64> %0, %1
3309 %3 = bitcast i8 %__u to <8 x i1>
3310 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3311 %4 = and <2 x i1> %2, %extract.i
3312 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3313 %6 = bitcast <32 x i1> %5 to i32
; Masked equality compare of %__a against a <2 x i64> loaded from %__b (the
; same-type bitcasts are no-ops). The <2 x i1> result is ANDed with the low
; two bits of %__u, padded with zero lanes to <32 x i1> (every shuffle index
; of 2 or 3 selects from zeroinitializer) and returned as an i32 mask.
; The VLX run expects one write-masked xmm vpcmpeqq with the load folded as a
; memory operand; the NoVLX run expects a separate vmovdqa load, a
; write-masked zmm compare, and a kshiftlw/kshiftrw pair by 14 that keeps
; only the low two mask bits.
3317 define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
3318 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
3319 ; VLX: # %bb.0: # %entry
3320 ; VLX-NEXT: kmovd %edi, %k1
3321 ; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
3322 ; VLX-NEXT: kmovd %k0, %eax
3325 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
3326 ; NoVLX: # %bb.0: # %entry
3327 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3328 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
3329 ; NoVLX-NEXT: kmovw %edi, %k1
3330 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3331 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3332 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3333 ; NoVLX-NEXT: kmovw %k0, %eax
3334 ; NoVLX-NEXT: vzeroupper
3337 %0 = bitcast <2 x i64> %__a to <2 x i64>
3338 %load = load <2 x i64>, <2 x i64>* %__b
3339 %1 = bitcast <2 x i64> %load to <2 x i64>
3340 %2 = icmp eq <2 x i64> %0, %1
3341 %3 = bitcast i8 %__u to <8 x i1>
3342 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3343 %4 = and <2 x i1> %2, %extract.i
3344 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3345 %6 = bitcast <32 x i1> %5 to i32
; Equality compare of %__a against a scalar i64 loaded from %__b and splatted
; to both lanes (insertelement plus a <0, 0> shuffle). The <2 x i1> result is
; padded with zero lanes to <32 x i1> (every shuffle index of 2 or 3 selects
; from zeroinitializer) and returned as an i32 mask.
; The VLX run expects a single xmm vpcmpeqq with a {1to2} broadcast memory
; operand; the NoVLX run expects a vpbroadcastq, a zmm compare, and a
; kshiftlw/kshiftrw pair by 14 that keeps only the low two mask bits.
3350 define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
3351 ; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
3352 ; VLX: # %bb.0: # %entry
3353 ; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
3354 ; VLX-NEXT: kmovd %k0, %eax
3357 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
3358 ; NoVLX: # %bb.0: # %entry
3359 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3360 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
3361 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3362 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3363 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3364 ; NoVLX-NEXT: kmovw %k0, %eax
3365 ; NoVLX-NEXT: vzeroupper
3368 %0 = bitcast <2 x i64> %__a to <2 x i64>
3369 %load = load i64, i64* %__b
3370 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3371 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3372 %2 = icmp eq <2 x i64> %0, %1
3373 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3374 %4 = bitcast <32 x i1> %3 to i32
; Masked equality compare of %__a against a scalar i64 loaded from %__b and
; splatted to both lanes (insertelement plus a <0, 0> shuffle). The <2 x i1>
; result is ANDed with the low two bits of %__u, padded with zero lanes to
; <32 x i1> (every shuffle index of 2 or 3 selects from zeroinitializer) and
; returned as an i32 mask.
; The VLX run expects one write-masked xmm vpcmpeqq with a {1to2} broadcast
; memory operand; the NoVLX run expects a vpbroadcastq, a write-masked zmm
; compare, and a kshiftlw/kshiftrw pair by 14 that keeps only the low two
; mask bits.
3378 define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
3379 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
3380 ; VLX: # %bb.0: # %entry
3381 ; VLX-NEXT: kmovd %edi, %k1
3382 ; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
3383 ; VLX-NEXT: kmovd %k0, %eax
3386 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
3387 ; NoVLX: # %bb.0: # %entry
3388 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3389 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
3390 ; NoVLX-NEXT: kmovw %edi, %k1
3391 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3392 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3393 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3394 ; NoVLX-NEXT: kmovw %k0, %eax
3395 ; NoVLX-NEXT: vzeroupper
3398 %0 = bitcast <2 x i64> %__a to <2 x i64>
3399 %load = load i64, i64* %__b
3400 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3401 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3402 %2 = icmp eq <2 x i64> %0, %1
3403 %3 = bitcast i8 %__u to <8 x i1>
3404 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3405 %4 = and <2 x i1> %extract.i, %2
3406 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3407 %6 = bitcast <32 x i1> %5 to i32
3412 define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3413 ; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
3414 ; VLX: # %bb.0: # %entry
3415 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
3416 ; VLX-NEXT: kmovq %k0, %rax
3419 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
3420 ; NoVLX: # %bb.0: # %entry
3421 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3422 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3423 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3424 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3425 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3426 ; NoVLX-NEXT: kmovw %k0, %eax
3427 ; NoVLX-NEXT: vzeroupper
3430 %0 = bitcast <2 x i64> %__a to <2 x i64>
3431 %1 = bitcast <2 x i64> %__b to <2 x i64>
3432 %2 = icmp eq <2 x i64> %0, %1
3433 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3434 %4 = bitcast <64 x i1> %3 to i64
3438 define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
3439 ; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
3440 ; VLX: # %bb.0: # %entry
3441 ; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
3442 ; VLX-NEXT: kmovq %k0, %rax
3445 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
3446 ; NoVLX: # %bb.0: # %entry
3447 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3448 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
3449 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3450 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3451 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3452 ; NoVLX-NEXT: kmovw %k0, %eax
3453 ; NoVLX-NEXT: vzeroupper
3456 %0 = bitcast <2 x i64> %__a to <2 x i64>
3457 %load = load <2 x i64>, <2 x i64>* %__b
3458 %1 = bitcast <2 x i64> %load to <2 x i64>
3459 %2 = icmp eq <2 x i64> %0, %1
3460 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3461 %4 = bitcast <64 x i1> %3 to i64
3465 define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3466 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
3467 ; VLX: # %bb.0: # %entry
3468 ; VLX-NEXT: kmovd %edi, %k1
3469 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
3470 ; VLX-NEXT: kmovq %k0, %rax
3473 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
3474 ; NoVLX: # %bb.0: # %entry
3475 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3476 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3477 ; NoVLX-NEXT: kmovw %edi, %k1
3478 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3479 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3480 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3481 ; NoVLX-NEXT: kmovw %k0, %eax
3482 ; NoVLX-NEXT: vzeroupper
3485 %0 = bitcast <2 x i64> %__a to <2 x i64>
3486 %1 = bitcast <2 x i64> %__b to <2 x i64>
3487 %2 = icmp eq <2 x i64> %0, %1
3488 %3 = bitcast i8 %__u to <8 x i1>
3489 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3490 %4 = and <2 x i1> %2, %extract.i
3491 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3492 %6 = bitcast <64 x i1> %5 to i64
3496 define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
3497 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
3498 ; VLX: # %bb.0: # %entry
3499 ; VLX-NEXT: kmovd %edi, %k1
3500 ; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
3501 ; VLX-NEXT: kmovq %k0, %rax
3504 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
3505 ; NoVLX: # %bb.0: # %entry
3506 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3507 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
3508 ; NoVLX-NEXT: kmovw %edi, %k1
3509 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3510 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3511 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3512 ; NoVLX-NEXT: kmovw %k0, %eax
3513 ; NoVLX-NEXT: vzeroupper
3516 %0 = bitcast <2 x i64> %__a to <2 x i64>
3517 %load = load <2 x i64>, <2 x i64>* %__b
3518 %1 = bitcast <2 x i64> %load to <2 x i64>
3519 %2 = icmp eq <2 x i64> %0, %1
3520 %3 = bitcast i8 %__u to <8 x i1>
3521 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3522 %4 = and <2 x i1> %2, %extract.i
3523 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3524 %6 = bitcast <64 x i1> %5 to i64
3529 define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
3530 ; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
3531 ; VLX: # %bb.0: # %entry
3532 ; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
3533 ; VLX-NEXT: kmovq %k0, %rax
3536 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
3537 ; NoVLX: # %bb.0: # %entry
3538 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3539 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
3540 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3541 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3542 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3543 ; NoVLX-NEXT: kmovw %k0, %eax
3544 ; NoVLX-NEXT: vzeroupper
3547 %0 = bitcast <2 x i64> %__a to <2 x i64>
3548 %load = load i64, i64* %__b
3549 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3550 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3551 %2 = icmp eq <2 x i64> %0, %1
3552 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3553 %4 = bitcast <64 x i1> %3 to i64
3557 define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
3558 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
3559 ; VLX: # %bb.0: # %entry
3560 ; VLX-NEXT: kmovd %edi, %k1
3561 ; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
3562 ; VLX-NEXT: kmovq %k0, %rax
3565 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
3566 ; NoVLX: # %bb.0: # %entry
3567 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3568 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
3569 ; NoVLX-NEXT: kmovw %edi, %k1
3570 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3571 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3572 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3573 ; NoVLX-NEXT: kmovw %k0, %eax
3574 ; NoVLX-NEXT: vzeroupper
3577 %0 = bitcast <2 x i64> %__a to <2 x i64>
3578 %load = load i64, i64* %__b
3579 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3580 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3581 %2 = icmp eq <2 x i64> %0, %1
3582 %3 = bitcast i8 %__u to <8 x i1>
3583 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3584 %4 = and <2 x i1> %extract.i, %2
3585 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3586 %6 = bitcast <64 x i1> %5 to i64
; Unmasked 256-bit equality compare on register operands: icmp eq of %__a and
; %__b as <4 x i64>, with the <4 x i1> result widened to <8 x i1> and bitcast
; to i8 (the declared return type).
; The VLX run is expected to compare in ymm directly into a mask register; the
; NoVLX run is expected to widen both operands to zmm and keep only the low
; four mask bits via the kshiftlw/kshiftrw pair by 12.
3591 define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3592 ; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
3593 ; VLX: # %bb.0: # %entry
3594 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
3595 ; VLX-NEXT: kmovd %k0, %eax
3596 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3597 ; VLX-NEXT: vzeroupper
3600 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
3601 ; NoVLX: # %bb.0: # %entry
3602 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
3603 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3604 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3605 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3606 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3607 ; NoVLX-NEXT: kmovw %k0, %eax
3608 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3609 ; NoVLX-NEXT: vzeroupper
3612 %0 = bitcast <4 x i64> %__a to <4 x i64>
3613 %1 = bitcast <4 x i64> %__b to <4 x i64>
3614 %2 = icmp eq <4 x i64> %0, %1
; Indices 4-7 select lanes of the zeroinitializer operand, so the upper four result lanes are zero.
3615 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3616 %4 = bitcast <8 x i1> %3 to i8
; Unmasked 256-bit equality compare with a full-vector memory operand: %__a is
; compared with the <4 x i64> loaded from %__b, and the <4 x i1> result is
; widened to <8 x i1> and bitcast to i8 (the declared return type).
; The VLX run is expected to fold the load into vpcmpeqq; the NoVLX run is
; expected to load with vmovdqa, compare in zmm, and keep only the low four
; mask bits via the kshiftlw/kshiftrw pair by 12.
3620 define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
3621 ; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
3622 ; VLX: # %bb.0: # %entry
3623 ; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
3624 ; VLX-NEXT: kmovd %k0, %eax
3625 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3626 ; VLX-NEXT: vzeroupper
3629 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
3630 ; NoVLX: # %bb.0: # %entry
3631 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3632 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
3633 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3634 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3635 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3636 ; NoVLX-NEXT: kmovw %k0, %eax
3637 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3638 ; NoVLX-NEXT: vzeroupper
3641 %0 = bitcast <4 x i64> %__a to <4 x i64>
3642 %load = load <4 x i64>, <4 x i64>* %__b
3643 %1 = bitcast <4 x i64> %load to <4 x i64>
3644 %2 = icmp eq <4 x i64> %0, %1
; Indices 4-7 select lanes of the zeroinitializer operand, so the upper four result lanes are zero.
3645 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3646 %4 = bitcast <8 x i1> %3 to i8
; Masked 256-bit equality compare on register operands: icmp eq of %__a and
; %__b as <4 x i64>, ANDed with the low four bits of %__u, widened to
; <8 x i1>, and bitcast to i8 (the declared return type).
; The VLX run is expected to do a ymm compare under write-mask k1; the NoVLX
; run is expected to do a masked zmm compare followed by the kshiftlw/kshiftrw
; pair by 12 to keep only the low four mask bits.
3650 define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3651 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
3652 ; VLX: # %bb.0: # %entry
3653 ; VLX-NEXT: kmovd %edi, %k1
3654 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
3655 ; VLX-NEXT: kmovd %k0, %eax
3656 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3657 ; VLX-NEXT: vzeroupper
3660 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
3661 ; NoVLX: # %bb.0: # %entry
3662 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
3663 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3664 ; NoVLX-NEXT: kmovw %edi, %k1
3665 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3666 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3667 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3668 ; NoVLX-NEXT: kmovw %k0, %eax
3669 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3670 ; NoVLX-NEXT: vzeroupper
3673 %0 = bitcast <4 x i64> %__a to <4 x i64>
3674 %1 = bitcast <4 x i64> %__b to <4 x i64>
3675 %2 = icmp eq <4 x i64> %0, %1
; Take the low four bits of the i8 mask as a <4 x i1> vector.
3676 %3 = bitcast i8 %__u to <8 x i1>
3677 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3678 %4 = and <4 x i1> %2, %extract.i
; Indices 4-7 select lanes of the zeroinitializer operand, so the upper four result lanes are zero.
3679 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3680 %6 = bitcast <8 x i1> %5 to i8
; Masked 256-bit equality compare with a full-vector memory operand: %__a is
; compared with the <4 x i64> loaded from %__b; the result is ANDed with the
; low four bits of %__u, widened to <8 x i1>, and bitcast to i8 (the declared
; return type).
; The VLX run is expected to fold the load into a compare under write-mask k1;
; the NoVLX run is expected to load with vmovdqa, do a masked zmm compare, and
; keep only the low four mask bits via the kshiftlw/kshiftrw pair by 12.
3684 define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
3685 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
3686 ; VLX: # %bb.0: # %entry
3687 ; VLX-NEXT: kmovd %edi, %k1
3688 ; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
3689 ; VLX-NEXT: kmovd %k0, %eax
3690 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3691 ; VLX-NEXT: vzeroupper
3694 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
3695 ; NoVLX: # %bb.0: # %entry
3696 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3697 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
3698 ; NoVLX-NEXT: kmovw %edi, %k1
3699 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3700 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3701 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3702 ; NoVLX-NEXT: kmovw %k0, %eax
3703 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3704 ; NoVLX-NEXT: vzeroupper
3707 %0 = bitcast <4 x i64> %__a to <4 x i64>
3708 %load = load <4 x i64>, <4 x i64>* %__b
3709 %1 = bitcast <4 x i64> %load to <4 x i64>
3710 %2 = icmp eq <4 x i64> %0, %1
; Take the low four bits of the i8 mask as a <4 x i1> vector.
3711 %3 = bitcast i8 %__u to <8 x i1>
3712 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3713 %4 = and <4 x i1> %2, %extract.i
; Indices 4-7 select lanes of the zeroinitializer operand, so the upper four result lanes are zero.
3714 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3715 %6 = bitcast <8 x i1> %5 to i8
; Unmasked 256-bit equality compare against a broadcast scalar: the i64 loaded
; from %__b is splatted to <4 x i64>, compared with %__a, and the <4 x i1>
; result is widened to <8 x i1> and bitcast to i8 (the declared return type).
; The VLX run is expected to fold the load as a {1to4} broadcast operand; the
; NoVLX run is expected to use a separate vpbroadcastq, compare in zmm, and
; keep only the low four mask bits via the kshiftlw/kshiftrw pair by 12.
3720 define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
3721 ; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
3722 ; VLX: # %bb.0: # %entry
3723 ; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
3724 ; VLX-NEXT: kmovd %k0, %eax
3725 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3726 ; VLX-NEXT: vzeroupper
3729 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
3730 ; NoVLX: # %bb.0: # %entry
3731 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3732 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
3733 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3734 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3735 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3736 ; NoVLX-NEXT: kmovw %k0, %eax
3737 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3738 ; NoVLX-NEXT: vzeroupper
3741 %0 = bitcast <4 x i64> %__a to <4 x i64>
3742 %load = load i64, i64* %__b
; Splat the loaded scalar into all four lanes.
3743 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
3744 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
3745 %2 = icmp eq <4 x i64> %0, %1
; Indices 4-7 select lanes of the zeroinitializer operand, so the upper four result lanes are zero.
3746 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3747 %4 = bitcast <8 x i1> %3 to i8
; Masked 256-bit equality compare against a broadcast scalar: the i64 loaded
; from %__b is splatted to <4 x i64> and compared with %__a; the result is
; ANDed with the low four bits of %__u, widened to <8 x i1>, and bitcast to i8
; (the declared return type).
; The VLX run is expected to use a {1to4} broadcast operand under write-mask
; k1; the NoVLX run is expected to use vpbroadcastq, a masked zmm compare, and
; the kshiftlw/kshiftrw pair by 12 to keep only the low four mask bits.
3751 define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
3752 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
3753 ; VLX: # %bb.0: # %entry
3754 ; VLX-NEXT: kmovd %edi, %k1
3755 ; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
3756 ; VLX-NEXT: kmovd %k0, %eax
3757 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3758 ; VLX-NEXT: vzeroupper
3761 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
3762 ; NoVLX: # %bb.0: # %entry
3763 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3764 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
3765 ; NoVLX-NEXT: kmovw %edi, %k1
3766 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3767 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3768 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3769 ; NoVLX-NEXT: kmovw %k0, %eax
3770 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3771 ; NoVLX-NEXT: vzeroupper
3774 %0 = bitcast <4 x i64> %__a to <4 x i64>
3775 %load = load i64, i64* %__b
; Splat the loaded scalar into all four lanes.
3776 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
3777 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
3778 %2 = icmp eq <4 x i64> %0, %1
; Take the low four bits of the i8 mask as a <4 x i1> vector.
3779 %3 = bitcast i8 %__u to <8 x i1>
3780 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3781 %4 = and <4 x i1> %extract.i, %2
; Indices 4-7 select lanes of the zeroinitializer operand, so the upper four result lanes are zero.
3782 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3783 %6 = bitcast <8 x i1> %5 to i8
; Unmasked 256-bit equality compare on register operands: icmp eq of %__a and
; %__b as <4 x i64>, with the <4 x i1> result widened to <16 x i1> and bitcast
; to i16 (the declared return type).
; The VLX run is expected to compare in ymm directly into a mask register; the
; NoVLX run is expected to widen both operands to zmm and keep only the low
; four mask bits via the kshiftlw/kshiftrw pair by 12.
3788 define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3789 ; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
3790 ; VLX: # %bb.0: # %entry
3791 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
3792 ; VLX-NEXT: kmovd %k0, %eax
3793 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3794 ; VLX-NEXT: vzeroupper
3797 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
3798 ; NoVLX: # %bb.0: # %entry
3799 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
3800 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3801 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3802 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3803 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3804 ; NoVLX-NEXT: kmovw %k0, %eax
3805 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3806 ; NoVLX-NEXT: vzeroupper
3809 %0 = bitcast <4 x i64> %__a to <4 x i64>
3810 %1 = bitcast <4 x i64> %__b to <4 x i64>
3811 %2 = icmp eq <4 x i64> %0, %1
; Indices 4-7 select lanes of the zeroinitializer operand, so every result lane above the first four is zero.
3812 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3813 %4 = bitcast <16 x i1> %3 to i16
; Unmasked 256-bit equality compare with a full-vector memory operand: %__a is
; compared with the <4 x i64> loaded from %__b, and the <4 x i1> result is
; widened to <16 x i1> and bitcast to i16 (the declared return type).
; The VLX run is expected to fold the load into vpcmpeqq; the NoVLX run is
; expected to load with vmovdqa, compare in zmm, and keep only the low four
; mask bits via the kshiftlw/kshiftrw pair by 12.
3817 define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
3818 ; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem:
3819 ; VLX: # %bb.0: # %entry
3820 ; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
3821 ; VLX-NEXT: kmovd %k0, %eax
3822 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3823 ; VLX-NEXT: vzeroupper
3826 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem:
3827 ; NoVLX: # %bb.0: # %entry
3828 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3829 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
3830 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3831 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3832 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3833 ; NoVLX-NEXT: kmovw %k0, %eax
3834 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3835 ; NoVLX-NEXT: vzeroupper
3838 %0 = bitcast <4 x i64> %__a to <4 x i64>
3839 %load = load <4 x i64>, <4 x i64>* %__b
3840 %1 = bitcast <4 x i64> %load to <4 x i64>
3841 %2 = icmp eq <4 x i64> %0, %1
; Indices 4-7 select lanes of the zeroinitializer operand, so every result lane above the first four is zero.
3842 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3843 %4 = bitcast <16 x i1> %3 to i16
; Masked 256-bit equality compare on register operands: icmp eq of %__a and
; %__b as <4 x i64>, ANDed with the low four bits of %__u, widened to
; <16 x i1>, and bitcast to i16 (the declared return type).
; The VLX run is expected to do a ymm compare under write-mask k1; the NoVLX
; run is expected to do a masked zmm compare followed by the kshiftlw/kshiftrw
; pair by 12 to keep only the low four mask bits.
3847 define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3848 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
3849 ; VLX: # %bb.0: # %entry
3850 ; VLX-NEXT: kmovd %edi, %k1
3851 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
3852 ; VLX-NEXT: kmovd %k0, %eax
3853 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3854 ; VLX-NEXT: vzeroupper
3857 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
3858 ; NoVLX: # %bb.0: # %entry
3859 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
3860 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3861 ; NoVLX-NEXT: kmovw %edi, %k1
3862 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3863 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3864 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3865 ; NoVLX-NEXT: kmovw %k0, %eax
3866 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3867 ; NoVLX-NEXT: vzeroupper
3870 %0 = bitcast <4 x i64> %__a to <4 x i64>
3871 %1 = bitcast <4 x i64> %__b to <4 x i64>
3872 %2 = icmp eq <4 x i64> %0, %1
; Take the low four bits of the i8 mask as a <4 x i1> vector.
3873 %3 = bitcast i8 %__u to <8 x i1>
3874 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3875 %4 = and <4 x i1> %2, %extract.i
; Indices 4-7 select lanes of the zeroinitializer operand, so every result lane above the first four is zero.
3876 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3877 %6 = bitcast <16 x i1> %5 to i16
; Masked 256-bit equality compare with a full-vector memory operand: %__a is
; compared with the <4 x i64> loaded from %__b; the result is ANDed with the
; low four bits of %__u, widened to <16 x i1>, and bitcast to i16 (the declared
; return type).
; The VLX run is expected to fold the load into a compare under write-mask k1;
; the NoVLX run is expected to load with vmovdqa, do a masked zmm compare, and
; keep only the low four mask bits via the kshiftlw/kshiftrw pair by 12.
3881 define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
3882 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
3883 ; VLX: # %bb.0: # %entry
3884 ; VLX-NEXT: kmovd %edi, %k1
3885 ; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
3886 ; VLX-NEXT: kmovd %k0, %eax
3887 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3888 ; VLX-NEXT: vzeroupper
3891 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
3892 ; NoVLX: # %bb.0: # %entry
3893 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3894 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
3895 ; NoVLX-NEXT: kmovw %edi, %k1
3896 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3897 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3898 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3899 ; NoVLX-NEXT: kmovw %k0, %eax
3900 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3901 ; NoVLX-NEXT: vzeroupper
3904 %0 = bitcast <4 x i64> %__a to <4 x i64>
3905 %load = load <4 x i64>, <4 x i64>* %__b
3906 %1 = bitcast <4 x i64> %load to <4 x i64>
3907 %2 = icmp eq <4 x i64> %0, %1
; Take the low four bits of the i8 mask as a <4 x i1> vector.
3908 %3 = bitcast i8 %__u to <8 x i1>
3909 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3910 %4 = and <4 x i1> %2, %extract.i
; Indices 4-7 select lanes of the zeroinitializer operand, so every result lane above the first four is zero.
3911 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3912 %6 = bitcast <16 x i1> %5 to i16
; Unmasked 256-bit equality compare against a broadcast scalar: the i64 loaded
; from %__b is splatted to <4 x i64>, compared with %__a, and the <4 x i1>
; result is widened to <16 x i1> and bitcast to i16 (the declared return type).
; The VLX run is expected to fold the load as a {1to4} broadcast operand; the
; NoVLX run is expected to use a separate vpbroadcastq, compare in zmm, and
; keep only the low four mask bits via the kshiftlw/kshiftrw pair by 12.
3917 define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
3918 ; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b:
3919 ; VLX: # %bb.0: # %entry
3920 ; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
3921 ; VLX-NEXT: kmovd %k0, %eax
3922 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3923 ; VLX-NEXT: vzeroupper
3926 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b:
3927 ; NoVLX: # %bb.0: # %entry
3928 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3929 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
3930 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3931 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3932 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3933 ; NoVLX-NEXT: kmovw %k0, %eax
3934 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3935 ; NoVLX-NEXT: vzeroupper
3938 %0 = bitcast <4 x i64> %__a to <4 x i64>
3939 %load = load i64, i64* %__b
; Splat the loaded scalar into all four lanes.
3940 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
3941 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
3942 %2 = icmp eq <4 x i64> %0, %1
; Indices 4-7 select lanes of the zeroinitializer operand, so every result lane above the first four is zero.
3943 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3944 %4 = bitcast <16 x i1> %3 to i16
; Masked 256-bit equality compare against a broadcast scalar: the i64 loaded
; from %__b is splatted to <4 x i64> and compared with %__a; the result is
; ANDed with the low four bits of %__u, widened to <16 x i1>, and bitcast to
; i16 (the declared return type).
; The VLX run is expected to use a {1to4} broadcast operand under write-mask
; k1; the NoVLX run is expected to use vpbroadcastq, a masked zmm compare, and
; the kshiftlw/kshiftrw pair by 12 to keep only the low four mask bits.
3948 define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
3949 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b:
3950 ; VLX: # %bb.0: # %entry
3951 ; VLX-NEXT: kmovd %edi, %k1
3952 ; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
3953 ; VLX-NEXT: kmovd %k0, %eax
3954 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3955 ; VLX-NEXT: vzeroupper
3958 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b:
3959 ; NoVLX: # %bb.0: # %entry
3960 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3961 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
3962 ; NoVLX-NEXT: kmovw %edi, %k1
3963 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3964 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3965 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3966 ; NoVLX-NEXT: kmovw %k0, %eax
3967 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3968 ; NoVLX-NEXT: vzeroupper
3971 %0 = bitcast <4 x i64> %__a to <4 x i64>
3972 %load = load i64, i64* %__b
; Splat the loaded scalar into all four lanes.
3973 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
3974 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
3975 %2 = icmp eq <4 x i64> %0, %1
; Take the low four bits of the i8 mask as a <4 x i1> vector.
3976 %3 = bitcast i8 %__u to <8 x i1>
3977 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3978 %4 = and <4 x i1> %extract.i, %2
; Indices 4-7 select lanes of the zeroinitializer operand, so every result lane above the first four is zero.
3979 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3980 %6 = bitcast <16 x i1> %5 to i16
; Unmasked 256-bit equality compare on register operands: icmp eq of %__a and
; %__b as <4 x i64>, with the <4 x i1> result widened to <32 x i1> and bitcast
; to i32 (the declared return type).
; The VLX run is expected to compare in ymm directly into a mask register; the
; NoVLX run is expected to widen both operands to zmm and keep only the low
; four mask bits via the kshiftlw/kshiftrw pair by 12.
3985 define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3986 ; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask:
3987 ; VLX: # %bb.0: # %entry
3988 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
3989 ; VLX-NEXT: kmovd %k0, %eax
3990 ; VLX-NEXT: vzeroupper
3993 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask:
3994 ; NoVLX: # %bb.0: # %entry
3995 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
3996 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3997 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3998 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3999 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4000 ; NoVLX-NEXT: kmovw %k0, %eax
4001 ; NoVLX-NEXT: vzeroupper
4004 %0 = bitcast <4 x i64> %__a to <4 x i64>
4005 %1 = bitcast <4 x i64> %__b to <4 x i64>
4006 %2 = icmp eq <4 x i64> %0, %1
; Indices 4-7 select lanes of the zeroinitializer operand, so every result lane above the first four is zero.
4007 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4008 %4 = bitcast <32 x i1> %3 to i32
; Unmasked 256-bit equality compare with a full-vector memory operand: %__a is
; compared with the <4 x i64> loaded from %__b, and the <4 x i1> result is
; widened to <32 x i1> and bitcast to i32 (the declared return type).
; The VLX run is expected to fold the load into vpcmpeqq; the NoVLX run is
; expected to load with vmovdqa, compare in zmm, and keep only the low four
; mask bits via the kshiftlw/kshiftrw pair by 12.
4012 define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
4013 ; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem:
4014 ; VLX: # %bb.0: # %entry
4015 ; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
4016 ; VLX-NEXT: kmovd %k0, %eax
4017 ; VLX-NEXT: vzeroupper
4020 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem:
4021 ; NoVLX: # %bb.0: # %entry
4022 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4023 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
4024 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4025 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4026 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4027 ; NoVLX-NEXT: kmovw %k0, %eax
4028 ; NoVLX-NEXT: vzeroupper
4031 %0 = bitcast <4 x i64> %__a to <4 x i64>
4032 %load = load <4 x i64>, <4 x i64>* %__b
4033 %1 = bitcast <4 x i64> %load to <4 x i64>
4034 %2 = icmp eq <4 x i64> %0, %1
; Indices 4-7 select lanes of the zeroinitializer operand, so every result lane above the first four is zero.
4035 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4036 %4 = bitcast <32 x i1> %3 to i32
; Masked 256-bit equality compare on register operands: icmp eq of %__a and
; %__b as <4 x i64>, ANDed with the low four bits of %__u, widened to
; <32 x i1>, and bitcast to i32 (the declared return type).
; The VLX run is expected to do a ymm compare under write-mask k1; the NoVLX
; run is expected to do a masked zmm compare followed by the kshiftlw/kshiftrw
; pair by 12 to keep only the low four mask bits.
4040 define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
4041 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask:
4042 ; VLX: # %bb.0: # %entry
4043 ; VLX-NEXT: kmovd %edi, %k1
4044 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
4045 ; VLX-NEXT: kmovd %k0, %eax
4046 ; VLX-NEXT: vzeroupper
4049 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask:
4050 ; NoVLX: # %bb.0: # %entry
4051 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
4052 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4053 ; NoVLX-NEXT: kmovw %edi, %k1
4054 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4055 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4056 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4057 ; NoVLX-NEXT: kmovw %k0, %eax
4058 ; NoVLX-NEXT: vzeroupper
4061 %0 = bitcast <4 x i64> %__a to <4 x i64>
4062 %1 = bitcast <4 x i64> %__b to <4 x i64>
4063 %2 = icmp eq <4 x i64> %0, %1
; Take the low four bits of the i8 mask as a <4 x i1> vector.
4064 %3 = bitcast i8 %__u to <8 x i1>
4065 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4066 %4 = and <4 x i1> %2, %extract.i
; Indices 4-7 select lanes of the zeroinitializer operand, so every result lane above the first four is zero.
4067 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4068 %6 = bitcast <32 x i1> %5 to i32
; Masked 256-bit equality compare with a full-vector memory operand: %__a is
; compared with the <4 x i64> loaded from %__b; the result is ANDed with the
; low four bits of %__u, widened to <32 x i1>, and bitcast to i32 (the declared
; return type).
; The VLX run is expected to fold the load into a compare under write-mask k1;
; the NoVLX run is expected to load with vmovdqa, do a masked zmm compare, and
; keep only the low four mask bits via the kshiftlw/kshiftrw pair by 12.
4072 define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
4073 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem:
4074 ; VLX: # %bb.0: # %entry
4075 ; VLX-NEXT: kmovd %edi, %k1
4076 ; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
4077 ; VLX-NEXT: kmovd %k0, %eax
4078 ; VLX-NEXT: vzeroupper
4081 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem:
4082 ; NoVLX: # %bb.0: # %entry
4083 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4084 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
4085 ; NoVLX-NEXT: kmovw %edi, %k1
4086 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4087 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4088 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4089 ; NoVLX-NEXT: kmovw %k0, %eax
4090 ; NoVLX-NEXT: vzeroupper
4093 %0 = bitcast <4 x i64> %__a to <4 x i64>
4094 %load = load <4 x i64>, <4 x i64>* %__b
4095 %1 = bitcast <4 x i64> %load to <4 x i64>
4096 %2 = icmp eq <4 x i64> %0, %1
; Take the low four bits of the i8 mask as a <4 x i1> vector.
4097 %3 = bitcast i8 %__u to <8 x i1>
4098 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4099 %4 = and <4 x i1> %2, %extract.i
; Indices 4-7 select lanes of the zeroinitializer operand, so every result lane above the first four is zero.
4100 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4101 %6 = bitcast <32 x i1> %5 to i32
; Unmasked 256-bit equality compare against a broadcast scalar: the i64 loaded
; from %__b is splatted to <4 x i64>, compared with %__a, and the <4 x i1>
; result is widened to <32 x i1> and bitcast to i32 (the declared return type).
; The VLX run is expected to fold the load as a {1to4} broadcast operand; the
; NoVLX run is expected to use a separate vpbroadcastq, compare in zmm, and
; keep only the low four mask bits via the kshiftlw/kshiftrw pair by 12.
4106 define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
4107 ; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b:
4108 ; VLX: # %bb.0: # %entry
4109 ; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
4110 ; VLX-NEXT: kmovd %k0, %eax
4111 ; VLX-NEXT: vzeroupper
4114 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b:
4115 ; NoVLX: # %bb.0: # %entry
4116 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4117 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
4118 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4119 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4120 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4121 ; NoVLX-NEXT: kmovw %k0, %eax
4122 ; NoVLX-NEXT: vzeroupper
4125 %0 = bitcast <4 x i64> %__a to <4 x i64>
4126 %load = load i64, i64* %__b
; Splat the loaded scalar into all four lanes.
4127 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4128 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4129 %2 = icmp eq <4 x i64> %0, %1
; Indices 4-7 select lanes of the zeroinitializer operand, so every result lane above the first four is zero.
4130 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4131 %4 = bitcast <32 x i1> %3 to i32
; Masked 256-bit equality compare against a broadcast scalar: the i64 loaded
; from %__b is splatted to <4 x i64> and compared with %__a; the result is
; ANDed with the low four bits of %__u, widened to <32 x i1>, and bitcast to
; i32 (the declared return type).
; The VLX run is expected to use a {1to4} broadcast operand under write-mask
; k1; the NoVLX run is expected to use vpbroadcastq, a masked zmm compare, and
; the kshiftlw/kshiftrw pair by 12 to keep only the low four mask bits.
4135 define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
4136 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b:
4137 ; VLX: # %bb.0: # %entry
4138 ; VLX-NEXT: kmovd %edi, %k1
4139 ; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
4140 ; VLX-NEXT: kmovd %k0, %eax
4141 ; VLX-NEXT: vzeroupper
4144 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b:
4145 ; NoVLX: # %bb.0: # %entry
4146 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4147 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
4148 ; NoVLX-NEXT: kmovw %edi, %k1
4149 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4150 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4151 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4152 ; NoVLX-NEXT: kmovw %k0, %eax
4153 ; NoVLX-NEXT: vzeroupper
4156 %0 = bitcast <4 x i64> %__a to <4 x i64>
4157 %load = load i64, i64* %__b
; Splat the loaded scalar into all four lanes.
4158 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4159 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4160 %2 = icmp eq <4 x i64> %0, %1
; Take the low four bits of the i8 mask as a <4 x i1> vector.
4161 %3 = bitcast i8 %__u to <8 x i1>
4162 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4163 %4 = and <4 x i1> %extract.i, %2
; Indices 4-7 select lanes of the zeroinitializer operand, so every result lane above the first four is zero.
4164 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4165 %6 = bitcast <32 x i1> %5 to i32
; Unmasked 256-bit equality compare on register operands: icmp eq of %__a and
; %__b as <4 x i64>, with the <4 x i1> result widened to <64 x i1> and bitcast
; to i64 (the declared return type).
; The VLX run is expected to compare in ymm directly into a mask register and
; move it out with kmovq; the NoVLX run is expected to widen both operands to
; zmm and keep only the low four mask bits via the kshiftlw/kshiftrw pair by 12.
4170 define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
4171 ; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
4172 ; VLX: # %bb.0: # %entry
4173 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
4174 ; VLX-NEXT: kmovq %k0, %rax
4175 ; VLX-NEXT: vzeroupper
4178 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
4179 ; NoVLX: # %bb.0: # %entry
4180 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
4181 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4182 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4183 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4184 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4185 ; NoVLX-NEXT: kmovw %k0, %eax
4186 ; NoVLX-NEXT: vzeroupper
4189 %0 = bitcast <4 x i64> %__a to <4 x i64>
4190 %1 = bitcast <4 x i64> %__b to <4 x i64>
4191 %2 = icmp eq <4 x i64> %0, %1
; Indices 4-7 select lanes of the zeroinitializer operand, so every result lane above the first four is zero.
4192 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4193 %4 = bitcast <64 x i1> %3 to i64
; Unmasked 256-bit equality compare with a full-vector memory operand: %__a is
; compared with the <4 x i64> loaded from %__b, and the <4 x i1> result is
; widened to <64 x i1> and bitcast to i64 (the declared return type).
; The VLX run is expected to fold the load into vpcmpeqq and move the mask out
; with kmovq; the NoVLX run is expected to load with vmovdqa, compare in zmm,
; and keep only the low four mask bits via the kshiftlw/kshiftrw pair by 12.
4197 define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
4198 ; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
4199 ; VLX: # %bb.0: # %entry
4200 ; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
4201 ; VLX-NEXT: kmovq %k0, %rax
4202 ; VLX-NEXT: vzeroupper
4205 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
4206 ; NoVLX: # %bb.0: # %entry
4207 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4208 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
4209 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4210 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4211 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4212 ; NoVLX-NEXT: kmovw %k0, %eax
4213 ; NoVLX-NEXT: vzeroupper
4216 %0 = bitcast <4 x i64> %__a to <4 x i64>
4217 %load = load <4 x i64>, <4 x i64>* %__b
4218 %1 = bitcast <4 x i64> %load to <4 x i64>
4219 %2 = icmp eq <4 x i64> %0, %1
; Indices 4-7 select lanes of the zeroinitializer operand, so every result lane above the first four is zero.
4220 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4221 %4 = bitcast <64 x i1> %3 to i64
; Masked 256-bit equality compare on register operands: icmp eq of %__a and
; %__b as <4 x i64>, ANDed with the low four bits of %__u, widened to
; <64 x i1>, and bitcast to i64 (the declared return type).
; The VLX run is expected to do a ymm compare under write-mask k1 and move the
; mask out with kmovq; the NoVLX run is expected to do a masked zmm compare
; followed by the kshiftlw/kshiftrw pair by 12 to keep only the low four mask bits.
4225 define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
4226 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
4227 ; VLX: # %bb.0: # %entry
4228 ; VLX-NEXT: kmovd %edi, %k1
4229 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
4230 ; VLX-NEXT: kmovq %k0, %rax
4231 ; VLX-NEXT: vzeroupper
4234 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
4235 ; NoVLX: # %bb.0: # %entry
4236 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
4237 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4238 ; NoVLX-NEXT: kmovw %edi, %k1
4239 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4240 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4241 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4242 ; NoVLX-NEXT: kmovw %k0, %eax
4243 ; NoVLX-NEXT: vzeroupper
4246 %0 = bitcast <4 x i64> %__a to <4 x i64>
4247 %1 = bitcast <4 x i64> %__b to <4 x i64>
4248 %2 = icmp eq <4 x i64> %0, %1
; Take the low four bits of the i8 mask as a <4 x i1> vector.
4249 %3 = bitcast i8 %__u to <8 x i1>
4250 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4251 %4 = and <4 x i1> %2, %extract.i
; Indices 4-7 select lanes of the zeroinitializer operand, so every result lane above the first four is zero.
4252 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4253 %6 = bitcast <64 x i1> %5 to i64
; Masked 256-bit equality compare with a full-vector memory operand: %__a is
; compared with the <4 x i64> loaded from %__b; the result is ANDed with the
; low four bits of %__u, widened to <64 x i1>, and bitcast to i64 (the declared
; return type).
; The VLX run is expected to fold the load into a compare under write-mask k1
; and move the mask out with kmovq; the NoVLX run is expected to load with
; vmovdqa, do a masked zmm compare, and keep only the low four mask bits via
; the kshiftlw/kshiftrw pair by 12.
4257 define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
4258 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
4259 ; VLX: # %bb.0: # %entry
4260 ; VLX-NEXT: kmovd %edi, %k1
4261 ; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
4262 ; VLX-NEXT: kmovq %k0, %rax
4263 ; VLX-NEXT: vzeroupper
4266 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
4267 ; NoVLX: # %bb.0: # %entry
4268 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4269 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
4270 ; NoVLX-NEXT: kmovw %edi, %k1
4271 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4272 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4273 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4274 ; NoVLX-NEXT: kmovw %k0, %eax
4275 ; NoVLX-NEXT: vzeroupper
4278 %0 = bitcast <4 x i64> %__a to <4 x i64>
4279 %load = load <4 x i64>, <4 x i64>* %__b
4280 %1 = bitcast <4 x i64> %load to <4 x i64>
4281 %2 = icmp eq <4 x i64> %0, %1
; Take the low four bits of the i8 mask as a <4 x i1> vector.
4282 %3 = bitcast i8 %__u to <8 x i1>
4283 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4284 %4 = and <4 x i1> %2, %extract.i
; Indices 4-7 select lanes of the zeroinitializer operand, so every result lane above the first four is zero.
4285 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4286 %6 = bitcast <64 x i1> %5 to i64
; Equality compare of <4 x i64> %__a against a single i64 loaded from %__b and
; splatted to all four lanes (insertelement + all-zero shuffle mask). The
; <4 x i1> result is padded with zero lanes up to <64 x i1> and returned as i64.
; Expected code: the VLX run uses the embedded {1to4} broadcast operand; the
; NoVLX run broadcasts into ymm1, compares as zmm, and keeps only the low four
; mask bits with the kshiftlw/kshiftrw $12 pair.
4291 define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
4292 ; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
4293 ; VLX: # %bb.0: # %entry
4294 ; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
4295 ; VLX-NEXT: kmovq %k0, %rax
4296 ; VLX-NEXT: vzeroupper
4299 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
4300 ; NoVLX: # %bb.0: # %entry
4301 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4302 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
4303 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4304 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4305 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4306 ; NoVLX-NEXT: kmovw %k0, %eax
4307 ; NoVLX-NEXT: vzeroupper
4310 %0 = bitcast <4 x i64> %__a to <4 x i64>
4311 %load = load i64, i64* %__b
4312 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4313 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4314 %2 = icmp eq <4 x i64> %0, %1
4315 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4316 %4 = bitcast <64 x i1> %3 to i64
; Masked equality compare of <4 x i64> %__a against a single i64 loaded from
; %__b and splatted to all four lanes. The <4 x i1> result is ANDed with the
; low four lanes of %__u (mask is the first AND operand here), padded with
; zero lanes up to <64 x i1>, and returned as i64.
; Expected code: the VLX run uses the embedded {1to4} broadcast with {%k1};
; the NoVLX run broadcasts into ymm1, compares as zmm under {%k1}, and keeps
; only the low four mask bits with the kshiftlw/kshiftrw $12 pair.
4320 define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
4321 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
4322 ; VLX: # %bb.0: # %entry
4323 ; VLX-NEXT: kmovd %edi, %k1
4324 ; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
4325 ; VLX-NEXT: kmovq %k0, %rax
4326 ; VLX-NEXT: vzeroupper
4329 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
4330 ; NoVLX: # %bb.0: # %entry
4331 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4332 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
4333 ; NoVLX-NEXT: kmovw %edi, %k1
4334 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4335 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4336 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4337 ; NoVLX-NEXT: kmovw %k0, %eax
4338 ; NoVLX-NEXT: vzeroupper
4341 %0 = bitcast <4 x i64> %__a to <4 x i64>
4342 %load = load i64, i64* %__b
4343 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4344 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4345 %2 = icmp eq <4 x i64> %0, %1
4346 %3 = bitcast i8 %__u to <8 x i1>
4347 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4348 %4 = and <4 x i1> %extract.i, %2
4349 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4350 %6 = bitcast <64 x i1> %5 to i64
; Equality compare of two <8 x i64> register operands. The <8 x i1> result is
; padded with zero lanes up to <16 x i1> and returned as i16.
; Expected code: both runs emit a single zmm vpcmpeqq into %k0; they differ
; only in the mask-to-GPR move (kmovd in the VLX run, kmovw in the NoVLX run).
4355 define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4356 ; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
4357 ; VLX: # %bb.0: # %entry
4358 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4359 ; VLX-NEXT: kmovd %k0, %eax
4360 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
4361 ; VLX-NEXT: vzeroupper
4364 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
4365 ; NoVLX: # %bb.0: # %entry
4366 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4367 ; NoVLX-NEXT: kmovw %k0, %eax
4368 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
4369 ; NoVLX-NEXT: vzeroupper
4372 %0 = bitcast <8 x i64> %__a to <8 x i64>
4373 %1 = bitcast <8 x i64> %__b to <8 x i64>
4374 %2 = icmp eq <8 x i64> %0, %1
4375 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4376 %4 = bitcast <16 x i1> %3 to i16
; Equality compare of <8 x i64> %__a against an <8 x i64> loaded from %__b.
; The <8 x i1> result is padded with zero lanes up to <16 x i1> and returned
; as i16.
; Expected code: both runs fold the load into the zmm vpcmpeqq memory operand.
4380 define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
4381 ; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
4382 ; VLX: # %bb.0: # %entry
4383 ; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
4384 ; VLX-NEXT: kmovd %k0, %eax
4385 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
4386 ; VLX-NEXT: vzeroupper
4389 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
4390 ; NoVLX: # %bb.0: # %entry
4391 ; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
4392 ; NoVLX-NEXT: kmovw %k0, %eax
4393 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
4394 ; NoVLX-NEXT: vzeroupper
4397 %0 = bitcast <8 x i64> %__a to <8 x i64>
4398 %load = load <8 x i64>, <8 x i64>* %__b
4399 %1 = bitcast <8 x i64> %load to <8 x i64>
4400 %2 = icmp eq <8 x i64> %0, %1
4401 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4402 %4 = bitcast <16 x i1> %3 to i16
; Masked equality compare of two <8 x i64> register operands. The <8 x i1>
; result is ANDed with %__u (bitcast to <8 x i1>), padded with zero lanes up
; to <16 x i1>, and returned as i16.
; Expected code: both runs move %edi into %k1 and fold the AND into the
; {%k1} write-mask of a single zmm vpcmpeqq.
4406 define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4407 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
4408 ; VLX: # %bb.0: # %entry
4409 ; VLX-NEXT: kmovd %edi, %k1
4410 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4411 ; VLX-NEXT: kmovd %k0, %eax
4412 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
4413 ; VLX-NEXT: vzeroupper
4416 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
4417 ; NoVLX: # %bb.0: # %entry
4418 ; NoVLX-NEXT: kmovw %edi, %k1
4419 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4420 ; NoVLX-NEXT: kmovw %k0, %eax
4421 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
4422 ; NoVLX-NEXT: vzeroupper
4425 %0 = bitcast <8 x i64> %__a to <8 x i64>
4426 %1 = bitcast <8 x i64> %__b to <8 x i64>
4427 %2 = icmp eq <8 x i64> %0, %1
4428 %3 = bitcast i8 %__u to <8 x i1>
4429 %4 = and <8 x i1> %2, %3
4430 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4431 %6 = bitcast <16 x i1> %5 to i16
; Masked equality compare of <8 x i64> %__a against an <8 x i64> loaded from
; %__b. The <8 x i1> result is ANDed with %__u (bitcast to <8 x i1>), padded
; with zero lanes up to <16 x i1>, and returned as i16.
; Expected code: both runs fold the load and the {%k1} write-mask into a
; single zmm vpcmpeqq.
4435 define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
4436 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
4437 ; VLX: # %bb.0: # %entry
4438 ; VLX-NEXT: kmovd %edi, %k1
4439 ; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4440 ; VLX-NEXT: kmovd %k0, %eax
4441 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
4442 ; VLX-NEXT: vzeroupper
4445 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
4446 ; NoVLX: # %bb.0: # %entry
4447 ; NoVLX-NEXT: kmovw %edi, %k1
4448 ; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4449 ; NoVLX-NEXT: kmovw %k0, %eax
4450 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
4451 ; NoVLX-NEXT: vzeroupper
4454 %0 = bitcast <8 x i64> %__a to <8 x i64>
4455 %load = load <8 x i64>, <8 x i64>* %__b
4456 %1 = bitcast <8 x i64> %load to <8 x i64>
4457 %2 = icmp eq <8 x i64> %0, %1
4458 %3 = bitcast i8 %__u to <8 x i1>
4459 %4 = and <8 x i1> %2, %3
4460 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4461 %6 = bitcast <16 x i1> %5 to i16
; Equality compare of <8 x i64> %__a against a single i64 loaded from %__b and
; splatted to all eight lanes (insertelement + all-zero shuffle mask). The
; <8 x i1> result is padded with zero lanes up to <16 x i1> and returned as i16.
; Expected code: both runs use the embedded {1to8} broadcast memory operand.
4466 define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
4467 ; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
4468 ; VLX: # %bb.0: # %entry
4469 ; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4470 ; VLX-NEXT: kmovd %k0, %eax
4471 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
4472 ; VLX-NEXT: vzeroupper
4475 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
4476 ; NoVLX: # %bb.0: # %entry
4477 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4478 ; NoVLX-NEXT: kmovw %k0, %eax
4479 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
4480 ; NoVLX-NEXT: vzeroupper
4483 %0 = bitcast <8 x i64> %__a to <8 x i64>
4484 %load = load i64, i64* %__b
4485 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4486 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4487 %2 = icmp eq <8 x i64> %0, %1
4488 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4489 %4 = bitcast <16 x i1> %3 to i16
; Masked equality compare of <8 x i64> %__a against a single i64 loaded from
; %__b and splatted to all eight lanes. The <8 x i1> result is ANDed with
; %__u (mask is the first AND operand here), padded with zero lanes up to
; <16 x i1>, and returned as i16.
; Expected code: both runs use the embedded {1to8} broadcast operand together
; with the {%k1} write-mask.
4493 define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
4494 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
4495 ; VLX: # %bb.0: # %entry
4496 ; VLX-NEXT: kmovd %edi, %k1
4497 ; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4498 ; VLX-NEXT: kmovd %k0, %eax
4499 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
4500 ; VLX-NEXT: vzeroupper
4503 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
4504 ; NoVLX: # %bb.0: # %entry
4505 ; NoVLX-NEXT: kmovw %edi, %k1
4506 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4507 ; NoVLX-NEXT: kmovw %k0, %eax
4508 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
4509 ; NoVLX-NEXT: vzeroupper
4512 %0 = bitcast <8 x i64> %__a to <8 x i64>
4513 %load = load i64, i64* %__b
4514 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4515 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4516 %2 = icmp eq <8 x i64> %0, %1
4517 %3 = bitcast i8 %__u to <8 x i1>
4518 %4 = and <8 x i1> %3, %2
4519 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4520 %6 = bitcast <16 x i1> %5 to i16
; Equality compare of two <8 x i64> register operands. The <8 x i1> result is
; padded with zero lanes up to <32 x i1> and returned as i32.
; Expected code: both runs emit a single zmm vpcmpeqq into %k0 followed by a
; mask-to-GPR move (kmovd in the VLX run, kmovw in the NoVLX run).
4525 define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4526 ; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
4527 ; VLX: # %bb.0: # %entry
4528 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4529 ; VLX-NEXT: kmovd %k0, %eax
4530 ; VLX-NEXT: vzeroupper
4533 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
4534 ; NoVLX: # %bb.0: # %entry
4535 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4536 ; NoVLX-NEXT: kmovw %k0, %eax
4537 ; NoVLX-NEXT: vzeroupper
4540 %0 = bitcast <8 x i64> %__a to <8 x i64>
4541 %1 = bitcast <8 x i64> %__b to <8 x i64>
4542 %2 = icmp eq <8 x i64> %0, %1
4543 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4544 %4 = bitcast <32 x i1> %3 to i32
; Equality compare of <8 x i64> %__a against an <8 x i64> loaded from %__b.
; The <8 x i1> result is padded with zero lanes up to <32 x i1> and returned
; as i32.
; Expected code: both runs fold the load into the zmm vpcmpeqq memory operand.
4548 define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
4549 ; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
4550 ; VLX: # %bb.0: # %entry
4551 ; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
4552 ; VLX-NEXT: kmovd %k0, %eax
4553 ; VLX-NEXT: vzeroupper
4556 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
4557 ; NoVLX: # %bb.0: # %entry
4558 ; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
4559 ; NoVLX-NEXT: kmovw %k0, %eax
4560 ; NoVLX-NEXT: vzeroupper
4563 %0 = bitcast <8 x i64> %__a to <8 x i64>
4564 %load = load <8 x i64>, <8 x i64>* %__b
4565 %1 = bitcast <8 x i64> %load to <8 x i64>
4566 %2 = icmp eq <8 x i64> %0, %1
4567 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4568 %4 = bitcast <32 x i1> %3 to i32
; Masked equality compare of two <8 x i64> register operands. The <8 x i1>
; result is ANDed with %__u (bitcast to <8 x i1>), padded with zero lanes up
; to <32 x i1>, and returned as i32.
; Expected code: both runs move %edi into %k1 and fold the AND into the
; {%k1} write-mask of a single zmm vpcmpeqq.
4572 define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4573 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
4574 ; VLX: # %bb.0: # %entry
4575 ; VLX-NEXT: kmovd %edi, %k1
4576 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4577 ; VLX-NEXT: kmovd %k0, %eax
4578 ; VLX-NEXT: vzeroupper
4581 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
4582 ; NoVLX: # %bb.0: # %entry
4583 ; NoVLX-NEXT: kmovw %edi, %k1
4584 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4585 ; NoVLX-NEXT: kmovw %k0, %eax
4586 ; NoVLX-NEXT: vzeroupper
4589 %0 = bitcast <8 x i64> %__a to <8 x i64>
4590 %1 = bitcast <8 x i64> %__b to <8 x i64>
4591 %2 = icmp eq <8 x i64> %0, %1
4592 %3 = bitcast i8 %__u to <8 x i1>
4593 %4 = and <8 x i1> %2, %3
4594 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4595 %6 = bitcast <32 x i1> %5 to i32
; Masked equality compare of <8 x i64> %__a against an <8 x i64> loaded from
; %__b. The <8 x i1> result is ANDed with %__u (bitcast to <8 x i1>), padded
; with zero lanes up to <32 x i1>, and returned as i32.
; Expected code: both runs fold the load and the {%k1} write-mask into a
; single zmm vpcmpeqq.
4599 define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
4600 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
4601 ; VLX: # %bb.0: # %entry
4602 ; VLX-NEXT: kmovd %edi, %k1
4603 ; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4604 ; VLX-NEXT: kmovd %k0, %eax
4605 ; VLX-NEXT: vzeroupper
4608 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
4609 ; NoVLX: # %bb.0: # %entry
4610 ; NoVLX-NEXT: kmovw %edi, %k1
4611 ; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4612 ; NoVLX-NEXT: kmovw %k0, %eax
4613 ; NoVLX-NEXT: vzeroupper
4616 %0 = bitcast <8 x i64> %__a to <8 x i64>
4617 %load = load <8 x i64>, <8 x i64>* %__b
4618 %1 = bitcast <8 x i64> %load to <8 x i64>
4619 %2 = icmp eq <8 x i64> %0, %1
4620 %3 = bitcast i8 %__u to <8 x i1>
4621 %4 = and <8 x i1> %2, %3
4622 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4623 %6 = bitcast <32 x i1> %5 to i32
; Equality compare of <8 x i64> %__a against a single i64 loaded from %__b and
; splatted to all eight lanes (insertelement + all-zero shuffle mask). The
; <8 x i1> result is padded with zero lanes up to <32 x i1> and returned as i32.
; Expected code: both runs use the embedded {1to8} broadcast memory operand.
4628 define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
4629 ; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
4630 ; VLX: # %bb.0: # %entry
4631 ; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4632 ; VLX-NEXT: kmovd %k0, %eax
4633 ; VLX-NEXT: vzeroupper
4636 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
4637 ; NoVLX: # %bb.0: # %entry
4638 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4639 ; NoVLX-NEXT: kmovw %k0, %eax
4640 ; NoVLX-NEXT: vzeroupper
4643 %0 = bitcast <8 x i64> %__a to <8 x i64>
4644 %load = load i64, i64* %__b
4645 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4646 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4647 %2 = icmp eq <8 x i64> %0, %1
4648 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4649 %4 = bitcast <32 x i1> %3 to i32
; Masked equality compare of <8 x i64> %__a against a single i64 loaded from
; %__b and splatted to all eight lanes. The <8 x i1> result is ANDed with
; %__u (mask is the first AND operand here), padded with zero lanes up to
; <32 x i1>, and returned as i32.
; Expected code: both runs use the embedded {1to8} broadcast operand together
; with the {%k1} write-mask.
4653 define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
4654 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
4655 ; VLX: # %bb.0: # %entry
4656 ; VLX-NEXT: kmovd %edi, %k1
4657 ; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4658 ; VLX-NEXT: kmovd %k0, %eax
4659 ; VLX-NEXT: vzeroupper
4662 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
4663 ; NoVLX: # %bb.0: # %entry
4664 ; NoVLX-NEXT: kmovw %edi, %k1
4665 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4666 ; NoVLX-NEXT: kmovw %k0, %eax
4667 ; NoVLX-NEXT: vzeroupper
4670 %0 = bitcast <8 x i64> %__a to <8 x i64>
4671 %load = load i64, i64* %__b
4672 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4673 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4674 %2 = icmp eq <8 x i64> %0, %1
4675 %3 = bitcast i8 %__u to <8 x i1>
4676 %4 = and <8 x i1> %3, %2
4677 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4678 %6 = bitcast <32 x i1> %5 to i32
; Equality compare of two <8 x i64> register operands. The <8 x i1> result is
; padded with zero lanes up to <64 x i1> and returned as i64.
; Expected code: both runs emit a single zmm vpcmpeqq into %k0; the VLX run
; reads the mask with kmovq into %rax, the NoVLX run with kmovw into %eax.
4683 define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4684 ; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
4685 ; VLX: # %bb.0: # %entry
4686 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4687 ; VLX-NEXT: kmovq %k0, %rax
4688 ; VLX-NEXT: vzeroupper
4691 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
4692 ; NoVLX: # %bb.0: # %entry
4693 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4694 ; NoVLX-NEXT: kmovw %k0, %eax
4695 ; NoVLX-NEXT: vzeroupper
4698 %0 = bitcast <8 x i64> %__a to <8 x i64>
4699 %1 = bitcast <8 x i64> %__b to <8 x i64>
4700 %2 = icmp eq <8 x i64> %0, %1
4701 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4702 %4 = bitcast <64 x i1> %3 to i64
; Equality compare of <8 x i64> %__a against an <8 x i64> loaded from %__b.
; The <8 x i1> result is padded with zero lanes up to <64 x i1> and returned
; as i64.
; Expected code: both runs fold the load into the zmm vpcmpeqq memory operand.
4706 define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
4707 ; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
4708 ; VLX: # %bb.0: # %entry
4709 ; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
4710 ; VLX-NEXT: kmovq %k0, %rax
4711 ; VLX-NEXT: vzeroupper
4714 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
4715 ; NoVLX: # %bb.0: # %entry
4716 ; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
4717 ; NoVLX-NEXT: kmovw %k0, %eax
4718 ; NoVLX-NEXT: vzeroupper
4721 %0 = bitcast <8 x i64> %__a to <8 x i64>
4722 %load = load <8 x i64>, <8 x i64>* %__b
4723 %1 = bitcast <8 x i64> %load to <8 x i64>
4724 %2 = icmp eq <8 x i64> %0, %1
4725 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4726 %4 = bitcast <64 x i1> %3 to i64
; Masked equality compare of two <8 x i64> register operands. The <8 x i1>
; result is ANDed with %__u (bitcast to <8 x i1>), padded with zero lanes up
; to <64 x i1>, and returned as i64.
; Expected code: both runs move %edi into %k1 and fold the AND into the
; {%k1} write-mask of a single zmm vpcmpeqq.
4730 define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4731 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
4732 ; VLX: # %bb.0: # %entry
4733 ; VLX-NEXT: kmovd %edi, %k1
4734 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4735 ; VLX-NEXT: kmovq %k0, %rax
4736 ; VLX-NEXT: vzeroupper
4739 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
4740 ; NoVLX: # %bb.0: # %entry
4741 ; NoVLX-NEXT: kmovw %edi, %k1
4742 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4743 ; NoVLX-NEXT: kmovw %k0, %eax
4744 ; NoVLX-NEXT: vzeroupper
4747 %0 = bitcast <8 x i64> %__a to <8 x i64>
4748 %1 = bitcast <8 x i64> %__b to <8 x i64>
4749 %2 = icmp eq <8 x i64> %0, %1
4750 %3 = bitcast i8 %__u to <8 x i1>
4751 %4 = and <8 x i1> %2, %3
4752 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4753 %6 = bitcast <64 x i1> %5 to i64
; Masked equality compare of <8 x i64> %__a against an <8 x i64> loaded from
; %__b. The <8 x i1> result is ANDed with %__u (bitcast to <8 x i1>), padded
; with zero lanes up to <64 x i1>, and returned as i64.
; Expected code: both runs fold the load and the {%k1} write-mask into a
; single zmm vpcmpeqq.
4757 define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
4758 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
4759 ; VLX: # %bb.0: # %entry
4760 ; VLX-NEXT: kmovd %edi, %k1
4761 ; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4762 ; VLX-NEXT: kmovq %k0, %rax
4763 ; VLX-NEXT: vzeroupper
4766 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
4767 ; NoVLX: # %bb.0: # %entry
4768 ; NoVLX-NEXT: kmovw %edi, %k1
4769 ; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4770 ; NoVLX-NEXT: kmovw %k0, %eax
4771 ; NoVLX-NEXT: vzeroupper
4774 %0 = bitcast <8 x i64> %__a to <8 x i64>
4775 %load = load <8 x i64>, <8 x i64>* %__b
4776 %1 = bitcast <8 x i64> %load to <8 x i64>
4777 %2 = icmp eq <8 x i64> %0, %1
4778 %3 = bitcast i8 %__u to <8 x i1>
4779 %4 = and <8 x i1> %2, %3
4780 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4781 %6 = bitcast <64 x i1> %5 to i64
; Equality compare of <8 x i64> %__a against a single i64 loaded from %__b and
; splatted to all eight lanes (insertelement + all-zero shuffle mask). The
; <8 x i1> result is padded with zero lanes up to <64 x i1> and returned as i64.
; Expected code: both runs use the embedded {1to8} broadcast memory operand.
4786 define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
4787 ; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
4788 ; VLX: # %bb.0: # %entry
4789 ; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4790 ; VLX-NEXT: kmovq %k0, %rax
4791 ; VLX-NEXT: vzeroupper
4794 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
4795 ; NoVLX: # %bb.0: # %entry
4796 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4797 ; NoVLX-NEXT: kmovw %k0, %eax
4798 ; NoVLX-NEXT: vzeroupper
4801 %0 = bitcast <8 x i64> %__a to <8 x i64>
4802 %load = load i64, i64* %__b
4803 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4804 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4805 %2 = icmp eq <8 x i64> %0, %1
4806 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4807 %4 = bitcast <64 x i1> %3 to i64
; Masked equality compare of <8 x i64> %__a against a single i64 loaded from
; %__b and splatted to all eight lanes. The <8 x i1> result is ANDed with
; %__u (mask is the first AND operand here), padded with zero lanes up to
; <64 x i1>, and returned as i64.
; Expected code: both runs use the embedded {1to8} broadcast operand together
; with the {%k1} write-mask.
4811 define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
4812 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
4813 ; VLX: # %bb.0: # %entry
4814 ; VLX-NEXT: kmovd %edi, %k1
4815 ; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4816 ; VLX-NEXT: kmovq %k0, %rax
4817 ; VLX-NEXT: vzeroupper
4820 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
4821 ; NoVLX: # %bb.0: # %entry
4822 ; NoVLX-NEXT: kmovw %edi, %k1
4823 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4824 ; NoVLX-NEXT: kmovw %k0, %eax
4825 ; NoVLX-NEXT: vzeroupper
4828 %0 = bitcast <8 x i64> %__a to <8 x i64>
4829 %load = load i64, i64* %__b
4830 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4831 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4832 %2 = icmp eq <8 x i64> %0, %1
4833 %3 = bitcast i8 %__u to <8 x i1>
4834 %4 = and <8 x i1> %3, %2
4835 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4836 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of two <2 x i64> register operands reinterpreted
; as <16 x i8>. The <16 x i1> result is padded with zero lanes up to
; <32 x i1> and returned as i32.
; Expected code: the VLX run compares straight into %k0 with vpcmpgtb; the
; NoVLX run produces a byte vector with vpcmpgtb, sign-extends it to dwords
; (vpmovsxbd), and converts it to a mask with vptestmd.
4841 define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
4842 ; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
4843 ; VLX: # %bb.0: # %entry
4844 ; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
4845 ; VLX-NEXT: kmovd %k0, %eax
4848 ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
4849 ; NoVLX: # %bb.0: # %entry
4850 ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
4851 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
4852 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
4853 ; NoVLX-NEXT: kmovw %k0, %eax
4854 ; NoVLX-NEXT: vzeroupper
4857 %0 = bitcast <2 x i64> %__a to <16 x i8>
4858 %1 = bitcast <2 x i64> %__b to <16 x i8>
4859 %2 = icmp sgt <16 x i8> %0, %1
4860 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4861 %4 = bitcast <32 x i1> %3 to i32
; Signed greater-than compare of %__a against a <2 x i64> loaded from %__b,
; both reinterpreted as <16 x i8>. The <16 x i1> result is padded with zero
; lanes up to <32 x i1> and returned as i32.
; Expected code: both runs fold the load into vpcmpgtb; the NoVLX run then
; converts the byte-vector result to a mask via vpmovsxbd + vptestmd.
4865 define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
4866 ; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
4867 ; VLX: # %bb.0: # %entry
4868 ; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
4869 ; VLX-NEXT: kmovd %k0, %eax
4872 ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
4873 ; NoVLX: # %bb.0: # %entry
4874 ; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
4875 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
4876 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
4877 ; NoVLX-NEXT: kmovw %k0, %eax
4878 ; NoVLX-NEXT: vzeroupper
4881 %0 = bitcast <2 x i64> %__a to <16 x i8>
4882 %load = load <2 x i64>, <2 x i64>* %__b
4883 %1 = bitcast <2 x i64> %load to <16 x i8>
4884 %2 = icmp sgt <16 x i8> %0, %1
4885 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4886 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of two <2 x i64> register operands
; reinterpreted as <16 x i8>. The <16 x i1> result is ANDed with %__u (bitcast
; to <16 x i1>), padded with zero lanes up to <32 x i1>, and returned as i32.
; Expected code: the VLX run folds the AND into the {%k1} write-mask of
; vpcmpgtb; the NoVLX run builds the mask via vpmovsxbd + vptestmd and applies
; %__u afterwards in a general-purpose register (andl %edi, %eax).
4890 define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
4891 ; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
4892 ; VLX: # %bb.0: # %entry
4893 ; VLX-NEXT: kmovd %edi, %k1
4894 ; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
4895 ; VLX-NEXT: kmovd %k0, %eax
4898 ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
4899 ; NoVLX: # %bb.0: # %entry
4900 ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
4901 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
4902 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
4903 ; NoVLX-NEXT: kmovw %k0, %eax
4904 ; NoVLX-NEXT: andl %edi, %eax
4905 ; NoVLX-NEXT: vzeroupper
4908 %0 = bitcast <2 x i64> %__a to <16 x i8>
4909 %1 = bitcast <2 x i64> %__b to <16 x i8>
4910 %2 = icmp sgt <16 x i8> %0, %1
4911 %3 = bitcast i16 %__u to <16 x i1>
4912 %4 = and <16 x i1> %2, %3
4913 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4914 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-than compare of %__a against a <2 x i64> loaded from
; %__b, both reinterpreted as <16 x i8>. The <16 x i1> result is ANDed with
; %__u (bitcast to <16 x i1>), padded with zero lanes up to <32 x i1>, and
; returned as i32.
; Expected code: the VLX run folds the load and the {%k1} write-mask into
; vpcmpgtb; the NoVLX run folds only the load, builds the mask via
; vpmovsxbd + vptestmd, and applies %__u with andl %edi, %eax.
4918 define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
4919 ; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
4920 ; VLX: # %bb.0: # %entry
4921 ; VLX-NEXT: kmovd %edi, %k1
4922 ; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
4923 ; VLX-NEXT: kmovd %k0, %eax
4926 ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
4927 ; NoVLX: # %bb.0: # %entry
4928 ; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
4929 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
4930 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
4931 ; NoVLX-NEXT: kmovw %k0, %eax
4932 ; NoVLX-NEXT: andl %edi, %eax
4933 ; NoVLX-NEXT: vzeroupper
4936 %0 = bitcast <2 x i64> %__a to <16 x i8>
4937 %load = load <2 x i64>, <2 x i64>* %__b
4938 %1 = bitcast <2 x i64> %load to <16 x i8>
4939 %2 = icmp sgt <16 x i8> %0, %1
4940 %3 = bitcast i16 %__u to <16 x i1>
4941 %4 = and <16 x i1> %2, %3
4942 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4943 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-than compare of two <2 x i64> register operands reinterpreted
; as <16 x i8>. The <16 x i1> result is padded with zero lanes up to
; <64 x i1> and returned as i64.
; Expected code: the VLX run compares straight into %k0 and reads it with
; kmovq; the NoVLX run produces a byte vector with vpcmpgtb, sign-extends it
; to dwords (vpmovsxbd), and converts it to a mask with vptestmd.
4948 define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
4949 ; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
4950 ; VLX: # %bb.0: # %entry
4951 ; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
4952 ; VLX-NEXT: kmovq %k0, %rax
4955 ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
4956 ; NoVLX: # %bb.0: # %entry
4957 ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
4958 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
4959 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
4960 ; NoVLX-NEXT: kmovw %k0, %eax
4961 ; NoVLX-NEXT: vzeroupper
4964 %0 = bitcast <2 x i64> %__a to <16 x i8>
4965 %1 = bitcast <2 x i64> %__b to <16 x i8>
4966 %2 = icmp sgt <16 x i8> %0, %1
4967 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4968 %4 = bitcast <64 x i1> %3 to i64
; Memory-operand variant: the right-hand side of the signed greater-than
; <16 x i8> compare is loaded from %__b. The <16 x i1> result is widened to
; <64 x i1> and bitcast to i64.
; Both check prefixes expect the load folded into vpcmpgtb as a (%rdi) operand.
4972 define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
4973 ; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
4974 ; VLX: # %bb.0: # %entry
4975 ; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
4976 ; VLX-NEXT: kmovq %k0, %rax
4979 ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
4980 ; NoVLX: # %bb.0: # %entry
4981 ; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
4982 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
4983 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
4984 ; NoVLX-NEXT: kmovw %k0, %eax
4985 ; NoVLX-NEXT: vzeroupper
4988 %0 = bitcast <2 x i64> %__a to <16 x i8>
4989 %load = load <2 x i64>, <2 x i64>* %__b
4990 %1 = bitcast <2 x i64> %load to <16 x i8>
4991 %2 = icmp sgt <16 x i8> %0, %1
; Shuffle indices 16..31 select lanes of the zeroinitializer operand, so every
; result lane above the low 16 is zero.
4992 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4993 %4 = bitcast <64 x i1> %3 to i64
; Masked variant: the i16 argument %__u is bitcast to <16 x i1> and ANDed with
; the signed greater-than <16 x i8> compare result before the result is widened
; to <64 x i1> and bitcast to i64.
; The VLX checks expect %edi moved into %k1 and used as a write-mask on
; vpcmpgtb; the NoVLX checks expect the mask applied afterwards with
; andl %edi, %eax.
4997 define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
4998 ; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
4999 ; VLX: # %bb.0: # %entry
5000 ; VLX-NEXT: kmovd %edi, %k1
5001 ; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
5002 ; VLX-NEXT: kmovq %k0, %rax
5005 ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
5006 ; NoVLX: # %bb.0: # %entry
5007 ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
5008 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
5009 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5010 ; NoVLX-NEXT: kmovw %k0, %eax
5011 ; NoVLX-NEXT: andl %edi, %eax
5012 ; NoVLX-NEXT: vzeroupper
5015 %0 = bitcast <2 x i64> %__a to <16 x i8>
5016 %1 = bitcast <2 x i64> %__b to <16 x i8>
5017 %2 = icmp sgt <16 x i8> %0, %1
5018 %3 = bitcast i16 %__u to <16 x i1>
5019 %4 = and <16 x i1> %2, %3
; Shuffle indices 16..31 select lanes of the zeroinitializer operand, so every
; result lane above the low 16 is zero.
5020 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5021 %6 = bitcast <64 x i1> %5 to i64
; Masked memory-operand variant: the right-hand side of the signed greater-than
; <16 x i8> compare is loaded from %__b, and the result is ANDed with %__u
; (bitcast from i16 to <16 x i1>) before being widened to <64 x i1> and bitcast
; to i64.
; Both check prefixes expect the load folded into vpcmpgtb as a (%rsi) operand;
; the NoVLX checks apply the mask afterwards with andl %edi, %eax.
5025 define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5026 ; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
5027 ; VLX: # %bb.0: # %entry
5028 ; VLX-NEXT: kmovd %edi, %k1
5029 ; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
5030 ; VLX-NEXT: kmovq %k0, %rax
5033 ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
5034 ; NoVLX: # %bb.0: # %entry
5035 ; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
5036 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
5037 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5038 ; NoVLX-NEXT: kmovw %k0, %eax
5039 ; NoVLX-NEXT: andl %edi, %eax
5040 ; NoVLX-NEXT: vzeroupper
5043 %0 = bitcast <2 x i64> %__a to <16 x i8>
5044 %load = load <2 x i64>, <2 x i64>* %__b
5045 %1 = bitcast <2 x i64> %load to <16 x i8>
5046 %2 = icmp sgt <16 x i8> %0, %1
5047 %3 = bitcast i16 %__u to <16 x i1>
5048 %4 = and <16 x i1> %2, %3
; Shuffle indices 16..31 select lanes of the zeroinitializer operand, so every
; result lane above the low 16 is zero.
5049 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5050 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of two <32 x i8> vectors (both bitcast from
; <4 x i64> register arguments). The <32 x i1> result is widened to <64 x i1>
; and bitcast to i64.
; The VLX checks expect one ymm vpcmpgtb writing %k0. The NoVLX checks expect
; the ymm compare result to be handled as two 128-bit halves (vextracti128 for
; the upper one), each turned into a 16-bit mask and merged with shll $16 / orl.
5055 define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5056 ; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
5057 ; VLX: # %bb.0: # %entry
5058 ; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0
5059 ; VLX-NEXT: kmovq %k0, %rax
5060 ; VLX-NEXT: vzeroupper
5063 ; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
5064 ; NoVLX: # %bb.0: # %entry
5065 ; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
5066 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
5067 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
5068 ; NoVLX-NEXT: kmovw %k0, %ecx
5069 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
5070 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
5071 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5072 ; NoVLX-NEXT: kmovw %k0, %eax
5073 ; NoVLX-NEXT: shll $16, %eax
5074 ; NoVLX-NEXT: orl %ecx, %eax
5075 ; NoVLX-NEXT: vzeroupper
5078 %0 = bitcast <4 x i64> %__a to <32 x i8>
5079 %1 = bitcast <4 x i64> %__b to <32 x i8>
5080 %2 = icmp sgt <32 x i8> %0, %1
; Shuffle indices 32..63 select lanes of the zeroinitializer operand, so the
; upper 32 result lanes are zero.
5081 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5082 %4 = bitcast <64 x i1> %3 to i64
; Memory-operand variant: the right-hand side of the signed greater-than
; <32 x i8> compare is loaded from %__b. The <32 x i1> result is widened to
; <64 x i1> and bitcast to i64.
; Both check prefixes expect the load folded into vpcmpgtb as a (%rdi) operand.
; The NoVLX checks build the 32-bit mask from two 16-bit halves merged with
; shll $16 / orl.
5086 define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
5087 ; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem:
5088 ; VLX: # %bb.0: # %entry
5089 ; VLX-NEXT: vpcmpgtb (%rdi), %ymm0, %k0
5090 ; VLX-NEXT: kmovq %k0, %rax
5091 ; VLX-NEXT: vzeroupper
5094 ; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem:
5095 ; NoVLX: # %bb.0: # %entry
5096 ; NoVLX-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0
5097 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
5098 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
5099 ; NoVLX-NEXT: kmovw %k0, %ecx
5100 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
5101 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
5102 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5103 ; NoVLX-NEXT: kmovw %k0, %eax
5104 ; NoVLX-NEXT: shll $16, %eax
5105 ; NoVLX-NEXT: orl %ecx, %eax
5106 ; NoVLX-NEXT: vzeroupper
5109 %0 = bitcast <4 x i64> %__a to <32 x i8>
5110 %load = load <4 x i64>, <4 x i64>* %__b
5111 %1 = bitcast <4 x i64> %load to <32 x i8>
5112 %2 = icmp sgt <32 x i8> %0, %1
; Shuffle indices 32..63 select lanes of the zeroinitializer operand, so the
; upper 32 result lanes are zero.
5113 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5114 %4 = bitcast <64 x i1> %3 to i64
; Masked variant: the i32 argument %__u is bitcast to <32 x i1> and ANDed with
; the signed greater-than <32 x i8> compare result before the result is widened
; to <64 x i1> and bitcast to i64.
; The VLX checks expect %edi moved into %k1 and used as a write-mask on the ymm
; vpcmpgtb. The NoVLX checks expect each 16-bit half of the compare mask to be
; ANDed with the matching half of %edi (the upper half after shrl $16) and the
; halves merged with shll $16 / movzwl / orl.
5118 define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5119 ; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
5120 ; VLX: # %bb.0: # %entry
5121 ; VLX-NEXT: kmovd %edi, %k1
5122 ; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1}
5123 ; VLX-NEXT: kmovq %k0, %rax
5124 ; VLX-NEXT: vzeroupper
5127 ; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
5128 ; NoVLX: # %bb.0: # %entry
5129 ; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
5130 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
5131 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
5132 ; NoVLX-NEXT: kmovw %k0, %eax
5133 ; NoVLX-NEXT: andl %edi, %eax
5134 ; NoVLX-NEXT: shrl $16, %edi
5135 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
5136 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
5137 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5138 ; NoVLX-NEXT: kmovw %k0, %ecx
5139 ; NoVLX-NEXT: andl %edi, %ecx
5140 ; NoVLX-NEXT: shll $16, %ecx
5141 ; NoVLX-NEXT: movzwl %ax, %eax
5142 ; NoVLX-NEXT: orl %ecx, %eax
5143 ; NoVLX-NEXT: vzeroupper
5146 %0 = bitcast <4 x i64> %__a to <32 x i8>
5147 %1 = bitcast <4 x i64> %__b to <32 x i8>
5148 %2 = icmp sgt <32 x i8> %0, %1
5149 %3 = bitcast i32 %__u to <32 x i1>
5150 %4 = and <32 x i1> %2, %3
; Shuffle indices 32..63 select lanes of the zeroinitializer operand, so the
; upper 32 result lanes are zero.
5151 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5152 %6 = bitcast <64 x i1> %5 to i64
; Masked memory-operand variant: the right-hand side of the signed greater-than
; <32 x i8> compare is loaded from %__b, and the result is ANDed with %__u
; (bitcast from i32 to <32 x i1>) before being widened to <64 x i1> and bitcast
; to i64.
; Both check prefixes expect the load folded into vpcmpgtb as a (%rsi) operand.
; The NoVLX checks AND each 16-bit half of the compare mask with the matching
; half of %edi and merge the halves with shll $16 / movzwl / orl.
5156 define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
5157 ; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
5158 ; VLX: # %bb.0: # %entry
5159 ; VLX-NEXT: kmovd %edi, %k1
5160 ; VLX-NEXT: vpcmpgtb (%rsi), %ymm0, %k0 {%k1}
5161 ; VLX-NEXT: kmovq %k0, %rax
5162 ; VLX-NEXT: vzeroupper
5165 ; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
5166 ; NoVLX: # %bb.0: # %entry
5167 ; NoVLX-NEXT: vpcmpgtb (%rsi), %ymm0, %ymm0
5168 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
5169 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
5170 ; NoVLX-NEXT: kmovw %k0, %eax
5171 ; NoVLX-NEXT: andl %edi, %eax
5172 ; NoVLX-NEXT: shrl $16, %edi
5173 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
5174 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
5175 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5176 ; NoVLX-NEXT: kmovw %k0, %ecx
5177 ; NoVLX-NEXT: andl %edi, %ecx
5178 ; NoVLX-NEXT: shll $16, %ecx
5179 ; NoVLX-NEXT: movzwl %ax, %eax
5180 ; NoVLX-NEXT: orl %ecx, %eax
5181 ; NoVLX-NEXT: vzeroupper
5184 %0 = bitcast <4 x i64> %__a to <32 x i8>
5185 %load = load <4 x i64>, <4 x i64>* %__b
5186 %1 = bitcast <4 x i64> %load to <32 x i8>
5187 %2 = icmp sgt <32 x i8> %0, %1
5188 %3 = bitcast i32 %__u to <32 x i1>
5189 %4 = and <32 x i1> %2, %3
; Shuffle indices 32..63 select lanes of the zeroinitializer operand, so the
; upper 32 result lanes are zero.
5190 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5191 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of two <8 x i16> vectors (both bitcast from
; <2 x i64> register arguments). The <8 x i1> result is widened to <16 x i1>
; and bitcast to i16, matching the declared zeroext i16 return type.
; The VLX checks expect one vpcmpgtw writing %k0 plus kmovd; the NoVLX checks
; expect vpcmpgtw into xmm, vpmovsxwq to zmm, then vptestmq to form the mask.
5196 define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5197 ; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask:
5198 ; VLX: # %bb.0: # %entry
5199 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
5200 ; VLX-NEXT: kmovd %k0, %eax
5201 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
5204 ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask:
5205 ; NoVLX: # %bb.0: # %entry
5206 ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
5207 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5208 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
5209 ; NoVLX-NEXT: kmovw %k0, %eax
5210 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
5211 ; NoVLX-NEXT: vzeroupper
5214 %0 = bitcast <2 x i64> %__a to <8 x i16>
5215 %1 = bitcast <2 x i64> %__b to <8 x i16>
5216 %2 = icmp sgt <8 x i16> %0, %1
; Shuffle indices 8..15 select lanes of the zeroinitializer operand, so the
; upper 8 result lanes are zero.
5217 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5218 %4 = bitcast <16 x i1> %3 to i16
; Memory-operand variant: the right-hand side of the signed greater-than
; <8 x i16> compare is loaded from %__b. The <8 x i1> result is widened to
; <16 x i1> and bitcast to i16.
; Both check prefixes expect the load folded into vpcmpgtw as a (%rdi) operand.
5222 define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5223 ; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem:
5224 ; VLX: # %bb.0: # %entry
5225 ; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
5226 ; VLX-NEXT: kmovd %k0, %eax
5227 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
5230 ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem:
5231 ; NoVLX: # %bb.0: # %entry
5232 ; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
5233 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5234 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
5235 ; NoVLX-NEXT: kmovw %k0, %eax
5236 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
5237 ; NoVLX-NEXT: vzeroupper
5240 %0 = bitcast <2 x i64> %__a to <8 x i16>
5241 %load = load <2 x i64>, <2 x i64>* %__b
5242 %1 = bitcast <2 x i64> %load to <8 x i16>
5243 %2 = icmp sgt <8 x i16> %0, %1
; Shuffle indices 8..15 select lanes of the zeroinitializer operand, so the
; upper 8 result lanes are zero.
5244 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5245 %4 = bitcast <16 x i1> %3 to i16
; Masked variant: the i8 argument %__u is bitcast to <8 x i1> and ANDed with
; the signed greater-than <8 x i16> compare result before the result is widened
; to <16 x i1> and bitcast to i16.
; The VLX checks expect %edi moved into %k1 and used as a write-mask on
; vpcmpgtw; the NoVLX checks expect %k1 applied as a write-mask on vptestmq.
5249 define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5250 ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask:
5251 ; VLX: # %bb.0: # %entry
5252 ; VLX-NEXT: kmovd %edi, %k1
5253 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
5254 ; VLX-NEXT: kmovd %k0, %eax
5255 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
5258 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask:
5259 ; NoVLX: # %bb.0: # %entry
5260 ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
5261 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5262 ; NoVLX-NEXT: kmovw %edi, %k1
5263 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
5264 ; NoVLX-NEXT: kmovw %k0, %eax
5265 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
5266 ; NoVLX-NEXT: vzeroupper
5269 %0 = bitcast <2 x i64> %__a to <8 x i16>
5270 %1 = bitcast <2 x i64> %__b to <8 x i16>
5271 %2 = icmp sgt <8 x i16> %0, %1
5272 %3 = bitcast i8 %__u to <8 x i1>
5273 %4 = and <8 x i1> %2, %3
; Shuffle indices 8..15 select lanes of the zeroinitializer operand, so the
; upper 8 result lanes are zero.
5274 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5275 %6 = bitcast <16 x i1> %5 to i16
; Masked memory-operand variant: the right-hand side of the signed greater-than
; <8 x i16> compare is loaded from %__b, and the result is ANDed with %__u
; (bitcast from i8 to <8 x i1>) before being widened to <16 x i1> and bitcast
; to i16.
; Both check prefixes expect the load folded into vpcmpgtw as a (%rsi) operand;
; the NoVLX checks apply %k1 as a write-mask on vptestmq.
5279 define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5280 ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem:
5281 ; VLX: # %bb.0: # %entry
5282 ; VLX-NEXT: kmovd %edi, %k1
5283 ; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
5284 ; VLX-NEXT: kmovd %k0, %eax
5285 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
5288 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem:
5289 ; NoVLX: # %bb.0: # %entry
5290 ; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
5291 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5292 ; NoVLX-NEXT: kmovw %edi, %k1
5293 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
5294 ; NoVLX-NEXT: kmovw %k0, %eax
5295 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
5296 ; NoVLX-NEXT: vzeroupper
5299 %0 = bitcast <2 x i64> %__a to <8 x i16>
5300 %load = load <2 x i64>, <2 x i64>* %__b
5301 %1 = bitcast <2 x i64> %load to <8 x i16>
5302 %2 = icmp sgt <8 x i16> %0, %1
5303 %3 = bitcast i8 %__u to <8 x i1>
5304 %4 = and <8 x i1> %2, %3
; Shuffle indices 8..15 select lanes of the zeroinitializer operand, so the
; upper 8 result lanes are zero.
5305 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5306 %6 = bitcast <16 x i1> %5 to i16
; Signed greater-than compare of two <8 x i16> vectors (both bitcast from
; <2 x i64> register arguments). The <8 x i1> result is widened to <32 x i1>
; and bitcast to i32, matching the declared zeroext i32 return type.
; The VLX checks expect one vpcmpgtw writing %k0 plus kmovd; the NoVLX checks
; expect vpcmpgtw into xmm, vpmovsxwq to zmm, then vptestmq to form the mask.
5311 define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5312 ; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask:
5313 ; VLX: # %bb.0: # %entry
5314 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
5315 ; VLX-NEXT: kmovd %k0, %eax
5318 ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask:
5319 ; NoVLX: # %bb.0: # %entry
5320 ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
5321 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5322 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
5323 ; NoVLX-NEXT: kmovw %k0, %eax
5324 ; NoVLX-NEXT: vzeroupper
5327 %0 = bitcast <2 x i64> %__a to <8 x i16>
5328 %1 = bitcast <2 x i64> %__b to <8 x i16>
5329 %2 = icmp sgt <8 x i16> %0, %1
; Shuffle indices 8..15 select lanes of the zeroinitializer operand, so every
; result lane above the low 8 is zero.
5330 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5331 %4 = bitcast <32 x i1> %3 to i32
; Memory-operand variant: the right-hand side of the signed greater-than
; <8 x i16> compare is loaded from %__b. The <8 x i1> result is widened to
; <32 x i1> and bitcast to i32.
; Both check prefixes expect the load folded into vpcmpgtw as a (%rdi) operand.
5335 define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5336 ; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem:
5337 ; VLX: # %bb.0: # %entry
5338 ; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
5339 ; VLX-NEXT: kmovd %k0, %eax
5342 ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem:
5343 ; NoVLX: # %bb.0: # %entry
5344 ; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
5345 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5346 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
5347 ; NoVLX-NEXT: kmovw %k0, %eax
5348 ; NoVLX-NEXT: vzeroupper
5351 %0 = bitcast <2 x i64> %__a to <8 x i16>
5352 %load = load <2 x i64>, <2 x i64>* %__b
5353 %1 = bitcast <2 x i64> %load to <8 x i16>
5354 %2 = icmp sgt <8 x i16> %0, %1
; Shuffle indices 8..15 select lanes of the zeroinitializer operand, so every
; result lane above the low 8 is zero.
5355 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5356 %4 = bitcast <32 x i1> %3 to i32
; Masked variant: the i8 argument %__u is bitcast to <8 x i1> and ANDed with
; the signed greater-than <8 x i16> compare result before the result is widened
; to <32 x i1> and bitcast to i32.
; The VLX checks expect %edi moved into %k1 and used as a write-mask on
; vpcmpgtw; the NoVLX checks expect %k1 applied as a write-mask on vptestmq.
5360 define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5361 ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask:
5362 ; VLX: # %bb.0: # %entry
5363 ; VLX-NEXT: kmovd %edi, %k1
5364 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
5365 ; VLX-NEXT: kmovd %k0, %eax
5368 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask:
5369 ; NoVLX: # %bb.0: # %entry
5370 ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
5371 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5372 ; NoVLX-NEXT: kmovw %edi, %k1
5373 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
5374 ; NoVLX-NEXT: kmovw %k0, %eax
5375 ; NoVLX-NEXT: vzeroupper
5378 %0 = bitcast <2 x i64> %__a to <8 x i16>
5379 %1 = bitcast <2 x i64> %__b to <8 x i16>
5380 %2 = icmp sgt <8 x i16> %0, %1
5381 %3 = bitcast i8 %__u to <8 x i1>
5382 %4 = and <8 x i1> %2, %3
; Shuffle indices 8..15 select lanes of the zeroinitializer operand, so every
; result lane above the low 8 is zero.
5383 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5384 %6 = bitcast <32 x i1> %5 to i32
; Masked memory-operand variant: the right-hand side of the signed greater-than
; <8 x i16> compare is loaded from %__b, and the result is ANDed with %__u
; (bitcast from i8 to <8 x i1>) before being widened to <32 x i1> and bitcast
; to i32.
; Both check prefixes expect the load folded into vpcmpgtw as a (%rsi) operand;
; the NoVLX checks apply %k1 as a write-mask on vptestmq.
5388 define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5389 ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem:
5390 ; VLX: # %bb.0: # %entry
5391 ; VLX-NEXT: kmovd %edi, %k1
5392 ; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
5393 ; VLX-NEXT: kmovd %k0, %eax
5396 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem:
5397 ; NoVLX: # %bb.0: # %entry
5398 ; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
5399 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5400 ; NoVLX-NEXT: kmovw %edi, %k1
5401 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
5402 ; NoVLX-NEXT: kmovw %k0, %eax
5403 ; NoVLX-NEXT: vzeroupper
5406 %0 = bitcast <2 x i64> %__a to <8 x i16>
5407 %load = load <2 x i64>, <2 x i64>* %__b
5408 %1 = bitcast <2 x i64> %load to <8 x i16>
5409 %2 = icmp sgt <8 x i16> %0, %1
5410 %3 = bitcast i8 %__u to <8 x i1>
5411 %4 = and <8 x i1> %2, %3
; Shuffle indices 8..15 select lanes of the zeroinitializer operand, so every
; result lane above the low 8 is zero.
5412 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5413 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-than compare of two <8 x i16> vectors (both bitcast from
; <2 x i64> register arguments). The <8 x i1> result is widened to <64 x i1>
; and bitcast to i64, matching the declared zeroext i64 return type.
; The VLX checks expect one vpcmpgtw writing %k0 plus kmovq; the NoVLX checks
; expect vpcmpgtw into xmm, vpmovsxwq to zmm, then vptestmq to form the mask.
5418 define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5419 ; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask:
5420 ; VLX: # %bb.0: # %entry
5421 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
5422 ; VLX-NEXT: kmovq %k0, %rax
5425 ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask:
5426 ; NoVLX: # %bb.0: # %entry
5427 ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
5428 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5429 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
5430 ; NoVLX-NEXT: kmovw %k0, %eax
5431 ; NoVLX-NEXT: vzeroupper
5434 %0 = bitcast <2 x i64> %__a to <8 x i16>
5435 %1 = bitcast <2 x i64> %__b to <8 x i16>
5436 %2 = icmp sgt <8 x i16> %0, %1
; Shuffle indices 8..15 select lanes of the zeroinitializer operand, so every
; result lane above the low 8 is zero.
5437 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5438 %4 = bitcast <64 x i1> %3 to i64
; Memory-operand variant: the right-hand side of the signed greater-than
; <8 x i16> compare is loaded from %__b. The <8 x i1> result is widened to
; <64 x i1> and bitcast to i64.
; Both check prefixes expect the load folded into vpcmpgtw as a (%rdi) operand.
5442 define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5443 ; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem:
5444 ; VLX: # %bb.0: # %entry
5445 ; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
5446 ; VLX-NEXT: kmovq %k0, %rax
5449 ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem:
5450 ; NoVLX: # %bb.0: # %entry
5451 ; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
5452 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5453 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
5454 ; NoVLX-NEXT: kmovw %k0, %eax
5455 ; NoVLX-NEXT: vzeroupper
5458 %0 = bitcast <2 x i64> %__a to <8 x i16>
5459 %load = load <2 x i64>, <2 x i64>* %__b
5460 %1 = bitcast <2 x i64> %load to <8 x i16>
5461 %2 = icmp sgt <8 x i16> %0, %1
; Shuffle indices 8..15 select lanes of the zeroinitializer operand, so every
; result lane above the low 8 is zero.
5462 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5463 %4 = bitcast <64 x i1> %3 to i64
; Masked variant: the i8 argument %__u is bitcast to <8 x i1> and ANDed with
; the signed greater-than <8 x i16> compare result before the result is widened
; to <64 x i1> and bitcast to i64.
; The VLX checks expect %edi moved into %k1 and used as a write-mask on
; vpcmpgtw; the NoVLX checks expect %k1 applied as a write-mask on vptestmq.
5467 define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5468 ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask:
5469 ; VLX: # %bb.0: # %entry
5470 ; VLX-NEXT: kmovd %edi, %k1
5471 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
5472 ; VLX-NEXT: kmovq %k0, %rax
5475 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask:
5476 ; NoVLX: # %bb.0: # %entry
5477 ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
5478 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5479 ; NoVLX-NEXT: kmovw %edi, %k1
5480 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
5481 ; NoVLX-NEXT: kmovw %k0, %eax
5482 ; NoVLX-NEXT: vzeroupper
5485 %0 = bitcast <2 x i64> %__a to <8 x i16>
5486 %1 = bitcast <2 x i64> %__b to <8 x i16>
5487 %2 = icmp sgt <8 x i16> %0, %1
5488 %3 = bitcast i8 %__u to <8 x i1>
5489 %4 = and <8 x i1> %2, %3
; Shuffle indices 8..15 select lanes of the zeroinitializer operand, so every
; result lane above the low 8 is zero.
5490 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5491 %6 = bitcast <64 x i1> %5 to i64
; Masked memory-operand variant: the right-hand side of the signed greater-than
; <8 x i16> compare is loaded from %__b, and the result is ANDed with %__u
; (bitcast from i8 to <8 x i1>) before being widened to <64 x i1> and bitcast
; to i64.
; Both check prefixes expect the load folded into vpcmpgtw as a (%rsi) operand;
; the NoVLX checks apply %k1 as a write-mask on vptestmq.
5495 define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5496 ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem:
5497 ; VLX: # %bb.0: # %entry
5498 ; VLX-NEXT: kmovd %edi, %k1
5499 ; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
5500 ; VLX-NEXT: kmovq %k0, %rax
5503 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem:
5504 ; NoVLX: # %bb.0: # %entry
5505 ; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
5506 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5507 ; NoVLX-NEXT: kmovw %edi, %k1
5508 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
5509 ; NoVLX-NEXT: kmovw %k0, %eax
5510 ; NoVLX-NEXT: vzeroupper
5513 %0 = bitcast <2 x i64> %__a to <8 x i16>
5514 %load = load <2 x i64>, <2 x i64>* %__b
5515 %1 = bitcast <2 x i64> %load to <8 x i16>
5516 %2 = icmp sgt <8 x i16> %0, %1
5517 %3 = bitcast i8 %__u to <8 x i1>
5518 %4 = and <8 x i1> %2, %3
; Shuffle indices 8..15 select lanes of the zeroinitializer operand, so every
; result lane above the low 8 is zero.
5519 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5520 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of two <16 x i16> vectors (both bitcast from
; <4 x i64> register arguments). The <16 x i1> result is widened to <32 x i1>
; and bitcast to i32, matching the declared zeroext i32 return type.
; The VLX checks expect one ymm vpcmpgtw writing %k0 plus kmovd; the NoVLX
; checks expect vpcmpgtw into ymm, vpmovsxwd to zmm, then vptestmd to form the
; mask.
5525 define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5526 ; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask:
5527 ; VLX: # %bb.0: # %entry
5528 ; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
5529 ; VLX-NEXT: kmovd %k0, %eax
5530 ; VLX-NEXT: vzeroupper
5533 ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask:
5534 ; NoVLX: # %bb.0: # %entry
5535 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
5536 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5537 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5538 ; NoVLX-NEXT: kmovw %k0, %eax
5539 ; NoVLX-NEXT: vzeroupper
5542 %0 = bitcast <4 x i64> %__a to <16 x i16>
5543 %1 = bitcast <4 x i64> %__b to <16 x i16>
5544 %2 = icmp sgt <16 x i16> %0, %1
; Shuffle indices 16..31 select lanes of the zeroinitializer operand, so the
; upper 16 result lanes are zero.
5545 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5546 %4 = bitcast <32 x i1> %3 to i32
; Memory-operand variant: the right-hand side of the signed greater-than
; <16 x i16> compare is loaded from %__b. The <16 x i1> result is widened to
; <32 x i1> and bitcast to i32.
; Both check prefixes expect the load folded into vpcmpgtw as a (%rdi) operand.
5550 define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
5551 ; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem:
5552 ; VLX: # %bb.0: # %entry
5553 ; VLX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0
5554 ; VLX-NEXT: kmovd %k0, %eax
5555 ; VLX-NEXT: vzeroupper
5558 ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem:
5559 ; NoVLX: # %bb.0: # %entry
5560 ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
5561 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5562 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5563 ; NoVLX-NEXT: kmovw %k0, %eax
5564 ; NoVLX-NEXT: vzeroupper
5567 %0 = bitcast <4 x i64> %__a to <16 x i16>
5568 %load = load <4 x i64>, <4 x i64>* %__b
5569 %1 = bitcast <4 x i64> %load to <16 x i16>
5570 %2 = icmp sgt <16 x i16> %0, %1
; Shuffle indices 16..31 select lanes of the zeroinitializer operand, so the
; upper 16 result lanes are zero.
5571 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5572 %4 = bitcast <32 x i1> %3 to i32
; Masked variant: the i16 argument %__u is bitcast to <16 x i1> and ANDed with
; the signed greater-than <16 x i16> compare result before the result is
; widened to <32 x i1> and bitcast to i32.
; The VLX checks expect %edi moved into %k1 and used as a write-mask on the ymm
; vpcmpgtw; the NoVLX checks expect the mask applied afterwards with
; andl %edi, %eax.
5576 define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5577 ; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask:
5578 ; VLX: # %bb.0: # %entry
5579 ; VLX-NEXT: kmovd %edi, %k1
5580 ; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1}
5581 ; VLX-NEXT: kmovd %k0, %eax
5582 ; VLX-NEXT: vzeroupper
5585 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask:
5586 ; NoVLX: # %bb.0: # %entry
5587 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
5588 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5589 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5590 ; NoVLX-NEXT: kmovw %k0, %eax
5591 ; NoVLX-NEXT: andl %edi, %eax
5592 ; NoVLX-NEXT: vzeroupper
5595 %0 = bitcast <4 x i64> %__a to <16 x i16>
5596 %1 = bitcast <4 x i64> %__b to <16 x i16>
5597 %2 = icmp sgt <16 x i16> %0, %1
5598 %3 = bitcast i16 %__u to <16 x i1>
5599 %4 = and <16 x i1> %2, %3
; Shuffle indices 16..31 select lanes of the zeroinitializer operand, so the
; upper 16 result lanes are zero.
5600 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5601 %6 = bitcast <32 x i1> %5 to i32
; Masked memory-operand variant: the right-hand side of the signed greater-than
; <16 x i16> compare is loaded from %__b, and the result is ANDed with %__u
; (bitcast from i16 to <16 x i1>) before being widened to <32 x i1> and bitcast
; to i32.
; Both check prefixes expect the load folded into vpcmpgtw as a (%rsi) operand;
; the NoVLX checks apply the mask afterwards with andl %edi, %eax.
5605 define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
5606 ; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem:
5607 ; VLX: # %bb.0: # %entry
5608 ; VLX-NEXT: kmovd %edi, %k1
5609 ; VLX-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1}
5610 ; VLX-NEXT: kmovd %k0, %eax
5611 ; VLX-NEXT: vzeroupper
5614 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem:
5615 ; NoVLX: # %bb.0: # %entry
5616 ; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
5617 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5618 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5619 ; NoVLX-NEXT: kmovw %k0, %eax
5620 ; NoVLX-NEXT: andl %edi, %eax
5621 ; NoVLX-NEXT: vzeroupper
5624 %0 = bitcast <4 x i64> %__a to <16 x i16>
5625 %load = load <4 x i64>, <4 x i64>* %__b
5626 %1 = bitcast <4 x i64> %load to <16 x i16>
5627 %2 = icmp sgt <16 x i16> %0, %1
5628 %3 = bitcast i16 %__u to <16 x i1>
5629 %4 = and <16 x i1> %2, %3
; Shuffle indices 16..31 select lanes of the zeroinitializer operand, so the
; upper 16 result lanes are zero.
5630 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5631 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-than compare of two <16 x i16> vectors (both bitcast from
; <4 x i64> register arguments). The <16 x i1> result is widened to <64 x i1>
; and bitcast to i64, matching the declared zeroext i64 return type.
; The VLX checks expect one ymm vpcmpgtw writing %k0 plus kmovq; the NoVLX
; checks expect vpcmpgtw into ymm, vpmovsxwd to zmm, then vptestmd to form the
; mask.
5636 define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5637 ; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask:
5638 ; VLX: # %bb.0: # %entry
5639 ; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
5640 ; VLX-NEXT: kmovq %k0, %rax
5641 ; VLX-NEXT: vzeroupper
5644 ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask:
5645 ; NoVLX: # %bb.0: # %entry
5646 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
5647 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5648 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5649 ; NoVLX-NEXT: kmovw %k0, %eax
5650 ; NoVLX-NEXT: vzeroupper
5653 %0 = bitcast <4 x i64> %__a to <16 x i16>
5654 %1 = bitcast <4 x i64> %__b to <16 x i16>
5655 %2 = icmp sgt <16 x i16> %0, %1
; Shuffle indices 16..31 select lanes of the zeroinitializer operand, so every
; result lane above the low 16 is zero.
5656 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5657 %4 = bitcast <64 x i1> %3 to i64
; Same 16 x i16 signed greater-than compare widened to a 64-bit mask, but the
; second operand is loaded through the pointer argument. Both runs are expected
; to fold the load into the vpcmpgtw memory operand.
5661 define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
5662 ; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem:
5663 ; VLX: # %bb.0: # %entry
5664 ; VLX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0
5665 ; VLX-NEXT: kmovq %k0, %rax
5666 ; VLX-NEXT: vzeroupper
5669 ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem:
5670 ; NoVLX: # %bb.0: # %entry
5671 ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
5672 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5673 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5674 ; NoVLX-NEXT: kmovw %k0, %eax
5675 ; NoVLX-NEXT: vzeroupper
5678 %0 = bitcast <4 x i64> %__a to <16 x i16>
5679 %load = load <4 x i64>, <4 x i64>* %__b
5680 %1 = bitcast <4 x i64> %load to <16 x i16>
5681 %2 = icmp sgt <16 x i16> %0, %1
; Shuffle indices >= 16 select lanes of the zeroinitializer operand, so result lanes 16..63 are zero.
5682 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5683 %4 = bitcast <64 x i1> %3 to i64
; Masked variant: the 16 x i16 signed greater-than result is ANDed with the
; 16-bit mask argument %__u before being widened to 64 bits. With AVX512BW/VL
; the checks expect %__u to become the {%k1} write-mask of vpcmpgtw; the
; AVX512F-only run is expected to apply it with a scalar andl instead.
5687 define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5688 ; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask:
5689 ; VLX: # %bb.0: # %entry
5690 ; VLX-NEXT: kmovd %edi, %k1
5691 ; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1}
5692 ; VLX-NEXT: kmovq %k0, %rax
5693 ; VLX-NEXT: vzeroupper
5696 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask:
5697 ; NoVLX: # %bb.0: # %entry
5698 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
5699 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5700 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5701 ; NoVLX-NEXT: kmovw %k0, %eax
5702 ; NoVLX-NEXT: andl %edi, %eax
5703 ; NoVLX-NEXT: vzeroupper
5706 %0 = bitcast <4 x i64> %__a to <16 x i16>
5707 %1 = bitcast <4 x i64> %__b to <16 x i16>
5708 %2 = icmp sgt <16 x i16> %0, %1
5709 %3 = bitcast i16 %__u to <16 x i1>
5710 %4 = and <16 x i1> %2, %3
; Shuffle indices >= 16 select lanes of the zeroinitializer operand, so result lanes 16..63 are zero.
5711 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5712 %6 = bitcast <64 x i1> %5 to i64
; Masked 16 x i16 signed greater-than compare with the second operand loaded
; from memory; the result is ANDed with %__u and widened to a 64-bit mask.
; Both runs are expected to fold the load into vpcmpgtw (pointer in %rsi,
; because %__u occupies %edi).
5716 define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
5717 ; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem:
5718 ; VLX: # %bb.0: # %entry
5719 ; VLX-NEXT: kmovd %edi, %k1
5720 ; VLX-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1}
5721 ; VLX-NEXT: kmovq %k0, %rax
5722 ; VLX-NEXT: vzeroupper
5725 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem:
5726 ; NoVLX: # %bb.0: # %entry
5727 ; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
5728 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5729 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5730 ; NoVLX-NEXT: kmovw %k0, %eax
5731 ; NoVLX-NEXT: andl %edi, %eax
5732 ; NoVLX-NEXT: vzeroupper
5735 %0 = bitcast <4 x i64> %__a to <16 x i16>
5736 %load = load <4 x i64>, <4 x i64>* %__b
5737 %1 = bitcast <4 x i64> %load to <16 x i16>
5738 %2 = icmp sgt <16 x i16> %0, %1
5739 %3 = bitcast i16 %__u to <16 x i1>
5740 %4 = and <16 x i1> %2, %3
; Shuffle indices >= 16 select lanes of the zeroinitializer operand, so result lanes 16..63 are zero.
5741 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5742 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of 32 x i16 lanes (register operands), widened
; from a 32-bit to a 64-bit mask. With AVX512BW/VL the checks expect one zmm
; vpcmpgtw; the AVX512F-only run is expected to split both operands into ymm
; halves, build a 16-bit mask per half and merge them with shll $16 / orl.
5747 define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
5748 ; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
5749 ; VLX: # %bb.0: # %entry
5750 ; VLX-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
5751 ; VLX-NEXT: kmovq %k0, %rax
5752 ; VLX-NEXT: vzeroupper
5755 ; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
5756 ; NoVLX: # %bb.0: # %entry
5757 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
5758 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3
5759 ; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2
5760 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
5761 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5762 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5763 ; NoVLX-NEXT: kmovw %k0, %ecx
5764 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm0
5765 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5766 ; NoVLX-NEXT: kmovw %k0, %eax
5767 ; NoVLX-NEXT: shll $16, %eax
5768 ; NoVLX-NEXT: orl %ecx, %eax
5769 ; NoVLX-NEXT: vzeroupper
5772 %0 = bitcast <8 x i64> %__a to <32 x i16>
5773 %1 = bitcast <8 x i64> %__b to <32 x i16>
5774 %2 = icmp sgt <32 x i16> %0, %1
; Shuffle indices 32..63 select lanes of the zeroinitializer operand, so the upper 32 result lanes are zero.
5775 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5776 %4 = bitcast <64 x i1> %3 to i64
; 32 x i16 signed greater-than compare with the second operand loaded from
; memory, widened to a 64-bit mask. The AVX512F-only run is expected to compare
; the two ymm halves against (%rdi) and 32(%rdi) separately and merge the two
; 16-bit masks with shll $16 / orl.
5780 define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
5781 ; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
5782 ; VLX: # %bb.0: # %entry
5783 ; VLX-NEXT: vpcmpgtw (%rdi), %zmm0, %k0
5784 ; VLX-NEXT: kmovq %k0, %rax
5785 ; VLX-NEXT: vzeroupper
5788 ; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
5789 ; NoVLX: # %bb.0: # %entry
5790 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1
5791 ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
5792 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5793 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5794 ; NoVLX-NEXT: kmovw %k0, %ecx
5795 ; NoVLX-NEXT: vpcmpgtw 32(%rdi), %ymm1, %ymm0
5796 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5797 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5798 ; NoVLX-NEXT: kmovw %k0, %eax
5799 ; NoVLX-NEXT: shll $16, %eax
5800 ; NoVLX-NEXT: orl %ecx, %eax
5801 ; NoVLX-NEXT: vzeroupper
5804 %0 = bitcast <8 x i64> %__a to <32 x i16>
5805 %load = load <8 x i64>, <8 x i64>* %__b
5806 %1 = bitcast <8 x i64> %load to <32 x i16>
5807 %2 = icmp sgt <32 x i16> %0, %1
; Shuffle indices 32..63 select lanes of the zeroinitializer operand, so the upper 32 result lanes are zero.
5808 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5809 %4 = bitcast <64 x i1> %3 to i64
; Masked 32 x i16 signed greater-than compare (register operands): the result
; is ANDed with the 32-bit mask %__u and widened to 64 bits. The AVX512F-only
; run is expected to process the two ymm halves separately, AND each 16-bit
; mask with the matching half of %edi (andl, then shrl $16) and recombine.
5813 define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
5814 ; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
5815 ; VLX: # %bb.0: # %entry
5816 ; VLX-NEXT: kmovd %edi, %k1
5817 ; VLX-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
5818 ; VLX-NEXT: kmovq %k0, %rax
5819 ; VLX-NEXT: vzeroupper
5822 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
5823 ; NoVLX: # %bb.0: # %entry
5824 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm2
5825 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
5826 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
5827 ; NoVLX-NEXT: kmovw %k0, %eax
5828 ; NoVLX-NEXT: andl %edi, %eax
5829 ; NoVLX-NEXT: shrl $16, %edi
5830 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
5831 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
5832 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
5833 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5834 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5835 ; NoVLX-NEXT: kmovw %k0, %ecx
5836 ; NoVLX-NEXT: andl %edi, %ecx
5837 ; NoVLX-NEXT: shll $16, %ecx
5838 ; NoVLX-NEXT: movzwl %ax, %eax
5839 ; NoVLX-NEXT: orl %ecx, %eax
5840 ; NoVLX-NEXT: vzeroupper
5843 %0 = bitcast <8 x i64> %__a to <32 x i16>
5844 %1 = bitcast <8 x i64> %__b to <32 x i16>
5845 %2 = icmp sgt <32 x i16> %0, %1
5846 %3 = bitcast i32 %__u to <32 x i1>
5847 %4 = and <32 x i1> %2, %3
; Shuffle indices 32..63 select lanes of the zeroinitializer operand, so the upper 32 result lanes are zero.
5848 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5849 %6 = bitcast <64 x i1> %5 to i64
; Masked 32 x i16 signed greater-than compare with the second operand loaded
; from memory; the result is ANDed with %__u and widened to a 64-bit mask.
; The AVX512F-only run is expected to compare the ymm halves against (%rsi) and
; 32(%rsi), AND each 16-bit mask with the matching half of %edi and recombine.
5853 define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
5854 ; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
5855 ; VLX: # %bb.0: # %entry
5856 ; VLX-NEXT: kmovd %edi, %k1
5857 ; VLX-NEXT: vpcmpgtw (%rsi), %zmm0, %k0 {%k1}
5858 ; VLX-NEXT: kmovq %k0, %rax
5859 ; VLX-NEXT: vzeroupper
5862 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
5863 ; NoVLX: # %bb.0: # %entry
5864 ; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm1
5865 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
5866 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
5867 ; NoVLX-NEXT: kmovw %k0, %eax
5868 ; NoVLX-NEXT: andl %edi, %eax
5869 ; NoVLX-NEXT: shrl $16, %edi
5870 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
5871 ; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm0, %ymm0
5872 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5873 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5874 ; NoVLX-NEXT: kmovw %k0, %ecx
5875 ; NoVLX-NEXT: andl %edi, %ecx
5876 ; NoVLX-NEXT: shll $16, %ecx
5877 ; NoVLX-NEXT: movzwl %ax, %eax
5878 ; NoVLX-NEXT: orl %ecx, %eax
5879 ; NoVLX-NEXT: vzeroupper
5882 %0 = bitcast <8 x i64> %__a to <32 x i16>
5883 %load = load <8 x i64>, <8 x i64>* %__b
5884 %1 = bitcast <8 x i64> %load to <32 x i16>
5885 %2 = icmp sgt <32 x i16> %0, %1
5886 %3 = bitcast i32 %__u to <32 x i1>
5887 %4 = and <32 x i1> %2, %3
; Shuffle indices 32..63 select lanes of the zeroinitializer operand, so the upper 32 result lanes are zero.
5888 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5889 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of 4 x i32 lanes (register operands), widened to
; an 8-bit mask. With AVX512VL the checks expect an xmm vpcmpgtd into a mask
; register; the AVX512F-only run is expected to compare on zmm and use
; kshiftlw/kshiftrw by 12 to clear the 12 mask bits beyond the 4 real lanes.
5894 define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5895 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask:
5896 ; VLX: # %bb.0: # %entry
5897 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
5898 ; VLX-NEXT: kmovd %k0, %eax
5899 ; VLX-NEXT: # kill: def $al killed $al killed $eax
5902 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask:
5903 ; NoVLX: # %bb.0: # %entry
5904 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
5905 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5906 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
5907 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
5908 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
5909 ; NoVLX-NEXT: kmovw %k0, %eax
5910 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
5911 ; NoVLX-NEXT: vzeroupper
5914 %0 = bitcast <2 x i64> %__a to <4 x i32>
5915 %1 = bitcast <2 x i64> %__b to <4 x i32>
5916 %2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result lanes 4..7 are zero.
5917 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5918 %4 = bitcast <8 x i1> %3 to i8
; 4 x i32 signed greater-than compare with the second operand loaded from
; memory, widened to an 8-bit mask. With AVX512VL the load is expected to fold
; into vpcmpgtd; the AVX512F-only run is expected to load via vmovdqa into xmm1
; and then compare on zmm registers.
5922 define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5923 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem:
5924 ; VLX: # %bb.0: # %entry
5925 ; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
5926 ; VLX-NEXT: kmovd %k0, %eax
5927 ; VLX-NEXT: # kill: def $al killed $al killed $eax
5930 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem:
5931 ; NoVLX: # %bb.0: # %entry
5932 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5933 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
5934 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
5935 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
5936 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
5937 ; NoVLX-NEXT: kmovw %k0, %eax
5938 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
5939 ; NoVLX-NEXT: vzeroupper
5942 %0 = bitcast <2 x i64> %__a to <4 x i32>
5943 %load = load <2 x i64>, <2 x i64>* %__b
5944 %1 = bitcast <2 x i64> %load to <4 x i32>
5945 %2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result lanes 4..7 are zero.
5946 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5947 %4 = bitcast <8 x i1> %3 to i8
; Masked 4 x i32 signed greater-than compare (register operands): the result is
; ANDed with elements 0..3 of the i8 mask %__u and widened to an 8-bit mask.
; Both runs are expected to turn %__u into the {%k1} write-mask of vpcmpgtd
; (xmm with AVX512VL, zmm plus a 12-bit kshift pair with AVX512F only).
5951 define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5952 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask:
5953 ; VLX: # %bb.0: # %entry
5954 ; VLX-NEXT: kmovd %edi, %k1
5955 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
5956 ; VLX-NEXT: kmovd %k0, %eax
5957 ; VLX-NEXT: # kill: def $al killed $al killed $eax
5960 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask:
5961 ; NoVLX: # %bb.0: # %entry
5962 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
5963 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5964 ; NoVLX-NEXT: kmovw %edi, %k1
5965 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
5966 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
5967 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
5968 ; NoVLX-NEXT: kmovw %k0, %eax
5969 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
5970 ; NoVLX-NEXT: vzeroupper
5973 %0 = bitcast <2 x i64> %__a to <4 x i32>
5974 %1 = bitcast <2 x i64> %__b to <4 x i32>
5975 %2 = icmp sgt <4 x i32> %0, %1
5976 %3 = bitcast i8 %__u to <8 x i1>
; Keep only elements 0..3 of the 8-element mask so it matches the 4 compare lanes.
5977 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5978 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result lanes 4..7 are zero.
5979 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5980 %6 = bitcast <8 x i1> %5 to i8
; Masked 4 x i32 signed greater-than compare with the second operand loaded
; from memory; the result is ANDed with elements 0..3 of %__u and widened to an
; 8-bit mask. With AVX512VL the load is expected to fold into the masked
; vpcmpgtd; the AVX512F-only run is expected to load with vmovdqa first.
5984 define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5985 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem:
5986 ; VLX: # %bb.0: # %entry
5987 ; VLX-NEXT: kmovd %edi, %k1
5988 ; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
5989 ; VLX-NEXT: kmovd %k0, %eax
5990 ; VLX-NEXT: # kill: def $al killed $al killed $eax
5993 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem:
5994 ; NoVLX: # %bb.0: # %entry
5995 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5996 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
5997 ; NoVLX-NEXT: kmovw %edi, %k1
5998 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
5999 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6000 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6001 ; NoVLX-NEXT: kmovw %k0, %eax
6002 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
6003 ; NoVLX-NEXT: vzeroupper
6006 %0 = bitcast <2 x i64> %__a to <4 x i32>
6007 %load = load <2 x i64>, <2 x i64>* %__b
6008 %1 = bitcast <2 x i64> %load to <4 x i32>
6009 %2 = icmp sgt <4 x i32> %0, %1
6010 %3 = bitcast i8 %__u to <8 x i1>
; Keep only elements 0..3 of the 8-element mask so it matches the 4 compare lanes.
6011 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6012 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result lanes 4..7 are zero.
6013 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6014 %6 = bitcast <8 x i1> %5 to i8
; Broadcast variant: a scalar i32 is loaded and splatted to 4 lanes, then used
; as the right-hand side of a 4 x i32 signed greater-than compare widened to an
; 8-bit mask. With AVX512VL the checks expect an embedded {1to4} broadcast
; operand; the AVX512F-only run is expected to use a separate vpbroadcastd.
6019 define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
6020 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
6021 ; VLX: # %bb.0: # %entry
6022 ; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
6023 ; VLX-NEXT: kmovd %k0, %eax
6024 ; VLX-NEXT: # kill: def $al killed $al killed $eax
6027 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
6028 ; NoVLX: # %bb.0: # %entry
6029 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6030 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
6031 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6032 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6033 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6034 ; NoVLX-NEXT: kmovw %k0, %eax
6035 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
6036 ; NoVLX-NEXT: vzeroupper
6039 %0 = bitcast <2 x i64> %__a to <4 x i32>
6040 %load = load i32, i32* %__b
; Splat: insert the scalar into lane 0, then replicate lane 0 into all 4 lanes.
6041 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6042 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6043 %2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result lanes 4..7 are zero.
6044 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6045 %4 = bitcast <8 x i1> %3 to i8
; Masked broadcast variant: a scalar i32 is loaded and splatted to 4 lanes,
; compared (signed greater-than) against %__a, ANDed with elements 0..3 of
; %__u and widened to an 8-bit mask. Note the `and` here takes the mask as its
; first operand, unlike the non-broadcast masked tests.
6049 define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
6050 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
6051 ; VLX: # %bb.0: # %entry
6052 ; VLX-NEXT: kmovd %edi, %k1
6053 ; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
6054 ; VLX-NEXT: kmovd %k0, %eax
6055 ; VLX-NEXT: # kill: def $al killed $al killed $eax
6058 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
6059 ; NoVLX: # %bb.0: # %entry
6060 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6061 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
6062 ; NoVLX-NEXT: kmovw %edi, %k1
6063 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6064 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6065 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6066 ; NoVLX-NEXT: kmovw %k0, %eax
6067 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
6068 ; NoVLX-NEXT: vzeroupper
6071 %0 = bitcast <2 x i64> %__a to <4 x i32>
6072 %load = load i32, i32* %__b
; Splat: insert the scalar into lane 0, then replicate lane 0 into all 4 lanes.
6073 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6074 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6075 %2 = icmp sgt <4 x i32> %0, %1
6076 %3 = bitcast i8 %__u to <8 x i1>
; Keep only elements 0..3 of the 8-element mask so it matches the 4 compare lanes.
6077 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6078 %4 = and <4 x i1> %extract.i, %2
; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result lanes 4..7 are zero.
6079 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6080 %6 = bitcast <8 x i1> %5 to i8
; Signed greater-than compare of 4 x i32 lanes (register operands), widened to
; a 16-bit mask. With AVX512VL the checks expect an xmm vpcmpgtd into a mask
; register; the AVX512F-only run is expected to compare on zmm and use
; kshiftlw/kshiftrw by 12 to clear the 12 mask bits beyond the 4 real lanes.
6085 define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6086 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask:
6087 ; VLX: # %bb.0: # %entry
6088 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
6089 ; VLX-NEXT: kmovd %k0, %eax
6090 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6093 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask:
6094 ; NoVLX: # %bb.0: # %entry
6095 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
6096 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6097 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6098 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6099 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6100 ; NoVLX-NEXT: kmovw %k0, %eax
6101 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6102 ; NoVLX-NEXT: vzeroupper
6105 %0 = bitcast <2 x i64> %__a to <4 x i32>
6106 %1 = bitcast <2 x i64> %__b to <4 x i32>
6107 %2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result lanes 4..15 are zero.
6108 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6109 %4 = bitcast <16 x i1> %3 to i16
; 4 x i32 signed greater-than compare with the second operand loaded from
; memory, widened to a 16-bit mask. With AVX512VL the load is expected to fold
; into vpcmpgtd; the AVX512F-only run is expected to load via vmovdqa into xmm1
; and then compare on zmm registers.
6113 define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6114 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem:
6115 ; VLX: # %bb.0: # %entry
6116 ; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
6117 ; VLX-NEXT: kmovd %k0, %eax
6118 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6121 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem:
6122 ; NoVLX: # %bb.0: # %entry
6123 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6124 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
6125 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6126 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6127 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6128 ; NoVLX-NEXT: kmovw %k0, %eax
6129 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6130 ; NoVLX-NEXT: vzeroupper
6133 %0 = bitcast <2 x i64> %__a to <4 x i32>
6134 %load = load <2 x i64>, <2 x i64>* %__b
6135 %1 = bitcast <2 x i64> %load to <4 x i32>
6136 %2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result lanes 4..15 are zero.
6137 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6138 %4 = bitcast <16 x i1> %3 to i16
; Masked 4 x i32 signed greater-than compare (register operands): the result is
; ANDed with elements 0..3 of the i8 mask %__u and widened to a 16-bit mask.
; Both runs are expected to turn %__u into the {%k1} write-mask of vpcmpgtd
; (xmm with AVX512VL, zmm plus a 12-bit kshift pair with AVX512F only).
6142 define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6143 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask:
6144 ; VLX: # %bb.0: # %entry
6145 ; VLX-NEXT: kmovd %edi, %k1
6146 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
6147 ; VLX-NEXT: kmovd %k0, %eax
6148 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6151 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask:
6152 ; NoVLX: # %bb.0: # %entry
6153 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
6154 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6155 ; NoVLX-NEXT: kmovw %edi, %k1
6156 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6157 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6158 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6159 ; NoVLX-NEXT: kmovw %k0, %eax
6160 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6161 ; NoVLX-NEXT: vzeroupper
6164 %0 = bitcast <2 x i64> %__a to <4 x i32>
6165 %1 = bitcast <2 x i64> %__b to <4 x i32>
6166 %2 = icmp sgt <4 x i32> %0, %1
6167 %3 = bitcast i8 %__u to <8 x i1>
; Keep only elements 0..3 of the 8-element mask so it matches the 4 compare lanes.
6168 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6169 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result lanes 4..15 are zero.
6170 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6171 %6 = bitcast <16 x i1> %5 to i16
; Masked 4 x i32 signed greater-than compare with the second operand loaded
; from memory; the result is ANDed with elements 0..3 of %__u and widened to a
; 16-bit mask. With AVX512VL the load is expected to fold into the masked
; vpcmpgtd; the AVX512F-only run is expected to load with vmovdqa first.
6175 define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6176 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem:
6177 ; VLX: # %bb.0: # %entry
6178 ; VLX-NEXT: kmovd %edi, %k1
6179 ; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
6180 ; VLX-NEXT: kmovd %k0, %eax
6181 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6184 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem:
6185 ; NoVLX: # %bb.0: # %entry
6186 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6187 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
6188 ; NoVLX-NEXT: kmovw %edi, %k1
6189 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6190 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6191 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6192 ; NoVLX-NEXT: kmovw %k0, %eax
6193 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6194 ; NoVLX-NEXT: vzeroupper
6197 %0 = bitcast <2 x i64> %__a to <4 x i32>
6198 %load = load <2 x i64>, <2 x i64>* %__b
6199 %1 = bitcast <2 x i64> %load to <4 x i32>
6200 %2 = icmp sgt <4 x i32> %0, %1
6201 %3 = bitcast i8 %__u to <8 x i1>
; Keep only elements 0..3 of the 8-element mask so it matches the 4 compare lanes.
6202 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6203 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result lanes 4..15 are zero.
6204 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6205 %6 = bitcast <16 x i1> %5 to i16
; Broadcast variant: a scalar i32 is loaded and splatted to 4 lanes, then used
; as the right-hand side of a 4 x i32 signed greater-than compare widened to a
; 16-bit mask. With AVX512VL the checks expect an embedded {1to4} broadcast
; operand; the AVX512F-only run is expected to use a separate vpbroadcastd.
6210 define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
6211 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
6212 ; VLX: # %bb.0: # %entry
6213 ; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
6214 ; VLX-NEXT: kmovd %k0, %eax
6215 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6218 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
6219 ; NoVLX: # %bb.0: # %entry
6220 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6221 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
6222 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6223 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6224 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6225 ; NoVLX-NEXT: kmovw %k0, %eax
6226 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6227 ; NoVLX-NEXT: vzeroupper
6230 %0 = bitcast <2 x i64> %__a to <4 x i32>
6231 %load = load i32, i32* %__b
; Splat: insert the scalar into lane 0, then replicate lane 0 into all 4 lanes.
6232 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6233 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6234 %2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result lanes 4..15 are zero.
6235 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6236 %4 = bitcast <16 x i1> %3 to i16
; Masked broadcast variant: a scalar i32 is loaded and splatted to 4 lanes,
; compared (signed greater-than) against %__a, ANDed with elements 0..3 of
; %__u and widened to a 16-bit mask. Note the `and` here takes the mask as its
; first operand, unlike the non-broadcast masked tests.
6240 define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
6241 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
6242 ; VLX: # %bb.0: # %entry
6243 ; VLX-NEXT: kmovd %edi, %k1
6244 ; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
6245 ; VLX-NEXT: kmovd %k0, %eax
6246 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6249 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
6250 ; NoVLX: # %bb.0: # %entry
6251 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6252 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
6253 ; NoVLX-NEXT: kmovw %edi, %k1
6254 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6255 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6256 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6257 ; NoVLX-NEXT: kmovw %k0, %eax
6258 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6259 ; NoVLX-NEXT: vzeroupper
6262 %0 = bitcast <2 x i64> %__a to <4 x i32>
6263 %load = load i32, i32* %__b
; Splat: insert the scalar into lane 0, then replicate lane 0 into all 4 lanes.
6264 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6265 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6266 %2 = icmp sgt <4 x i32> %0, %1
6267 %3 = bitcast i8 %__u to <8 x i1>
; Keep only elements 0..3 of the 8-element mask so it matches the 4 compare lanes.
6268 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6269 %4 = and <4 x i1> %extract.i, %2
; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result lanes 4..15 are zero.
6270 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6271 %6 = bitcast <16 x i1> %5 to i16
; Signed greater-than compare of 4 x i32 lanes (register operands), widened to
; a 32-bit mask. With AVX512VL the checks expect an xmm vpcmpgtd into a mask
; register; the AVX512F-only run is expected to compare on zmm and use
; kshiftlw/kshiftrw by 12 to clear the 12 mask bits beyond the 4 real lanes.
6276 define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6277 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask:
6278 ; VLX: # %bb.0: # %entry
6279 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
6280 ; VLX-NEXT: kmovd %k0, %eax
6283 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask:
6284 ; NoVLX: # %bb.0: # %entry
6285 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
6286 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6287 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6288 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6289 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6290 ; NoVLX-NEXT: kmovw %k0, %eax
6291 ; NoVLX-NEXT: vzeroupper
6294 %0 = bitcast <2 x i64> %__a to <4 x i32>
6295 %1 = bitcast <2 x i64> %__b to <4 x i32>
6296 %2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result lanes 4..31 are zero.
6297 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6298 %4 = bitcast <32 x i1> %3 to i32
; 4 x i32 signed greater-than compare with the second operand loaded from
; memory, widened to a 32-bit mask. With AVX512VL the load is expected to fold
; into vpcmpgtd; the AVX512F-only run is expected to load via vmovdqa into xmm1
; and then compare on zmm registers.
6302 define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6303 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem:
6304 ; VLX: # %bb.0: # %entry
6305 ; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
6306 ; VLX-NEXT: kmovd %k0, %eax
6309 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem:
6310 ; NoVLX: # %bb.0: # %entry
6311 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6312 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
6313 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6314 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6315 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6316 ; NoVLX-NEXT: kmovw %k0, %eax
6317 ; NoVLX-NEXT: vzeroupper
6320 %0 = bitcast <2 x i64> %__a to <4 x i32>
6321 %load = load <2 x i64>, <2 x i64>* %__b
6322 %1 = bitcast <2 x i64> %load to <4 x i32>
6323 %2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result lanes 4..31 are zero.
6324 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6325 %4 = bitcast <32 x i1> %3 to i32
; Masked 4 x i32 signed greater-than compare (register operands): the result is
; ANDed with elements 0..3 of the i8 mask %__u and widened to a 32-bit mask.
; Both runs are expected to turn %__u into the {%k1} write-mask of vpcmpgtd
; (xmm with AVX512VL, zmm plus a 12-bit kshift pair with AVX512F only).
6329 define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6330 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask:
6331 ; VLX: # %bb.0: # %entry
6332 ; VLX-NEXT: kmovd %edi, %k1
6333 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
6334 ; VLX-NEXT: kmovd %k0, %eax
6337 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask:
6338 ; NoVLX: # %bb.0: # %entry
6339 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
6340 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6341 ; NoVLX-NEXT: kmovw %edi, %k1
6342 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6343 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6344 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6345 ; NoVLX-NEXT: kmovw %k0, %eax
6346 ; NoVLX-NEXT: vzeroupper
6349 %0 = bitcast <2 x i64> %__a to <4 x i32>
6350 %1 = bitcast <2 x i64> %__b to <4 x i32>
6351 %2 = icmp sgt <4 x i32> %0, %1
6352 %3 = bitcast i8 %__u to <8 x i1>
; Keep only elements 0..3 of the 8-element mask so it matches the 4 compare lanes.
6353 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6354 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result lanes 4..31 are zero.
6355 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6356 %6 = bitcast <32 x i1> %5 to i32
; Masked 4 x i32 signed greater-than compare with the second operand loaded
; from memory; the result is ANDed with elements 0..3 of %__u and widened to a
; 32-bit mask. With AVX512VL the load is expected to fold into the masked
; vpcmpgtd; the AVX512F-only run is expected to load with vmovdqa first.
6360 define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6361 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem:
6362 ; VLX: # %bb.0: # %entry
6363 ; VLX-NEXT: kmovd %edi, %k1
6364 ; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
6365 ; VLX-NEXT: kmovd %k0, %eax
6368 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem:
6369 ; NoVLX: # %bb.0: # %entry
6370 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6371 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
6372 ; NoVLX-NEXT: kmovw %edi, %k1
6373 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6374 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6375 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6376 ; NoVLX-NEXT: kmovw %k0, %eax
6377 ; NoVLX-NEXT: vzeroupper
6380 %0 = bitcast <2 x i64> %__a to <4 x i32>
6381 %load = load <2 x i64>, <2 x i64>* %__b
6382 %1 = bitcast <2 x i64> %load to <4 x i32>
6383 %2 = icmp sgt <4 x i32> %0, %1
6384 %3 = bitcast i8 %__u to <8 x i1>
; Keep only elements 0..3 of the 8-element mask so it matches the 4 compare lanes.
6385 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6386 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4..7 select lanes of the zeroinitializer operand, so result lanes 4..31 are zero.
6387 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6388 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-than compare of four i32 lanes against a scalar broadcast from
; memory; the 4-lane result is widened to a 32-bit mask.
;   %__a - first operand, reinterpreted as <4 x i32>.
;   %__b - pointer to one i32 that is splatted to all four lanes via
;          insertelement + shufflevector with an all-zero index mask.
; The widening shufflevector uses indices >= 4 to select lanes of the
; zeroinitializer operand, so lanes 4-31 of the <32 x i1> bitcast to i32 are
; zero.
; Expected codegen: with AVX512VL the splat becomes an embedded {1to4}
; broadcast on an xmm vpcmpgtd; with AVX512F only a separate vpbroadcastd feeds
; a zmm compare and the kshiftlw/kshiftrw $12 pair keeps the low 4 mask bits.
6393 define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
6394 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
6395 ; VLX: # %bb.0: # %entry
6396 ; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
6397 ; VLX-NEXT: kmovd %k0, %eax
6400 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
6401 ; NoVLX: # %bb.0: # %entry
6402 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6403 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
6404 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6405 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6406 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6407 ; NoVLX-NEXT: kmovw %k0, %eax
6408 ; NoVLX-NEXT: vzeroupper
6411 %0 = bitcast <2 x i64> %__a to <4 x i32>
6412 %load = load i32, i32* %__b
6413 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6414 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6415 %2 = icmp sgt <4 x i32> %0, %1
6416 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6417 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of four i32 lanes against a scalar
; broadcast from memory; the 4-lane result is widened to a 32-bit mask.
;   %__u - i8 write mask; only lanes 0-3 of its <8 x i1> form are used.
;   %__a - first operand, reinterpreted as <4 x i32>.
;   %__b - pointer to one i32 that is splatted to all four lanes.
; Here the `and` takes the extracted mask as its first operand and the compare
; result as its second. The widening shufflevector uses indices >= 4 to select
; lanes of the zeroinitializer operand, so lanes 4-31 of the <32 x i1> bitcast
; to i32 are zero.
; Expected codegen: with AVX512VL a write-masked xmm vpcmpgtd with an embedded
; {1to4} broadcast; with AVX512F only a vpbroadcastd feeds a masked zmm compare
; and the kshiftlw/kshiftrw $12 pair keeps the low 4 mask bits.
6421 define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
6422 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
6423 ; VLX: # %bb.0: # %entry
6424 ; VLX-NEXT: kmovd %edi, %k1
6425 ; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
6426 ; VLX-NEXT: kmovd %k0, %eax
6429 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
6430 ; NoVLX: # %bb.0: # %entry
6431 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6432 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
6433 ; NoVLX-NEXT: kmovw %edi, %k1
6434 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6435 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6436 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6437 ; NoVLX-NEXT: kmovw %k0, %eax
6438 ; NoVLX-NEXT: vzeroupper
6441 %0 = bitcast <2 x i64> %__a to <4 x i32>
6442 %load = load i32, i32* %__b
6443 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6444 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6445 %2 = icmp sgt <4 x i32> %0, %1
6446 %3 = bitcast i8 %__u to <8 x i1>
6447 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6448 %4 = and <4 x i1> %extract.i, %2
6449 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6450 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-than compare of four i32 lanes held in registers; the 4-lane
; result is widened to a 64-bit mask.
;   %__a, %__b - operands, each reinterpreted as <4 x i32>.
; The widening shufflevector uses indices >= 4 to select lanes of the
; zeroinitializer operand, so lanes 4-63 of the <64 x i1> bitcast to i64 are
; zero.
; Expected codegen: with AVX512VL an xmm vpcmpgtd into k0 read out with kmovq;
; with AVX512F only the compare runs on zmm registers and the
; kshiftlw/kshiftrw $12 pair keeps just the low 4 bits of the 16-bit mask.
6455 define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6456 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask:
6457 ; VLX: # %bb.0: # %entry
6458 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
6459 ; VLX-NEXT: kmovq %k0, %rax
6462 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask:
6463 ; NoVLX: # %bb.0: # %entry
6464 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
6465 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6466 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6467 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6468 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6469 ; NoVLX-NEXT: kmovw %k0, %eax
6470 ; NoVLX-NEXT: vzeroupper
6473 %0 = bitcast <2 x i64> %__a to <4 x i32>
6474 %1 = bitcast <2 x i64> %__b to <4 x i32>
6475 %2 = icmp sgt <4 x i32> %0, %1
6476 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6477 %4 = bitcast <64 x i1> %3 to i64
; Signed greater-than compare of four i32 lanes with the second operand loaded
; from memory; the 4-lane result is widened to a 64-bit mask.
;   %__a - first operand, reinterpreted as <4 x i32>.
;   %__b - pointer to the second operand, loaded as <2 x i64>.
; The widening shufflevector uses indices >= 4 to select lanes of the
; zeroinitializer operand, so lanes 4-63 of the <64 x i1> bitcast to i64 are
; zero.
; Expected codegen: with AVX512VL the load folds into an xmm vpcmpgtd; with
; AVX512F only a vmovdqa load feeds a zmm compare and the kshiftlw/kshiftrw $12
; pair keeps just the low 4 bits of the 16-bit mask.
6481 define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6482 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem:
6483 ; VLX: # %bb.0: # %entry
6484 ; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
6485 ; VLX-NEXT: kmovq %k0, %rax
6488 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem:
6489 ; NoVLX: # %bb.0: # %entry
6490 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6491 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
6492 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6493 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6494 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6495 ; NoVLX-NEXT: kmovw %k0, %eax
6496 ; NoVLX-NEXT: vzeroupper
6499 %0 = bitcast <2 x i64> %__a to <4 x i32>
6500 %load = load <2 x i64>, <2 x i64>* %__b
6501 %1 = bitcast <2 x i64> %load to <4 x i32>
6502 %2 = icmp sgt <4 x i32> %0, %1
6503 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6504 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of four i32 lanes held in registers; the
; 4-lane result is widened to a 64-bit mask.
;   %__u - i8 write mask; only lanes 0-3 of its <8 x i1> form are used.
;   %__a, %__b - operands, each reinterpreted as <4 x i32>.
; The compare result is ANDed with the extracted mask lanes and widened by a
; shufflevector whose indices >= 4 select lanes of the zeroinitializer operand,
; so lanes 4-63 of the <64 x i1> bitcast to i64 are zero.
; Expected codegen: with AVX512VL a write-masked xmm vpcmpgtd read out with
; kmovq; with AVX512F only a masked zmm compare followed by the
; kshiftlw/kshiftrw $12 pair that keeps the low 4 bits of the 16-bit mask.
6508 define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6509 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask:
6510 ; VLX: # %bb.0: # %entry
6511 ; VLX-NEXT: kmovd %edi, %k1
6512 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
6513 ; VLX-NEXT: kmovq %k0, %rax
6516 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask:
6517 ; NoVLX: # %bb.0: # %entry
6518 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
6519 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6520 ; NoVLX-NEXT: kmovw %edi, %k1
6521 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6522 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6523 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6524 ; NoVLX-NEXT: kmovw %k0, %eax
6525 ; NoVLX-NEXT: vzeroupper
6528 %0 = bitcast <2 x i64> %__a to <4 x i32>
6529 %1 = bitcast <2 x i64> %__b to <4 x i32>
6530 %2 = icmp sgt <4 x i32> %0, %1
6531 %3 = bitcast i8 %__u to <8 x i1>
6532 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6533 %4 = and <4 x i1> %2, %extract.i
6534 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6535 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-than compare of four i32 lanes with the second operand
; loaded from memory; the 4-lane result is widened to a 64-bit mask.
;   %__u - i8 write mask; only lanes 0-3 of its <8 x i1> form are used.
;   %__a - first operand, reinterpreted as <4 x i32>.
;   %__b - pointer to the second operand, loaded as <2 x i64>.
; The compare result is ANDed with the extracted mask lanes and widened by a
; shufflevector whose indices >= 4 select lanes of the zeroinitializer operand,
; so lanes 4-63 of the <64 x i1> bitcast to i64 are zero.
; Expected codegen: with AVX512VL the load folds into a write-masked xmm
; vpcmpgtd read out with kmovq; with AVX512F only a vmovdqa load feeds a masked
; zmm compare and the kshiftlw/kshiftrw $12 pair keeps the low 4 mask bits.
6539 define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6540 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem:
6541 ; VLX: # %bb.0: # %entry
6542 ; VLX-NEXT: kmovd %edi, %k1
6543 ; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
6544 ; VLX-NEXT: kmovq %k0, %rax
6547 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem:
6548 ; NoVLX: # %bb.0: # %entry
6549 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6550 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
6551 ; NoVLX-NEXT: kmovw %edi, %k1
6552 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6553 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6554 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6555 ; NoVLX-NEXT: kmovw %k0, %eax
6556 ; NoVLX-NEXT: vzeroupper
6559 %0 = bitcast <2 x i64> %__a to <4 x i32>
6560 %load = load <2 x i64>, <2 x i64>* %__b
6561 %1 = bitcast <2 x i64> %load to <4 x i32>
6562 %2 = icmp sgt <4 x i32> %0, %1
6563 %3 = bitcast i8 %__u to <8 x i1>
6564 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6565 %4 = and <4 x i1> %2, %extract.i
6566 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6567 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of four i32 lanes against a scalar broadcast from
; memory; the 4-lane result is widened to a 64-bit mask.
;   %__a - first operand, reinterpreted as <4 x i32>.
;   %__b - pointer to one i32 that is splatted to all four lanes via
;          insertelement + shufflevector with an all-zero index mask.
; The widening shufflevector uses indices >= 4 to select lanes of the
; zeroinitializer operand, so lanes 4-63 of the <64 x i1> bitcast to i64 are
; zero.
; Expected codegen: with AVX512VL an xmm vpcmpgtd with an embedded {1to4}
; broadcast read out with kmovq; with AVX512F only a vpbroadcastd feeds a zmm
; compare and the kshiftlw/kshiftrw $12 pair keeps the low 4 mask bits.
6572 define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
6573 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
6574 ; VLX: # %bb.0: # %entry
6575 ; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
6576 ; VLX-NEXT: kmovq %k0, %rax
6579 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
6580 ; NoVLX: # %bb.0: # %entry
6581 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6582 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
6583 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6584 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6585 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6586 ; NoVLX-NEXT: kmovw %k0, %eax
6587 ; NoVLX-NEXT: vzeroupper
6590 %0 = bitcast <2 x i64> %__a to <4 x i32>
6591 %load = load i32, i32* %__b
6592 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6593 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6594 %2 = icmp sgt <4 x i32> %0, %1
6595 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6596 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of four i32 lanes against a scalar
; broadcast from memory; the 4-lane result is widened to a 64-bit mask.
;   %__u - i8 write mask; only lanes 0-3 of its <8 x i1> form are used.
;   %__a - first operand, reinterpreted as <4 x i32>.
;   %__b - pointer to one i32 that is splatted to all four lanes.
; Here the `and` takes the extracted mask as its first operand and the compare
; result as its second. The widening shufflevector uses indices >= 4 to select
; lanes of the zeroinitializer operand, so lanes 4-63 of the <64 x i1> bitcast
; to i64 are zero.
; Expected codegen: with AVX512VL a write-masked xmm vpcmpgtd with an embedded
; {1to4} broadcast read out with kmovq; with AVX512F only a vpbroadcastd feeds
; a masked zmm compare and the kshiftlw/kshiftrw $12 pair keeps the low 4 bits.
6600 define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
6601 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
6602 ; VLX: # %bb.0: # %entry
6603 ; VLX-NEXT: kmovd %edi, %k1
6604 ; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
6605 ; VLX-NEXT: kmovq %k0, %rax
6608 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
6609 ; NoVLX: # %bb.0: # %entry
6610 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6611 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
6612 ; NoVLX-NEXT: kmovw %edi, %k1
6613 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6614 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6615 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6616 ; NoVLX-NEXT: kmovw %k0, %eax
6617 ; NoVLX-NEXT: vzeroupper
6620 %0 = bitcast <2 x i64> %__a to <4 x i32>
6621 %load = load i32, i32* %__b
6622 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6623 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6624 %2 = icmp sgt <4 x i32> %0, %1
6625 %3 = bitcast i8 %__u to <8 x i1>
6626 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6627 %4 = and <4 x i1> %extract.i, %2
6628 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6629 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of eight i32 lanes held in registers; the 8-lane
; result is widened to a 16-bit mask.
;   %__a, %__b - operands, each reinterpreted as <8 x i32>.
; The widening shufflevector uses indices 8-15 to select lanes of the
; zeroinitializer operand, so lanes 8-15 of the <16 x i1> bitcast to i16 are
; zero.
; Expected codegen: with AVX512VL a ymm vpcmpgtd into k0; with AVX512F only the
; compare runs on zmm registers and the kshiftlw/kshiftrw $8 pair keeps just
; the low 8 bits of the 16-bit mask.
6634 define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6635 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask:
6636 ; VLX: # %bb.0: # %entry
6637 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
6638 ; VLX-NEXT: kmovd %k0, %eax
6639 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6640 ; VLX-NEXT: vzeroupper
6643 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask:
6644 ; NoVLX: # %bb.0: # %entry
6645 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
6646 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6647 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6648 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6649 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6650 ; NoVLX-NEXT: kmovw %k0, %eax
6651 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6652 ; NoVLX-NEXT: vzeroupper
6655 %0 = bitcast <4 x i64> %__a to <8 x i32>
6656 %1 = bitcast <4 x i64> %__b to <8 x i32>
6657 %2 = icmp sgt <8 x i32> %0, %1
6658 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6659 %4 = bitcast <16 x i1> %3 to i16
; Signed greater-than compare of eight i32 lanes with the second operand loaded
; from memory; the 8-lane result is widened to a 16-bit mask.
;   %__a - first operand, reinterpreted as <8 x i32>.
;   %__b - pointer to the second operand, loaded as <4 x i64>.
; The widening shufflevector uses indices 8-15 to select lanes of the
; zeroinitializer operand, so lanes 8-15 of the <16 x i1> bitcast to i16 are
; zero.
; Expected codegen: with AVX512VL the load folds into a ymm vpcmpgtd; with
; AVX512F only a vmovdqa load feeds a zmm compare and the kshiftlw/kshiftrw $8
; pair keeps just the low 8 bits of the 16-bit mask.
6663 define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
6664 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem:
6665 ; VLX: # %bb.0: # %entry
6666 ; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
6667 ; VLX-NEXT: kmovd %k0, %eax
6668 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6669 ; VLX-NEXT: vzeroupper
6672 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem:
6673 ; NoVLX: # %bb.0: # %entry
6674 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6675 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
6676 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6677 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6678 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6679 ; NoVLX-NEXT: kmovw %k0, %eax
6680 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6681 ; NoVLX-NEXT: vzeroupper
6684 %0 = bitcast <4 x i64> %__a to <8 x i32>
6685 %load = load <4 x i64>, <4 x i64>* %__b
6686 %1 = bitcast <4 x i64> %load to <8 x i32>
6687 %2 = icmp sgt <8 x i32> %0, %1
6688 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6689 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than compare of eight i32 lanes held in registers; the
; 8-lane result is widened to a 16-bit mask.
;   %__u - i8 write mask, used in full as <8 x i1>.
;   %__a, %__b - operands, each reinterpreted as <8 x i32>.
; The compare result is ANDed with the mask and widened by a shufflevector
; whose indices 8-15 select lanes of the zeroinitializer operand, so lanes 8-15
; of the <16 x i1> bitcast to i16 are zero.
; Expected codegen: with AVX512VL a write-masked ymm vpcmpgtd; with AVX512F
; only a masked zmm compare followed by the kshiftlw/kshiftrw $8 pair that
; keeps the low 8 bits of the 16-bit mask.
6693 define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6694 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask:
6695 ; VLX: # %bb.0: # %entry
6696 ; VLX-NEXT: kmovd %edi, %k1
6697 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
6698 ; VLX-NEXT: kmovd %k0, %eax
6699 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6700 ; VLX-NEXT: vzeroupper
6703 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask:
6704 ; NoVLX: # %bb.0: # %entry
6705 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
6706 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6707 ; NoVLX-NEXT: kmovw %edi, %k1
6708 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6709 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6710 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6711 ; NoVLX-NEXT: kmovw %k0, %eax
6712 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6713 ; NoVLX-NEXT: vzeroupper
6716 %0 = bitcast <4 x i64> %__a to <8 x i32>
6717 %1 = bitcast <4 x i64> %__b to <8 x i32>
6718 %2 = icmp sgt <8 x i32> %0, %1
6719 %3 = bitcast i8 %__u to <8 x i1>
6720 %4 = and <8 x i1> %2, %3
6721 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6722 %6 = bitcast <16 x i1> %5 to i16
; Masked signed greater-than compare of eight i32 lanes with the second operand
; loaded from memory; the 8-lane result is widened to a 16-bit mask.
;   %__u - i8 write mask, used in full as <8 x i1>.
;   %__a - first operand, reinterpreted as <8 x i32>.
;   %__b - pointer to the second operand, loaded as <4 x i64>.
; The compare result is ANDed with the mask and widened by a shufflevector
; whose indices 8-15 select lanes of the zeroinitializer operand, so lanes 8-15
; of the <16 x i1> bitcast to i16 are zero.
; Expected codegen: with AVX512VL the load folds into a write-masked ymm
; vpcmpgtd; with AVX512F only a vmovdqa load feeds a masked zmm compare and the
; kshiftlw/kshiftrw $8 pair keeps the low 8 bits of the 16-bit mask.
6726 define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
6727 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem:
6728 ; VLX: # %bb.0: # %entry
6729 ; VLX-NEXT: kmovd %edi, %k1
6730 ; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
6731 ; VLX-NEXT: kmovd %k0, %eax
6732 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6733 ; VLX-NEXT: vzeroupper
6736 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem:
6737 ; NoVLX: # %bb.0: # %entry
6738 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6739 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
6740 ; NoVLX-NEXT: kmovw %edi, %k1
6741 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6742 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6743 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6744 ; NoVLX-NEXT: kmovw %k0, %eax
6745 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6746 ; NoVLX-NEXT: vzeroupper
6749 %0 = bitcast <4 x i64> %__a to <8 x i32>
6750 %load = load <4 x i64>, <4 x i64>* %__b
6751 %1 = bitcast <4 x i64> %load to <8 x i32>
6752 %2 = icmp sgt <8 x i32> %0, %1
6753 %3 = bitcast i8 %__u to <8 x i1>
6754 %4 = and <8 x i1> %2, %3
6755 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6756 %6 = bitcast <16 x i1> %5 to i16
; Signed greater-than compare of eight i32 lanes against a scalar broadcast
; from memory; the 8-lane result is widened to a 16-bit mask.
;   %__a - first operand, reinterpreted as <8 x i32>.
;   %__b - pointer to one i32 that is splatted to all eight lanes via
;          insertelement + shufflevector with an all-zero index mask.
; The widening shufflevector uses indices 8-15 to select lanes of the
; zeroinitializer operand, so lanes 8-15 of the <16 x i1> bitcast to i16 are
; zero.
; Expected codegen: with AVX512VL a ymm vpcmpgtd with an embedded {1to8}
; broadcast; with AVX512F only a vpbroadcastd feeds a zmm compare and the
; kshiftlw/kshiftrw $8 pair keeps the low 8 bits of the 16-bit mask.
6761 define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
6762 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
6763 ; VLX: # %bb.0: # %entry
6764 ; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
6765 ; VLX-NEXT: kmovd %k0, %eax
6766 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6767 ; VLX-NEXT: vzeroupper
6770 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
6771 ; NoVLX: # %bb.0: # %entry
6772 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6773 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
6774 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6775 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6776 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6777 ; NoVLX-NEXT: kmovw %k0, %eax
6778 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6779 ; NoVLX-NEXT: vzeroupper
6782 %0 = bitcast <4 x i64> %__a to <8 x i32>
6783 %load = load i32, i32* %__b
6784 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
6785 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
6786 %2 = icmp sgt <8 x i32> %0, %1
6787 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6788 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than compare of eight i32 lanes against a scalar
; broadcast from memory; the 8-lane result is widened to a 16-bit mask.
;   %__u - i8 write mask, used in full as <8 x i1>.
;   %__a - first operand, reinterpreted as <8 x i32>.
;   %__b - pointer to one i32 that is splatted to all eight lanes.
; Here the `and` takes the mask as its first operand and the compare result as
; its second. The widening shufflevector uses indices 8-15 to select lanes of
; the zeroinitializer operand, so lanes 8-15 of the <16 x i1> bitcast to i16
; are zero.
; Expected codegen: with AVX512VL a write-masked ymm vpcmpgtd with an embedded
; {1to8} broadcast; with AVX512F only a vpbroadcastd feeds a masked zmm compare
; and the kshiftlw/kshiftrw $8 pair keeps the low 8 bits of the 16-bit mask.
6792 define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
6793 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
6794 ; VLX: # %bb.0: # %entry
6795 ; VLX-NEXT: kmovd %edi, %k1
6796 ; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
6797 ; VLX-NEXT: kmovd %k0, %eax
6798 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6799 ; VLX-NEXT: vzeroupper
6802 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
6803 ; NoVLX: # %bb.0: # %entry
6804 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6805 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
6806 ; NoVLX-NEXT: kmovw %edi, %k1
6807 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6808 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6809 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6810 ; NoVLX-NEXT: kmovw %k0, %eax
6811 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6812 ; NoVLX-NEXT: vzeroupper
6815 %0 = bitcast <4 x i64> %__a to <8 x i32>
6816 %load = load i32, i32* %__b
6817 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
6818 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
6819 %2 = icmp sgt <8 x i32> %0, %1
6820 %3 = bitcast i8 %__u to <8 x i1>
6821 %4 = and <8 x i1> %3, %2
6822 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6823 %6 = bitcast <16 x i1> %5 to i16
; Signed greater-than compare of eight i32 lanes held in registers; the 8-lane
; result is widened to a 32-bit mask.
;   %__a, %__b - operands, each reinterpreted as <8 x i32>.
; The widening shufflevector uses indices 8-15 to select lanes of the
; zeroinitializer operand, so lanes 8-31 of the <32 x i1> bitcast to i32 are
; zero.
; Expected codegen: with AVX512VL a ymm vpcmpgtd into k0; with AVX512F only the
; compare runs on zmm registers and the kshiftlw/kshiftrw $8 pair keeps just
; the low 8 bits of the 16-bit mask.
6828 define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6829 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask:
6830 ; VLX: # %bb.0: # %entry
6831 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
6832 ; VLX-NEXT: kmovd %k0, %eax
6833 ; VLX-NEXT: vzeroupper
6836 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask:
6837 ; NoVLX: # %bb.0: # %entry
6838 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
6839 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6840 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6841 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6842 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6843 ; NoVLX-NEXT: kmovw %k0, %eax
6844 ; NoVLX-NEXT: vzeroupper
6847 %0 = bitcast <4 x i64> %__a to <8 x i32>
6848 %1 = bitcast <4 x i64> %__b to <8 x i32>
6849 %2 = icmp sgt <8 x i32> %0, %1
6850 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6851 %4 = bitcast <32 x i1> %3 to i32
; Signed greater-than compare of eight i32 lanes with the second operand loaded
; from memory; the 8-lane result is widened to a 32-bit mask.
;   %__a - first operand, reinterpreted as <8 x i32>.
;   %__b - pointer to the second operand, loaded as <4 x i64>.
; The widening shufflevector uses indices 8-15 to select lanes of the
; zeroinitializer operand, so lanes 8-31 of the <32 x i1> bitcast to i32 are
; zero.
; Expected codegen: with AVX512VL the load folds into a ymm vpcmpgtd; with
; AVX512F only a vmovdqa load feeds a zmm compare and the kshiftlw/kshiftrw $8
; pair keeps just the low 8 bits of the 16-bit mask.
6855 define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
6856 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem:
6857 ; VLX: # %bb.0: # %entry
6858 ; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
6859 ; VLX-NEXT: kmovd %k0, %eax
6860 ; VLX-NEXT: vzeroupper
6863 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem:
6864 ; NoVLX: # %bb.0: # %entry
6865 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6866 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
6867 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6868 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6869 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6870 ; NoVLX-NEXT: kmovw %k0, %eax
6871 ; NoVLX-NEXT: vzeroupper
6874 %0 = bitcast <4 x i64> %__a to <8 x i32>
6875 %load = load <4 x i64>, <4 x i64>* %__b
6876 %1 = bitcast <4 x i64> %load to <8 x i32>
6877 %2 = icmp sgt <8 x i32> %0, %1
6878 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6879 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of eight i32 lanes held in registers; the
; 8-lane result is widened to a 32-bit mask.
;   %__u - i8 write mask, used in full as <8 x i1>.
;   %__a, %__b - operands, each reinterpreted as <8 x i32>.
; The compare result is ANDed with the mask and widened by a shufflevector
; whose indices 8-15 select lanes of the zeroinitializer operand, so lanes 8-31
; of the <32 x i1> bitcast to i32 are zero.
; Expected codegen: with AVX512VL a write-masked ymm vpcmpgtd; with AVX512F
; only a masked zmm compare followed by the kshiftlw/kshiftrw $8 pair that
; keeps the low 8 bits of the 16-bit mask.
6883 define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6884 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask:
6885 ; VLX: # %bb.0: # %entry
6886 ; VLX-NEXT: kmovd %edi, %k1
6887 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
6888 ; VLX-NEXT: kmovd %k0, %eax
6889 ; VLX-NEXT: vzeroupper
6892 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask:
6893 ; NoVLX: # %bb.0: # %entry
6894 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
6895 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6896 ; NoVLX-NEXT: kmovw %edi, %k1
6897 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6898 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6899 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6900 ; NoVLX-NEXT: kmovw %k0, %eax
6901 ; NoVLX-NEXT: vzeroupper
6904 %0 = bitcast <4 x i64> %__a to <8 x i32>
6905 %1 = bitcast <4 x i64> %__b to <8 x i32>
6906 %2 = icmp sgt <8 x i32> %0, %1
6907 %3 = bitcast i8 %__u to <8 x i1>
6908 %4 = and <8 x i1> %2, %3
6909 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6910 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-than compare of eight i32 lanes with the second operand
; loaded from memory; the 8-lane result is widened to a 32-bit mask.
;   %__u - i8 write mask, used in full as <8 x i1>.
;   %__a - first operand, reinterpreted as <8 x i32>.
;   %__b - pointer to the second operand, loaded as <4 x i64>.
; The compare result is ANDed with the mask and widened by a shufflevector
; whose indices 8-15 select lanes of the zeroinitializer operand, so lanes 8-31
; of the <32 x i1> bitcast to i32 are zero.
; Expected codegen: with AVX512VL the load folds into a write-masked ymm
; vpcmpgtd; with AVX512F only a vmovdqa load feeds a masked zmm compare and the
; kshiftlw/kshiftrw $8 pair keeps the low 8 bits of the 16-bit mask.
6914 define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
6915 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem:
6916 ; VLX: # %bb.0: # %entry
6917 ; VLX-NEXT: kmovd %edi, %k1
6918 ; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
6919 ; VLX-NEXT: kmovd %k0, %eax
6920 ; VLX-NEXT: vzeroupper
6923 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem:
6924 ; NoVLX: # %bb.0: # %entry
6925 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6926 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
6927 ; NoVLX-NEXT: kmovw %edi, %k1
6928 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6929 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6930 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6931 ; NoVLX-NEXT: kmovw %k0, %eax
6932 ; NoVLX-NEXT: vzeroupper
6935 %0 = bitcast <4 x i64> %__a to <8 x i32>
6936 %load = load <4 x i64>, <4 x i64>* %__b
6937 %1 = bitcast <4 x i64> %load to <8 x i32>
6938 %2 = icmp sgt <8 x i32> %0, %1
6939 %3 = bitcast i8 %__u to <8 x i1>
6940 %4 = and <8 x i1> %2, %3
6941 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6942 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-than compare of eight i32 lanes against a scalar broadcast
; from memory; the 8-lane result is widened to a 32-bit mask.
;   %__a - first operand, reinterpreted as <8 x i32>.
;   %__b - pointer to one i32 that is splatted to all eight lanes via
;          insertelement + shufflevector with an all-zero index mask.
; The widening shufflevector uses indices 8-15 to select lanes of the
; zeroinitializer operand, so lanes 8-31 of the <32 x i1> bitcast to i32 are
; zero.
; Expected codegen: with AVX512VL a ymm vpcmpgtd with an embedded {1to8}
; broadcast; with AVX512F only a vpbroadcastd feeds a zmm compare and the
; kshiftlw/kshiftrw $8 pair keeps the low 8 bits of the 16-bit mask.
6947 define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
6948 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
6949 ; VLX: # %bb.0: # %entry
6950 ; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
6951 ; VLX-NEXT: kmovd %k0, %eax
6952 ; VLX-NEXT: vzeroupper
6955 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
6956 ; NoVLX: # %bb.0: # %entry
6957 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6958 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
6959 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6960 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6961 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6962 ; NoVLX-NEXT: kmovw %k0, %eax
6963 ; NoVLX-NEXT: vzeroupper
6966 %0 = bitcast <4 x i64> %__a to <8 x i32>
6967 %load = load i32, i32* %__b
6968 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
6969 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
6970 %2 = icmp sgt <8 x i32> %0, %1
6971 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6972 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of eight i32 lanes against a scalar
; broadcast from memory; the 8-lane result is widened to a 32-bit mask.
;   %__u - i8 write mask, used in full as <8 x i1>.
;   %__a - first operand, reinterpreted as <8 x i32>.
;   %__b - pointer to one i32 that is splatted to all eight lanes.
; Here the `and` takes the mask as its first operand and the compare result as
; its second. The widening shufflevector uses indices 8-15 to select lanes of
; the zeroinitializer operand, so lanes 8-31 of the <32 x i1> bitcast to i32
; are zero.
; Expected codegen: with AVX512VL a write-masked ymm vpcmpgtd with an embedded
; {1to8} broadcast; with AVX512F only a vpbroadcastd feeds a masked zmm compare
; and the kshiftlw/kshiftrw $8 pair keeps the low 8 bits of the 16-bit mask.
6976 define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
6977 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
6978 ; VLX: # %bb.0: # %entry
6979 ; VLX-NEXT: kmovd %edi, %k1
6980 ; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
6981 ; VLX-NEXT: kmovd %k0, %eax
6982 ; VLX-NEXT: vzeroupper
6985 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
6986 ; NoVLX: # %bb.0: # %entry
6987 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6988 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
6989 ; NoVLX-NEXT: kmovw %edi, %k1
6990 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6991 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6992 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6993 ; NoVLX-NEXT: kmovw %k0, %eax
6994 ; NoVLX-NEXT: vzeroupper
6997 %0 = bitcast <4 x i64> %__a to <8 x i32>
6998 %load = load i32, i32* %__b
6999 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
7000 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7001 %2 = icmp sgt <8 x i32> %0, %1
7002 %3 = bitcast i8 %__u to <8 x i1>
7003 %4 = and <8 x i1> %3, %2
7004 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7005 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-than compare of eight i32 lanes held in registers; the 8-lane
; result is widened to a 64-bit mask.
;   %__a, %__b - operands, each reinterpreted as <8 x i32>.
; The widening shufflevector uses indices 8-15 to select lanes of the
; zeroinitializer operand, so lanes 8-63 of the <64 x i1> bitcast to i64 are
; zero.
; Expected codegen: with AVX512VL a ymm vpcmpgtd into k0 read out with kmovq;
; with AVX512F only the compare runs on zmm registers and the
; kshiftlw/kshiftrw $8 pair keeps just the low 8 bits of the 16-bit mask.
7010 define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
7011 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask:
7012 ; VLX: # %bb.0: # %entry
7013 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
7014 ; VLX-NEXT: kmovq %k0, %rax
7015 ; VLX-NEXT: vzeroupper
7018 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask:
7019 ; NoVLX: # %bb.0: # %entry
7020 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
7021 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
7022 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7023 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
7024 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
7025 ; NoVLX-NEXT: kmovw %k0, %eax
7026 ; NoVLX-NEXT: vzeroupper
7029 %0 = bitcast <4 x i64> %__a to <8 x i32>
7030 %1 = bitcast <4 x i64> %__b to <8 x i32>
7031 %2 = icmp sgt <8 x i32> %0, %1
7032 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7033 %4 = bitcast <64 x i1> %3 to i64
; Signed greater-than compare of eight i32 lanes with the second operand loaded
; from memory; the 8-lane result is widened to a 64-bit mask.
;   %__a - first operand, reinterpreted as <8 x i32>.
;   %__b - pointer to the second operand, loaded as <4 x i64>.
; The widening shufflevector uses indices 8-15 to select lanes of the
; zeroinitializer operand, so lanes 8-63 of the <64 x i1> bitcast to i64 are
; zero.
; Expected codegen: with AVX512VL the load folds into a ymm vpcmpgtd read out
; with kmovq; with AVX512F only a vmovdqa load feeds a zmm compare and the
; kshiftlw/kshiftrw $8 pair keeps just the low 8 bits of the 16-bit mask.
7037 define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
7038 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem:
7039 ; VLX: # %bb.0: # %entry
7040 ; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
7041 ; VLX-NEXT: kmovq %k0, %rax
7042 ; VLX-NEXT: vzeroupper
7045 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem:
7046 ; NoVLX: # %bb.0: # %entry
7047 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
7048 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
7049 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7050 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
7051 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
7052 ; NoVLX-NEXT: kmovw %k0, %eax
7053 ; NoVLX-NEXT: vzeroupper
7056 %0 = bitcast <4 x i64> %__a to <8 x i32>
7057 %load = load <4 x i64>, <4 x i64>* %__b
7058 %1 = bitcast <4 x i64> %load to <8 x i32>
7059 %2 = icmp sgt <8 x i32> %0, %1
7060 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7061 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of eight i32 lanes held in registers; the
; 8-lane result is widened to a 64-bit mask.
;   %__u - i8 write mask, used in full as <8 x i1>.
;   %__a, %__b - operands, each reinterpreted as <8 x i32>.
; The compare result is ANDed with the mask and widened by a shufflevector
; whose indices 8-15 select lanes of the zeroinitializer operand, so lanes 8-63
; of the <64 x i1> bitcast to i64 are zero.
; Expected codegen: with AVX512VL a write-masked ymm vpcmpgtd read out with
; kmovq; with AVX512F only a masked zmm compare followed by the
; kshiftlw/kshiftrw $8 pair that keeps the low 8 bits of the 16-bit mask.
7065 define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
7066 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask:
7067 ; VLX: # %bb.0: # %entry
7068 ; VLX-NEXT: kmovd %edi, %k1
7069 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
7070 ; VLX-NEXT: kmovq %k0, %rax
7071 ; VLX-NEXT: vzeroupper
7074 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask:
7075 ; NoVLX: # %bb.0: # %entry
7076 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
7077 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
7078 ; NoVLX-NEXT: kmovw %edi, %k1
7079 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7080 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
7081 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
7082 ; NoVLX-NEXT: kmovw %k0, %eax
7083 ; NoVLX-NEXT: vzeroupper
7086 %0 = bitcast <4 x i64> %__a to <8 x i32>
7087 %1 = bitcast <4 x i64> %__b to <8 x i32>
7088 %2 = icmp sgt <8 x i32> %0, %1
7089 %3 = bitcast i8 %__u to <8 x i1>
7090 %4 = and <8 x i1> %2, %3
7091 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7092 %6 = bitcast <64 x i1> %5 to i64
; Masked variant with a memory operand: icmp sgt on <8 x i32> where the
; right-hand side is loaded from %__b, ANDed with %__u bitcast to <8 x i1>.
; The shufflevector pads the 8 lanes with lanes of its zeroinitializer
; operand (indices >= 8) up to <64 x i1>, which is returned as an i64.
; VLX checks expect a ymm vpcmpgtd with the load folded in under {%k1};
; NoVLX checks expect a separate vmovdqa, a zmm vpcmpgtd under {%k1} and a
; kshiftlw/kshiftrw $8 pair.
; Check lines are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of hand-editing.
7096 define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
7097 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem:
7098 ; VLX: # %bb.0: # %entry
7099 ; VLX-NEXT: kmovd %edi, %k1
7100 ; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
7101 ; VLX-NEXT: kmovq %k0, %rax
7102 ; VLX-NEXT: vzeroupper
7105 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem:
7106 ; NoVLX: # %bb.0: # %entry
7107 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
7108 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
7109 ; NoVLX-NEXT: kmovw %edi, %k1
7110 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7111 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
7112 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
7113 ; NoVLX-NEXT: kmovw %k0, %eax
7114 ; NoVLX-NEXT: vzeroupper
7117 %0 = bitcast <4 x i64> %__a to <8 x i32>
7118 %load = load <4 x i64>, <4 x i64>* %__b
7119 %1 = bitcast <4 x i64> %load to <8 x i32>
7120 %2 = icmp sgt <8 x i32> %0, %1
7121 %3 = bitcast i8 %__u to <8 x i1>
7122 %4 = and <8 x i1> %2, %3
7123 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7124 %6 = bitcast <64 x i1> %5 to i64
; Broadcast variant: one i32 is loaded from %__b and splatted to all 8 lanes
; (insertelement into lane 0, then a shufflevector with all-zero indices)
; before the icmp sgt on <8 x i32>. A second shufflevector pads the 8 result
; lanes with lanes of its zeroinitializer operand (indices >= 8) up to
; <64 x i1>, which is returned as an i64.
; VLX checks expect the broadcast folded into vpcmpgtd as (%rdi){1to8};
; NoVLX checks expect a separate vpbroadcastd, a zmm vpcmpgtd and a
; kshiftlw/kshiftrw $8 pair.
; Check lines are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of hand-editing.
7129 define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
7130 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
7131 ; VLX: # %bb.0: # %entry
7132 ; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
7133 ; VLX-NEXT: kmovq %k0, %rax
7134 ; VLX-NEXT: vzeroupper
7137 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
7138 ; NoVLX: # %bb.0: # %entry
7139 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
7140 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
7141 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7142 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
7143 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
7144 ; NoVLX-NEXT: kmovw %k0, %eax
7145 ; NoVLX-NEXT: vzeroupper
7148 %0 = bitcast <4 x i64> %__a to <8 x i32>
7149 %load = load i32, i32* %__b
7150 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
7151 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7152 %2 = icmp sgt <8 x i32> %0, %1
7153 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7154 %4 = bitcast <64 x i1> %3 to i64
; Masked broadcast variant: one i32 is loaded from %__b and splatted to all
; 8 lanes (insertelement into lane 0, then a shufflevector with all-zero
; indices), compared with icmp sgt on <8 x i32>, and the result is ANDed with
; %__u bitcast to <8 x i1> (mask is the left AND operand here). A second
; shufflevector pads the 8 lanes with lanes of its zeroinitializer operand
; (indices >= 8) up to <64 x i1>, which is returned as an i64.
; VLX checks expect vpcmpgtd with (%rsi){1to8} under {%k1}; NoVLX checks
; expect a separate vpbroadcastd, a zmm vpcmpgtd under {%k1} and a
; kshiftlw/kshiftrw $8 pair.
; Check lines are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of hand-editing.
7158 define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
7159 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
7160 ; VLX: # %bb.0: # %entry
7161 ; VLX-NEXT: kmovd %edi, %k1
7162 ; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
7163 ; VLX-NEXT: kmovq %k0, %rax
7164 ; VLX-NEXT: vzeroupper
7167 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
7168 ; NoVLX: # %bb.0: # %entry
7169 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
7170 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
7171 ; NoVLX-NEXT: kmovw %edi, %k1
7172 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7173 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
7174 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
7175 ; NoVLX-NEXT: kmovw %k0, %eax
7176 ; NoVLX-NEXT: vzeroupper
7179 %0 = bitcast <4 x i64> %__a to <8 x i32>
7180 %load = load i32, i32* %__b
7181 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
7182 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7183 %2 = icmp sgt <8 x i32> %0, %1
7184 %3 = bitcast i8 %__u to <8 x i1>
7185 %4 = and <8 x i1> %3, %2
7186 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7187 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than (icmp sgt) on <16 x i32>, both operands passed by
; value. The shufflevector pads the 16 result lanes with lanes of its
; zeroinitializer operand (indices >= 16) up to <32 x i1>, which is returned
; as an i32.
; Both check prefixes expect a single zmm vpcmpgtd; they differ only in the
; mask-to-GPR move (kmovd under VLX, kmovw under NoVLX).
; Check lines are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of hand-editing.
7192 define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
7193 ; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask:
7194 ; VLX: # %bb.0: # %entry
7195 ; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7196 ; VLX-NEXT: kmovd %k0, %eax
7197 ; VLX-NEXT: vzeroupper
7200 ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask:
7201 ; NoVLX: # %bb.0: # %entry
7202 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7203 ; NoVLX-NEXT: kmovw %k0, %eax
7204 ; NoVLX-NEXT: vzeroupper
7207 %0 = bitcast <8 x i64> %__a to <16 x i32>
7208 %1 = bitcast <8 x i64> %__b to <16 x i32>
7209 %2 = icmp sgt <16 x i32> %0, %1
7210 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7211 %4 = bitcast <32 x i1> %3 to i32
; Signed greater-than (icmp sgt) on <16 x i32>; the right-hand operand is
; loaded from %__b. The shufflevector pads the 16 result lanes with lanes of
; its zeroinitializer operand (indices >= 16) up to <32 x i1>, which is
; returned as an i32.
; Both check prefixes expect a zmm vpcmpgtd with the load folded in as
; (%rdi); they differ only in the mask-to-GPR move (kmovd vs. kmovw).
; Check lines are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of hand-editing.
7215 define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
7216 ; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem:
7217 ; VLX: # %bb.0: # %entry
7218 ; VLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
7219 ; VLX-NEXT: kmovd %k0, %eax
7220 ; VLX-NEXT: vzeroupper
7223 ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem:
7224 ; NoVLX: # %bb.0: # %entry
7225 ; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
7226 ; NoVLX-NEXT: kmovw %k0, %eax
7227 ; NoVLX-NEXT: vzeroupper
7230 %0 = bitcast <8 x i64> %__a to <16 x i32>
7231 %load = load <8 x i64>, <8 x i64>* %__b
7232 %1 = bitcast <8 x i64> %load to <16 x i32>
7233 %2 = icmp sgt <16 x i32> %0, %1
7234 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7235 %4 = bitcast <32 x i1> %3 to i32
; Masked variant: icmp sgt on <16 x i32> (both operands passed by value),
; ANDed with %__u bitcast to <16 x i1>. The shufflevector pads the 16 lanes
; with lanes of its zeroinitializer operand (indices >= 16) up to <32 x i1>,
; which is returned as an i32.
; VLX checks expect %edi moved into %k1 and vpcmpgtd executed under {%k1};
; NoVLX checks expect an unmasked vpcmpgtd with the mask applied afterwards
; in a GPR (andl %edi, %eax).
; Check lines are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of hand-editing.
7239 define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
7240 ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
7241 ; VLX: # %bb.0: # %entry
7242 ; VLX-NEXT: kmovd %edi, %k1
7243 ; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7244 ; VLX-NEXT: kmovd %k0, %eax
7245 ; VLX-NEXT: vzeroupper
7248 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
7249 ; NoVLX: # %bb.0: # %entry
7250 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7251 ; NoVLX-NEXT: kmovw %k0, %eax
7252 ; NoVLX-NEXT: andl %edi, %eax
7253 ; NoVLX-NEXT: vzeroupper
7256 %0 = bitcast <8 x i64> %__a to <16 x i32>
7257 %1 = bitcast <8 x i64> %__b to <16 x i32>
7258 %2 = icmp sgt <16 x i32> %0, %1
7259 %3 = bitcast i16 %__u to <16 x i1>
7260 %4 = and <16 x i1> %2, %3
7261 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7262 %6 = bitcast <32 x i1> %5 to i32
; Masked variant with a memory operand: icmp sgt on <16 x i32> where the
; right-hand side is loaded from %__b, ANDed with %__u bitcast to
; <16 x i1>. The shufflevector pads the 16 lanes with lanes of its
; zeroinitializer operand (indices >= 16) up to <32 x i1>, which is returned
; as an i32.
; VLX checks expect vpcmpgtd with (%rsi) folded in under {%k1}; NoVLX checks
; expect an unmasked vpcmpgtd (%rsi) with the mask applied afterwards in a
; GPR (andl %edi, %eax).
; Check lines are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of hand-editing.
7266 define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
7267 ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
7268 ; VLX: # %bb.0: # %entry
7269 ; VLX-NEXT: kmovd %edi, %k1
7270 ; VLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
7271 ; VLX-NEXT: kmovd %k0, %eax
7272 ; VLX-NEXT: vzeroupper
7275 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
7276 ; NoVLX: # %bb.0: # %entry
7277 ; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0
7278 ; NoVLX-NEXT: kmovw %k0, %eax
7279 ; NoVLX-NEXT: andl %edi, %eax
7280 ; NoVLX-NEXT: vzeroupper
7283 %0 = bitcast <8 x i64> %__a to <16 x i32>
7284 %load = load <8 x i64>, <8 x i64>* %__b
7285 %1 = bitcast <8 x i64> %load to <16 x i32>
7286 %2 = icmp sgt <16 x i32> %0, %1
7287 %3 = bitcast i16 %__u to <16 x i1>
7288 %4 = and <16 x i1> %2, %3
7289 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7290 %6 = bitcast <32 x i1> %5 to i32
; Broadcast variant: one i32 is loaded from %__b and splatted to all 16
; lanes (insertelement into lane 0, then a shufflevector with all-zero
; indices) before the icmp sgt on <16 x i32>. A second shufflevector pads the
; 16 result lanes with lanes of its zeroinitializer operand (indices >= 16)
; up to <32 x i1>, which is returned as an i32.
; Both check prefixes expect the broadcast folded into vpcmpgtd as
; (%rdi){1to16}; they differ only in the mask-to-GPR move (kmovd vs. kmovw).
; Check lines are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of hand-editing.
7295 define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
7296 ; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
7297 ; VLX: # %bb.0: # %entry
7298 ; VLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7299 ; VLX-NEXT: kmovd %k0, %eax
7300 ; VLX-NEXT: vzeroupper
7303 ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
7304 ; NoVLX: # %bb.0: # %entry
7305 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7306 ; NoVLX-NEXT: kmovw %k0, %eax
7307 ; NoVLX-NEXT: vzeroupper
7310 %0 = bitcast <8 x i64> %__a to <16 x i32>
7311 %load = load i32, i32* %__b
7312 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
7313 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7314 %2 = icmp sgt <16 x i32> %0, %1
7315 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7316 %4 = bitcast <32 x i1> %3 to i32
; Masked broadcast variant: one i32 is loaded from %__b and splatted to all
; 16 lanes (insertelement into lane 0, then a shufflevector with all-zero
; indices), compared with icmp sgt on <16 x i32>, and the result is ANDed
; with %__u bitcast to <16 x i1> (mask is the left AND operand here). A
; second shufflevector pads the 16 lanes with lanes of its zeroinitializer
; operand (indices >= 16) up to <32 x i1>, which is returned as an i32.
; VLX checks expect vpcmpgtd with (%rsi){1to16} under {%k1}; NoVLX checks
; expect an unmasked vpcmpgtd (%rsi){1to16} with the mask applied afterwards
; in a GPR (andl %edi, %eax).
; Check lines are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of hand-editing.
7320 define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
7321 ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
7322 ; VLX: # %bb.0: # %entry
7323 ; VLX-NEXT: kmovd %edi, %k1
7324 ; VLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
7325 ; VLX-NEXT: kmovd %k0, %eax
7326 ; VLX-NEXT: vzeroupper
7329 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
7330 ; NoVLX: # %bb.0: # %entry
7331 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0
7332 ; NoVLX-NEXT: kmovw %k0, %eax
7333 ; NoVLX-NEXT: andl %edi, %eax
7334 ; NoVLX-NEXT: vzeroupper
7337 %0 = bitcast <8 x i64> %__a to <16 x i32>
7338 %load = load i32, i32* %__b
7339 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
7340 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7341 %2 = icmp sgt <16 x i32> %0, %1
7342 %3 = bitcast i16 %__u to <16 x i1>
7343 %4 = and <16 x i1> %3, %2
7344 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7345 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-than (icmp sgt) on <16 x i32>, both operands passed by
; value. The shufflevector pads the 16 result lanes with lanes of its
; zeroinitializer operand (indices >= 16) up to <64 x i1>, which is returned
; as an i64.
; Both check prefixes expect a single zmm vpcmpgtd; VLX checks read the mask
; with kmovq into %rax, NoVLX checks with kmovw into %eax.
; Check lines are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of hand-editing.
7350 define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
7351 ; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask:
7352 ; VLX: # %bb.0: # %entry
7353 ; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7354 ; VLX-NEXT: kmovq %k0, %rax
7355 ; VLX-NEXT: vzeroupper
7358 ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask:
7359 ; NoVLX: # %bb.0: # %entry
7360 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7361 ; NoVLX-NEXT: kmovw %k0, %eax
7362 ; NoVLX-NEXT: vzeroupper
7365 %0 = bitcast <8 x i64> %__a to <16 x i32>
7366 %1 = bitcast <8 x i64> %__b to <16 x i32>
7367 %2 = icmp sgt <16 x i32> %0, %1
7368 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7369 %4 = bitcast <64 x i1> %3 to i64
; Signed greater-than (icmp sgt) on <16 x i32>; the right-hand operand is
; loaded from %__b. The shufflevector pads the 16 result lanes with lanes of
; its zeroinitializer operand (indices >= 16) up to <64 x i1>, which is
; returned as an i64.
; Both check prefixes expect a zmm vpcmpgtd with the load folded in as
; (%rdi); VLX checks read the mask with kmovq, NoVLX checks with kmovw.
; Check lines are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of hand-editing.
7373 define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
7374 ; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem:
7375 ; VLX: # %bb.0: # %entry
7376 ; VLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
7377 ; VLX-NEXT: kmovq %k0, %rax
7378 ; VLX-NEXT: vzeroupper
7381 ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem:
7382 ; NoVLX: # %bb.0: # %entry
7383 ; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
7384 ; NoVLX-NEXT: kmovw %k0, %eax
7385 ; NoVLX-NEXT: vzeroupper
7388 %0 = bitcast <8 x i64> %__a to <16 x i32>
7389 %load = load <8 x i64>, <8 x i64>* %__b
7390 %1 = bitcast <8 x i64> %load to <16 x i32>
7391 %2 = icmp sgt <16 x i32> %0, %1
7392 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7393 %4 = bitcast <64 x i1> %3 to i64
; Masked variant: icmp sgt on <16 x i32> (both operands passed by value),
; ANDed with %__u bitcast to <16 x i1>. The shufflevector pads the 16 lanes
; with lanes of its zeroinitializer operand (indices >= 16) up to <64 x i1>,
; which is returned as an i64.
; VLX checks expect %edi moved into %k1, vpcmpgtd under {%k1} and a kmovq
; into %rax; NoVLX checks expect an unmasked vpcmpgtd with the mask applied
; afterwards in a GPR (andl %edi, %eax).
; Check lines are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of hand-editing.
7397 define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
7398 ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
7399 ; VLX: # %bb.0: # %entry
7400 ; VLX-NEXT: kmovd %edi, %k1
7401 ; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7402 ; VLX-NEXT: kmovq %k0, %rax
7403 ; VLX-NEXT: vzeroupper
7406 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
7407 ; NoVLX: # %bb.0: # %entry
7408 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7409 ; NoVLX-NEXT: kmovw %k0, %eax
7410 ; NoVLX-NEXT: andl %edi, %eax
7411 ; NoVLX-NEXT: vzeroupper
7414 %0 = bitcast <8 x i64> %__a to <16 x i32>
7415 %1 = bitcast <8 x i64> %__b to <16 x i32>
7416 %2 = icmp sgt <16 x i32> %0, %1
7417 %3 = bitcast i16 %__u to <16 x i1>
7418 %4 = and <16 x i1> %2, %3
7419 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7420 %6 = bitcast <64 x i1> %5 to i64
; Masked variant with a memory operand: icmp sgt on <16 x i32> where the
; right-hand side is loaded from %__b, ANDed with %__u bitcast to
; <16 x i1>. The shufflevector pads the 16 lanes with lanes of its
; zeroinitializer operand (indices >= 16) up to <64 x i1>, which is returned
; as an i64.
; VLX checks expect vpcmpgtd with (%rsi) folded in under {%k1} and a kmovq
; into %rax; NoVLX checks expect an unmasked vpcmpgtd (%rsi) with the mask
; applied afterwards in a GPR (andl %edi, %eax).
; Check lines are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of hand-editing.
7424 define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
7425 ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
7426 ; VLX: # %bb.0: # %entry
7427 ; VLX-NEXT: kmovd %edi, %k1
7428 ; VLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
7429 ; VLX-NEXT: kmovq %k0, %rax
7430 ; VLX-NEXT: vzeroupper
7433 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
7434 ; NoVLX: # %bb.0: # %entry
7435 ; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0
7436 ; NoVLX-NEXT: kmovw %k0, %eax
7437 ; NoVLX-NEXT: andl %edi, %eax
7438 ; NoVLX-NEXT: vzeroupper
7441 %0 = bitcast <8 x i64> %__a to <16 x i32>
7442 %load = load <8 x i64>, <8 x i64>* %__b
7443 %1 = bitcast <8 x i64> %load to <16 x i32>
7444 %2 = icmp sgt <16 x i32> %0, %1
7445 %3 = bitcast i16 %__u to <16 x i1>
7446 %4 = and <16 x i1> %2, %3
7447 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7448 %6 = bitcast <64 x i1> %5 to i64
; Broadcast variant: one i32 is loaded from %__b and splatted to all 16
; lanes (insertelement into lane 0, then a shufflevector with all-zero
; indices) before the icmp sgt on <16 x i32>. A second shufflevector pads the
; 16 result lanes with lanes of its zeroinitializer operand (indices >= 16)
; up to <64 x i1>, which is returned as an i64.
; Both check prefixes expect the broadcast folded into vpcmpgtd as
; (%rdi){1to16}; VLX checks read the mask with kmovq, NoVLX checks with
; kmovw.
; Check lines are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of hand-editing.
7453 define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
7454 ; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
7455 ; VLX: # %bb.0: # %entry
7456 ; VLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7457 ; VLX-NEXT: kmovq %k0, %rax
7458 ; VLX-NEXT: vzeroupper
7461 ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
7462 ; NoVLX: # %bb.0: # %entry
7463 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7464 ; NoVLX-NEXT: kmovw %k0, %eax
7465 ; NoVLX-NEXT: vzeroupper
7468 %0 = bitcast <8 x i64> %__a to <16 x i32>
7469 %load = load i32, i32* %__b
7470 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
7471 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7472 %2 = icmp sgt <16 x i32> %0, %1
7473 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7474 %4 = bitcast <64 x i1> %3 to i64
; Masked broadcast variant: one i32 is loaded from %__b and splatted to all
; 16 lanes (insertelement into lane 0, then a shufflevector with all-zero
; indices), compared with icmp sgt on <16 x i32>, and the result is ANDed
; with %__u bitcast to <16 x i1> (mask is the left AND operand here). A
; second shufflevector pads the 16 lanes with lanes of its zeroinitializer
; operand (indices >= 16) up to <64 x i1>, which is returned as an i64.
; VLX checks expect vpcmpgtd with (%rsi){1to16} under {%k1} and a kmovq into
; %rax; NoVLX checks expect an unmasked vpcmpgtd (%rsi){1to16} with the mask
; applied afterwards in a GPR (andl %edi, %eax).
; Check lines are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of hand-editing.
7478 define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
7479 ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
7480 ; VLX: # %bb.0: # %entry
7481 ; VLX-NEXT: kmovd %edi, %k1
7482 ; VLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
7483 ; VLX-NEXT: kmovq %k0, %rax
7484 ; VLX-NEXT: vzeroupper
7487 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
7488 ; NoVLX: # %bb.0: # %entry
7489 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0
7490 ; NoVLX-NEXT: kmovw %k0, %eax
7491 ; NoVLX-NEXT: andl %edi, %eax
7492 ; NoVLX-NEXT: vzeroupper
7495 %0 = bitcast <8 x i64> %__a to <16 x i32>
7496 %load = load i32, i32* %__b
7497 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
7498 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7499 %2 = icmp sgt <16 x i32> %0, %1
7500 %3 = bitcast i16 %__u to <16 x i1>
7501 %4 = and <16 x i1> %3, %2
7502 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7503 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than (icmp sgt) on <2 x i64>, both operands passed by
; value; the two leading bitcasts are same-type (<2 x i64> to <2 x i64>).
; The shufflevector pads the 2 result lanes with lanes of its
; zeroinitializer operand (indices 2 and 3) up to <4 x i1>, which is
; returned as an i4.
; VLX checks expect an xmm vpcmpgtq and a kmovb; NoVLX checks expect a zmm
; vpcmpgtq followed by a kshiftlw/kshiftrw $14 pair.
; Check lines are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of hand-editing.
7508 define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7509 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask:
7510 ; VLX: # %bb.0: # %entry
7511 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
7512 ; VLX-NEXT: kmovb %k0, %eax
7515 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask:
7516 ; NoVLX: # %bb.0: # %entry
7517 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
7518 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7519 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7520 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7521 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7522 ; NoVLX-NEXT: kmovw %k0, %eax
7523 ; NoVLX-NEXT: vzeroupper
7526 %0 = bitcast <2 x i64> %__a to <2 x i64>
7527 %1 = bitcast <2 x i64> %__b to <2 x i64>
7528 %2 = icmp sgt <2 x i64> %0, %1
7529 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7530 %4 = bitcast <4 x i1> %3 to i4
; Signed greater-than (icmp sgt) on <2 x i64>; the right-hand operand is
; loaded from %__b. The bitcasts here are same-type (<2 x i64> to
; <2 x i64>). The shufflevector pads the 2 result lanes with lanes of its
; zeroinitializer operand (indices 2 and 3) up to <4 x i1>, which is
; returned as an i4.
; VLX checks expect an xmm vpcmpgtq with the load folded in and a kmovb;
; NoVLX checks expect a separate vmovdqa, a zmm vpcmpgtq and a
; kshiftlw/kshiftrw $14 pair.
; Check lines are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of hand-editing.
7534 define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
7535 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem:
7536 ; VLX: # %bb.0: # %entry
7537 ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
7538 ; VLX-NEXT: kmovb %k0, %eax
7541 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem:
7542 ; NoVLX: # %bb.0: # %entry
7543 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7544 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
7545 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7546 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7547 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7548 ; NoVLX-NEXT: kmovw %k0, %eax
7549 ; NoVLX-NEXT: vzeroupper
7552 %0 = bitcast <2 x i64> %__a to <2 x i64>
7553 %load = load <2 x i64>, <2 x i64>* %__b
7554 %1 = bitcast <2 x i64> %load to <2 x i64>
7555 %2 = icmp sgt <2 x i64> %0, %1
7556 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7557 %4 = bitcast <4 x i1> %3 to i4
; Masked variant: icmp sgt on <2 x i64> (both operands passed by value).
; %__u is bitcast to <8 x i1>, its lanes 0 and 1 are extracted as
; %extract.i, and that <2 x i1> is ANDed with the compare result. The final
; shufflevector pads the 2 lanes with lanes of its zeroinitializer operand
; (indices 2 and 3) up to <4 x i1>, which is returned as an i4.
; VLX checks expect an xmm vpcmpgtq under {%k1} and a kmovb; NoVLX checks
; expect a zmm vpcmpgtq under {%k1} followed by a kshiftlw/kshiftrw $14
; pair.
; Check lines are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of hand-editing.
7561 define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7562 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask:
7563 ; VLX: # %bb.0: # %entry
7564 ; VLX-NEXT: kmovd %edi, %k1
7565 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
7566 ; VLX-NEXT: kmovb %k0, %eax
7569 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask:
7570 ; NoVLX: # %bb.0: # %entry
7571 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
7572 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7573 ; NoVLX-NEXT: kmovw %edi, %k1
7574 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7575 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7576 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7577 ; NoVLX-NEXT: kmovw %k0, %eax
7578 ; NoVLX-NEXT: vzeroupper
7581 %0 = bitcast <2 x i64> %__a to <2 x i64>
7582 %1 = bitcast <2 x i64> %__b to <2 x i64>
7583 %2 = icmp sgt <2 x i64> %0, %1
7584 %3 = bitcast i8 %__u to <8 x i1>
7585 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7586 %4 = and <2 x i1> %2, %extract.i
7587 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7588 %6 = bitcast <4 x i1> %5 to i4
; Masked variant with a memory operand: icmp sgt on <2 x i64> where the
; right-hand side is loaded from %__b. %__u is bitcast to <8 x i1>, its
; lanes 0 and 1 are extracted as %extract.i, and that <2 x i1> is ANDed with
; the compare result. The final shufflevector pads the 2 lanes with lanes of
; its zeroinitializer operand (indices 2 and 3) up to <4 x i1>, which is
; returned as an i4.
; VLX checks expect an xmm vpcmpgtq with (%rsi) folded in under {%k1} and a
; kmovb; NoVLX checks expect a separate vmovdqa, a zmm vpcmpgtq under {%k1}
; and a kshiftlw/kshiftrw $14 pair.
; Check lines are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of hand-editing.
7592 define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
7593 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem:
7594 ; VLX: # %bb.0: # %entry
7595 ; VLX-NEXT: kmovd %edi, %k1
7596 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
7597 ; VLX-NEXT: kmovb %k0, %eax
7600 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem:
7601 ; NoVLX: # %bb.0: # %entry
7602 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7603 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
7604 ; NoVLX-NEXT: kmovw %edi, %k1
7605 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7606 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7607 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7608 ; NoVLX-NEXT: kmovw %k0, %eax
7609 ; NoVLX-NEXT: vzeroupper
7612 %0 = bitcast <2 x i64> %__a to <2 x i64>
7613 %load = load <2 x i64>, <2 x i64>* %__b
7614 %1 = bitcast <2 x i64> %load to <2 x i64>
7615 %2 = icmp sgt <2 x i64> %0, %1
7616 %3 = bitcast i8 %__u to <8 x i1>
7617 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7618 %4 = and <2 x i1> %2, %extract.i
7619 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7620 %6 = bitcast <4 x i1> %5 to i4
; Broadcast variant: one i64 is loaded from %__b and splatted to both lanes
; (insertelement into lane 0, then a shufflevector with indices <0, 0>)
; before the icmp sgt on <2 x i64>. A second shufflevector pads the 2 result
; lanes with lanes of its zeroinitializer operand (indices 2 and 3) up to
; <4 x i1>, which is returned as an i4.
; VLX checks expect the broadcast folded into vpcmpgtq as (%rdi){1to2} and a
; kmovb; NoVLX checks expect a separate vpbroadcastq, a zmm vpcmpgtq and a
; kshiftlw/kshiftrw $14 pair.
; Check lines are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of hand-editing.
7625 define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
7626 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
7627 ; VLX: # %bb.0: # %entry
7628 ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
7629 ; VLX-NEXT: kmovb %k0, %eax
7632 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
7633 ; NoVLX: # %bb.0: # %entry
7634 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7635 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
7636 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7637 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7638 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7639 ; NoVLX-NEXT: kmovw %k0, %eax
7640 ; NoVLX-NEXT: vzeroupper
7643 %0 = bitcast <2 x i64> %__a to <2 x i64>
7644 %load = load i64, i64* %__b
7645 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
7646 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
7647 %2 = icmp sgt <2 x i64> %0, %1
7648 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7649 %4 = bitcast <4 x i1> %3 to i4
; Masked broadcast variant: one i64 is loaded from %__b and splatted to both
; lanes (insertelement into lane 0, then a shufflevector with indices
; <0, 0>) and compared with icmp sgt on <2 x i64>. %__u is bitcast to
; <8 x i1>, its lanes 0 and 1 are extracted as %extract.i, and that
; <2 x i1> is ANDed with the compare result (mask is the left AND operand
; here). The final shufflevector pads the 2 lanes with lanes of its
; zeroinitializer operand (indices 2 and 3) up to <4 x i1>, returned as i4.
; VLX checks expect vpcmpgtq with (%rsi){1to2} under {%k1} and a kmovb;
; NoVLX checks expect a separate vpbroadcastq, a zmm vpcmpgtq under {%k1}
; and a kshiftlw/kshiftrw $14 pair.
; Check lines are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of hand-editing.
7653 define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
7654 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
7655 ; VLX: # %bb.0: # %entry
7656 ; VLX-NEXT: kmovd %edi, %k1
7657 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
7658 ; VLX-NEXT: kmovb %k0, %eax
7661 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
7662 ; NoVLX: # %bb.0: # %entry
7663 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7664 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
7665 ; NoVLX-NEXT: kmovw %edi, %k1
7666 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7667 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7668 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7669 ; NoVLX-NEXT: kmovw %k0, %eax
7670 ; NoVLX-NEXT: vzeroupper
7673 %0 = bitcast <2 x i64> %__a to <2 x i64>
7674 %load = load i64, i64* %__b
7675 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
7676 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
7677 %2 = icmp sgt <2 x i64> %0, %1
7678 %3 = bitcast i8 %__u to <8 x i1>
7679 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7680 %4 = and <2 x i1> %extract.i, %2
7681 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7682 %6 = bitcast <4 x i1> %5 to i4
; Signed greater-than (icmp sgt) on <2 x i64>, both operands passed by
; value; the two leading bitcasts are same-type (<2 x i64> to <2 x i64>).
; The shufflevector pads the 2 result lanes up to <8 x i1> by repeatedly
; selecting lanes 2 and 3, i.e. lanes of its zeroinitializer operand; the
; result is returned as an i8.
; VLX checks expect an xmm vpcmpgtq and a kmovd; NoVLX checks expect a zmm
; vpcmpgtq followed by a kshiftlw/kshiftrw $14 pair.
; Check lines are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of hand-editing.
7687 define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7688 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask:
7689 ; VLX: # %bb.0: # %entry
7690 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
7691 ; VLX-NEXT: kmovd %k0, %eax
7692 ; VLX-NEXT: # kill: def $al killed $al killed $eax
7695 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask:
7696 ; NoVLX: # %bb.0: # %entry
7697 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
7698 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7699 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7700 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7701 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7702 ; NoVLX-NEXT: kmovw %k0, %eax
7703 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
7704 ; NoVLX-NEXT: vzeroupper
7707 %0 = bitcast <2 x i64> %__a to <2 x i64>
7708 %1 = bitcast <2 x i64> %__b to <2 x i64>
7709 %2 = icmp sgt <2 x i64> %0, %1
7710 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7711 %4 = bitcast <8 x i1> %3 to i8
; Signed greater-than (icmp sgt) on <2 x i64>; the right-hand operand is
; loaded from %__b. The bitcasts here are same-type (<2 x i64> to
; <2 x i64>). The shufflevector pads the 2 result lanes up to <8 x i1> by
; repeatedly selecting lanes 2 and 3, i.e. lanes of its zeroinitializer
; operand; the result is returned as an i8.
; VLX checks expect an xmm vpcmpgtq with the load folded in and a kmovd;
; NoVLX checks expect a separate vmovdqa, a zmm vpcmpgtq and a
; kshiftlw/kshiftrw $14 pair.
; Check lines are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of hand-editing.
7715 define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
7716 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem:
7717 ; VLX: # %bb.0: # %entry
7718 ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
7719 ; VLX-NEXT: kmovd %k0, %eax
7720 ; VLX-NEXT: # kill: def $al killed $al killed $eax
7723 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem:
7724 ; NoVLX: # %bb.0: # %entry
7725 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7726 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
7727 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7728 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7729 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7730 ; NoVLX-NEXT: kmovw %k0, %eax
7731 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
7732 ; NoVLX-NEXT: vzeroupper
7735 %0 = bitcast <2 x i64> %__a to <2 x i64>
7736 %load = load <2 x i64>, <2 x i64>* %__b
7737 %1 = bitcast <2 x i64> %load to <2 x i64>
7738 %2 = icmp sgt <2 x i64> %0, %1
7739 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7740 %4 = bitcast <8 x i1> %3 to i8
; Masked variant: icmp sgt on <2 x i64> (both operands passed by value).
; %__u is bitcast to <8 x i1>, its lanes 0 and 1 are extracted as
; %extract.i, and that <2 x i1> is ANDed with the compare result. The final
; shufflevector pads the 2 lanes up to <8 x i1> by repeatedly selecting
; lanes 2 and 3, i.e. lanes of its zeroinitializer operand; the result is
; returned as an i8.
; VLX checks expect an xmm vpcmpgtq under {%k1} and a kmovd; NoVLX checks
; expect a zmm vpcmpgtq under {%k1} followed by a kshiftlw/kshiftrw $14
; pair.
; Check lines are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of hand-editing.
7744 define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7745 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask:
7746 ; VLX: # %bb.0: # %entry
7747 ; VLX-NEXT: kmovd %edi, %k1
7748 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
7749 ; VLX-NEXT: kmovd %k0, %eax
7750 ; VLX-NEXT: # kill: def $al killed $al killed $eax
7753 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask:
7754 ; NoVLX: # %bb.0: # %entry
7755 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
7756 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7757 ; NoVLX-NEXT: kmovw %edi, %k1
7758 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7759 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7760 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7761 ; NoVLX-NEXT: kmovw %k0, %eax
7762 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
7763 ; NoVLX-NEXT: vzeroupper
7766 %0 = bitcast <2 x i64> %__a to <2 x i64>
7767 %1 = bitcast <2 x i64> %__b to <2 x i64>
7768 %2 = icmp sgt <2 x i64> %0, %1
7769 %3 = bitcast i8 %__u to <8 x i1>
7770 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7771 %4 = and <2 x i1> %2, %extract.i
7772 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7773 %6 = bitcast <8 x i1> %5 to i8
; Masked signed greater-than test of %__a against a <2 x i64> vector loaded
; from %__b. The icmp sgt result is ANDed with lanes 0-1 of %__u, zero-padded
; to <8 x i1> (shuffle indices 2 and 3 select zeroinitializer lanes) and
; bitcast to i8, the declared return type.
7777 define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
7778 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem:
7779 ; VLX: # %bb.0: # %entry
7780 ; VLX-NEXT: kmovd %edi, %k1
7781 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
7782 ; VLX-NEXT: kmovd %k0, %eax
7783 ; VLX-NEXT: # kill: def $al killed $al killed $eax
7786 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem:
7787 ; NoVLX: # %bb.0: # %entry
7788 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7789 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
7790 ; NoVLX-NEXT: kmovw %edi, %k1
7791 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7792 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7793 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7794 ; NoVLX-NEXT: kmovw %k0, %eax
7795 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
7796 ; NoVLX-NEXT: vzeroupper
7799 %0 = bitcast <2 x i64> %__a to <2 x i64>
7800 %load = load <2 x i64>, <2 x i64>* %__b
7801 %1 = bitcast <2 x i64> %load to <2 x i64>
7802 %2 = icmp sgt <2 x i64> %0, %1
7803 %3 = bitcast i8 %__u to <8 x i1>
7804 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7805 %4 = and <2 x i1> %2, %extract.i
7806 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7807 %6 = bitcast <8 x i1> %5 to i8
; Unmasked signed greater-than test of %__a against a broadcast scalar.
; One i64 is loaded from %__b and splatted to both lanes (insertelement
; followed by a <0, 0> shuffle). The <2 x i1> icmp sgt result is zero-padded
; to <8 x i1> and bitcast to i8, the declared return type.
7812 define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
7813 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
7814 ; VLX: # %bb.0: # %entry
7815 ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
7816 ; VLX-NEXT: kmovd %k0, %eax
7817 ; VLX-NEXT: # kill: def $al killed $al killed $eax
7820 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
7821 ; NoVLX: # %bb.0: # %entry
7822 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7823 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
7824 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7825 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7826 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7827 ; NoVLX-NEXT: kmovw %k0, %eax
7828 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
7829 ; NoVLX-NEXT: vzeroupper
7832 %0 = bitcast <2 x i64> %__a to <2 x i64>
7833 %load = load i64, i64* %__b
7834 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
7835 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
7836 %2 = icmp sgt <2 x i64> %0, %1
7837 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7838 %4 = bitcast <8 x i1> %3 to i8
; Masked signed greater-than test of %__a against a broadcast scalar.
; One i64 is loaded from %__b and splatted to both lanes; the icmp sgt result
; is ANDed with lanes 0-1 of %__u, zero-padded to <8 x i1> (shuffle indices 2
; and 3 select zeroinitializer lanes) and bitcast to i8.
7842 define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
7843 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
7844 ; VLX: # %bb.0: # %entry
7845 ; VLX-NEXT: kmovd %edi, %k1
7846 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
7847 ; VLX-NEXT: kmovd %k0, %eax
7848 ; VLX-NEXT: # kill: def $al killed $al killed $eax
7851 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
7852 ; NoVLX: # %bb.0: # %entry
7853 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7854 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
7855 ; NoVLX-NEXT: kmovw %edi, %k1
7856 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7857 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7858 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7859 ; NoVLX-NEXT: kmovw %k0, %eax
7860 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
7861 ; NoVLX-NEXT: vzeroupper
7864 %0 = bitcast <2 x i64> %__a to <2 x i64>
7865 %load = load i64, i64* %__b
7866 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
7867 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
7868 %2 = icmp sgt <2 x i64> %0, %1
7869 %3 = bitcast i8 %__u to <8 x i1>
7870 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7871 %4 = and <2 x i1> %extract.i, %2
7872 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7873 %6 = bitcast <8 x i1> %5 to i8
; Unmasked signed greater-than test on two <2 x i64> register operands.
; The <2 x i1> icmp sgt result is zero-padded to <16 x i1> (shuffle indices 2
; and 3 select zeroinitializer lanes) and bitcast to i16, the declared
; return type.
7878 define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7879 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask:
7880 ; VLX: # %bb.0: # %entry
7881 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
7882 ; VLX-NEXT: kmovd %k0, %eax
7883 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
7886 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask:
7887 ; NoVLX: # %bb.0: # %entry
7888 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
7889 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7890 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7891 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7892 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7893 ; NoVLX-NEXT: kmovw %k0, %eax
7894 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
7895 ; NoVLX-NEXT: vzeroupper
7898 %0 = bitcast <2 x i64> %__a to <2 x i64>
7899 %1 = bitcast <2 x i64> %__b to <2 x i64>
7900 %2 = icmp sgt <2 x i64> %0, %1
7901 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7902 %4 = bitcast <16 x i1> %3 to i16
; Unmasked signed greater-than test of %__a against a <2 x i64> vector loaded
; from %__b. The <2 x i1> icmp sgt result is zero-padded to <16 x i1> (shuffle
; indices 2 and 3 select zeroinitializer lanes) and bitcast to i16.
7906 define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
7907 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem:
7908 ; VLX: # %bb.0: # %entry
7909 ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
7910 ; VLX-NEXT: kmovd %k0, %eax
7911 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
7914 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem:
7915 ; NoVLX: # %bb.0: # %entry
7916 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7917 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
7918 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7919 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7920 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7921 ; NoVLX-NEXT: kmovw %k0, %eax
7922 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
7923 ; NoVLX-NEXT: vzeroupper
7926 %0 = bitcast <2 x i64> %__a to <2 x i64>
7927 %load = load <2 x i64>, <2 x i64>* %__b
7928 %1 = bitcast <2 x i64> %load to <2 x i64>
7929 %2 = icmp sgt <2 x i64> %0, %1
7930 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7931 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than test on two <2 x i64> register operands.
; The icmp sgt result is ANDed with lanes 0-1 of %__u (viewed as <8 x i1>),
; zero-padded to <16 x i1> (shuffle indices 2 and 3 select zeroinitializer
; lanes) and bitcast to i16, the declared return type.
7935 define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7936 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask:
7937 ; VLX: # %bb.0: # %entry
7938 ; VLX-NEXT: kmovd %edi, %k1
7939 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
7940 ; VLX-NEXT: kmovd %k0, %eax
7941 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
7944 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask:
7945 ; NoVLX: # %bb.0: # %entry
7946 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
7947 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7948 ; NoVLX-NEXT: kmovw %edi, %k1
7949 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7950 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7951 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7952 ; NoVLX-NEXT: kmovw %k0, %eax
7953 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
7954 ; NoVLX-NEXT: vzeroupper
7957 %0 = bitcast <2 x i64> %__a to <2 x i64>
7958 %1 = bitcast <2 x i64> %__b to <2 x i64>
7959 %2 = icmp sgt <2 x i64> %0, %1
7960 %3 = bitcast i8 %__u to <8 x i1>
7961 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7962 %4 = and <2 x i1> %2, %extract.i
7963 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7964 %6 = bitcast <16 x i1> %5 to i16
; Masked signed greater-than test of %__a against a <2 x i64> vector loaded
; from %__b. The icmp sgt result is ANDed with lanes 0-1 of %__u, zero-padded
; to <16 x i1> (shuffle indices 2 and 3 select zeroinitializer lanes) and
; bitcast to i16, the declared return type.
7968 define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
7969 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem:
7970 ; VLX: # %bb.0: # %entry
7971 ; VLX-NEXT: kmovd %edi, %k1
7972 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
7973 ; VLX-NEXT: kmovd %k0, %eax
7974 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
7977 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem:
7978 ; NoVLX: # %bb.0: # %entry
7979 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7980 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
7981 ; NoVLX-NEXT: kmovw %edi, %k1
7982 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7983 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7984 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7985 ; NoVLX-NEXT: kmovw %k0, %eax
7986 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
7987 ; NoVLX-NEXT: vzeroupper
7990 %0 = bitcast <2 x i64> %__a to <2 x i64>
7991 %load = load <2 x i64>, <2 x i64>* %__b
7992 %1 = bitcast <2 x i64> %load to <2 x i64>
7993 %2 = icmp sgt <2 x i64> %0, %1
7994 %3 = bitcast i8 %__u to <8 x i1>
7995 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7996 %4 = and <2 x i1> %2, %extract.i
7997 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7998 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed greater-than test of %__a against a broadcast scalar.
; One i64 is loaded from %__b and splatted to both lanes (insertelement
; followed by a <0, 0> shuffle). The <2 x i1> icmp sgt result is zero-padded
; to <16 x i1> and bitcast to i16, the declared return type.
8003 define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
8004 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
8005 ; VLX: # %bb.0: # %entry
8006 ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
8007 ; VLX-NEXT: kmovd %k0, %eax
8008 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8011 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
8012 ; NoVLX: # %bb.0: # %entry
8013 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8014 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
8015 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8016 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8017 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8018 ; NoVLX-NEXT: kmovw %k0, %eax
8019 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8020 ; NoVLX-NEXT: vzeroupper
8023 %0 = bitcast <2 x i64> %__a to <2 x i64>
8024 %load = load i64, i64* %__b
8025 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8026 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8027 %2 = icmp sgt <2 x i64> %0, %1
8028 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8029 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than test of %__a against a broadcast scalar.
; One i64 is loaded from %__b and splatted to both lanes; the icmp sgt result
; is ANDed with lanes 0-1 of %__u, zero-padded to <16 x i1> (shuffle indices 2
; and 3 select zeroinitializer lanes) and bitcast to i16.
8033 define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
8034 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
8035 ; VLX: # %bb.0: # %entry
8036 ; VLX-NEXT: kmovd %edi, %k1
8037 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
8038 ; VLX-NEXT: kmovd %k0, %eax
8039 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8042 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
8043 ; NoVLX: # %bb.0: # %entry
8044 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8045 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
8046 ; NoVLX-NEXT: kmovw %edi, %k1
8047 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8048 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8049 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8050 ; NoVLX-NEXT: kmovw %k0, %eax
8051 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8052 ; NoVLX-NEXT: vzeroupper
8055 %0 = bitcast <2 x i64> %__a to <2 x i64>
8056 %load = load i64, i64* %__b
8057 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8058 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8059 %2 = icmp sgt <2 x i64> %0, %1
8060 %3 = bitcast i8 %__u to <8 x i1>
8061 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8062 %4 = and <2 x i1> %extract.i, %2
8063 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8064 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed greater-than test on two <2 x i64> register operands.
; The <2 x i1> icmp sgt result is zero-padded to <32 x i1> (shuffle indices 2
; and 3 select zeroinitializer lanes) and bitcast to i32, the declared
; return type.
8069 define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
8070 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
8071 ; VLX: # %bb.0: # %entry
8072 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
8073 ; VLX-NEXT: kmovd %k0, %eax
8076 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
8077 ; NoVLX: # %bb.0: # %entry
8078 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
8079 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8080 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8081 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8082 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8083 ; NoVLX-NEXT: kmovw %k0, %eax
8084 ; NoVLX-NEXT: vzeroupper
8087 %0 = bitcast <2 x i64> %__a to <2 x i64>
8088 %1 = bitcast <2 x i64> %__b to <2 x i64>
8089 %2 = icmp sgt <2 x i64> %0, %1
8090 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8091 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed greater-than test of %__a against a <2 x i64> vector loaded
; from %__b. The <2 x i1> icmp sgt result is zero-padded to <32 x i1> (shuffle
; indices 2 and 3 select zeroinitializer lanes) and bitcast to i32.
8095 define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
8096 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
8097 ; VLX: # %bb.0: # %entry
8098 ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
8099 ; VLX-NEXT: kmovd %k0, %eax
8102 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
8103 ; NoVLX: # %bb.0: # %entry
8104 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8105 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
8106 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8107 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8108 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8109 ; NoVLX-NEXT: kmovw %k0, %eax
8110 ; NoVLX-NEXT: vzeroupper
8113 %0 = bitcast <2 x i64> %__a to <2 x i64>
8114 %load = load <2 x i64>, <2 x i64>* %__b
8115 %1 = bitcast <2 x i64> %load to <2 x i64>
8116 %2 = icmp sgt <2 x i64> %0, %1
8117 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8118 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than test on two <2 x i64> register operands.
; The icmp sgt result is ANDed with lanes 0-1 of %__u (viewed as <8 x i1>),
; zero-padded to <32 x i1> (shuffle indices 2 and 3 select zeroinitializer
; lanes) and bitcast to i32, the declared return type.
8122 define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
8123 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
8124 ; VLX: # %bb.0: # %entry
8125 ; VLX-NEXT: kmovd %edi, %k1
8126 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
8127 ; VLX-NEXT: kmovd %k0, %eax
8130 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
8131 ; NoVLX: # %bb.0: # %entry
8132 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
8133 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8134 ; NoVLX-NEXT: kmovw %edi, %k1
8135 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8136 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8137 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8138 ; NoVLX-NEXT: kmovw %k0, %eax
8139 ; NoVLX-NEXT: vzeroupper
8142 %0 = bitcast <2 x i64> %__a to <2 x i64>
8143 %1 = bitcast <2 x i64> %__b to <2 x i64>
8144 %2 = icmp sgt <2 x i64> %0, %1
8145 %3 = bitcast i8 %__u to <8 x i1>
8146 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8147 %4 = and <2 x i1> %2, %extract.i
8148 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8149 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-than test of %__a against a <2 x i64> vector loaded
; from %__b. The icmp sgt result is ANDed with lanes 0-1 of %__u, zero-padded
; to <32 x i1> (shuffle indices 2 and 3 select zeroinitializer lanes) and
; bitcast to i32, the declared return type.
8153 define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
8154 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
8155 ; VLX: # %bb.0: # %entry
8156 ; VLX-NEXT: kmovd %edi, %k1
8157 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
8158 ; VLX-NEXT: kmovd %k0, %eax
8161 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
8162 ; NoVLX: # %bb.0: # %entry
8163 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8164 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
8165 ; NoVLX-NEXT: kmovw %edi, %k1
8166 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8167 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8168 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8169 ; NoVLX-NEXT: kmovw %k0, %eax
8170 ; NoVLX-NEXT: vzeroupper
8173 %0 = bitcast <2 x i64> %__a to <2 x i64>
8174 %load = load <2 x i64>, <2 x i64>* %__b
8175 %1 = bitcast <2 x i64> %load to <2 x i64>
8176 %2 = icmp sgt <2 x i64> %0, %1
8177 %3 = bitcast i8 %__u to <8 x i1>
8178 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8179 %4 = and <2 x i1> %2, %extract.i
8180 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8181 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed greater-than test of %__a against a broadcast scalar.
; One i64 is loaded from %__b and splatted to both lanes (insertelement
; followed by a <0, 0> shuffle). The <2 x i1> icmp sgt result is zero-padded
; to <32 x i1> and bitcast to i32, the declared return type.
8186 define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
8187 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
8188 ; VLX: # %bb.0: # %entry
8189 ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
8190 ; VLX-NEXT: kmovd %k0, %eax
8193 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
8194 ; NoVLX: # %bb.0: # %entry
8195 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8196 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
8197 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8198 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8199 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8200 ; NoVLX-NEXT: kmovw %k0, %eax
8201 ; NoVLX-NEXT: vzeroupper
8204 %0 = bitcast <2 x i64> %__a to <2 x i64>
8205 %load = load i64, i64* %__b
8206 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8207 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8208 %2 = icmp sgt <2 x i64> %0, %1
8209 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8210 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than test of %__a against a broadcast scalar.
; One i64 is loaded from %__b and splatted to both lanes; the icmp sgt result
; is ANDed with lanes 0-1 of %__u, zero-padded to <32 x i1> (shuffle indices 2
; and 3 select zeroinitializer lanes) and bitcast to i32.
8214 define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
8215 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
8216 ; VLX: # %bb.0: # %entry
8217 ; VLX-NEXT: kmovd %edi, %k1
8218 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
8219 ; VLX-NEXT: kmovd %k0, %eax
8222 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
8223 ; NoVLX: # %bb.0: # %entry
8224 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8225 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
8226 ; NoVLX-NEXT: kmovw %edi, %k1
8227 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8228 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8229 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8230 ; NoVLX-NEXT: kmovw %k0, %eax
8231 ; NoVLX-NEXT: vzeroupper
8234 %0 = bitcast <2 x i64> %__a to <2 x i64>
8235 %load = load i64, i64* %__b
8236 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8237 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8238 %2 = icmp sgt <2 x i64> %0, %1
8239 %3 = bitcast i8 %__u to <8 x i1>
8240 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8241 %4 = and <2 x i1> %extract.i, %2
8242 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8243 %6 = bitcast <32 x i1> %5 to i32
8248 define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
8249 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
8250 ; VLX: # %bb.0: # %entry
8251 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
8252 ; VLX-NEXT: kmovq %k0, %rax
8255 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
8256 ; NoVLX: # %bb.0: # %entry
8257 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
8258 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8259 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8260 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8261 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8262 ; NoVLX-NEXT: kmovw %k0, %eax
8263 ; NoVLX-NEXT: vzeroupper
8266 %0 = bitcast <2 x i64> %__a to <2 x i64>
8267 %1 = bitcast <2 x i64> %__b to <2 x i64>
8268 %2 = icmp sgt <2 x i64> %0, %1
8269 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8270 %4 = bitcast <64 x i1> %3 to i64
8274 define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
8275 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem:
8276 ; VLX: # %bb.0: # %entry
8277 ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
8278 ; VLX-NEXT: kmovq %k0, %rax
8281 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem:
8282 ; NoVLX: # %bb.0: # %entry
8283 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8284 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
8285 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8286 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8287 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8288 ; NoVLX-NEXT: kmovw %k0, %eax
8289 ; NoVLX-NEXT: vzeroupper
8292 %0 = bitcast <2 x i64> %__a to <2 x i64>
8293 %load = load <2 x i64>, <2 x i64>* %__b
8294 %1 = bitcast <2 x i64> %load to <2 x i64>
8295 %2 = icmp sgt <2 x i64> %0, %1
8296 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8297 %4 = bitcast <64 x i1> %3 to i64
8301 define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
8302 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask:
8303 ; VLX: # %bb.0: # %entry
8304 ; VLX-NEXT: kmovd %edi, %k1
8305 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
8306 ; VLX-NEXT: kmovq %k0, %rax
8309 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask:
8310 ; NoVLX: # %bb.0: # %entry
8311 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
8312 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8313 ; NoVLX-NEXT: kmovw %edi, %k1
8314 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8315 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8316 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8317 ; NoVLX-NEXT: kmovw %k0, %eax
8318 ; NoVLX-NEXT: vzeroupper
8321 %0 = bitcast <2 x i64> %__a to <2 x i64>
8322 %1 = bitcast <2 x i64> %__b to <2 x i64>
8323 %2 = icmp sgt <2 x i64> %0, %1
8324 %3 = bitcast i8 %__u to <8 x i1>
8325 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8326 %4 = and <2 x i1> %2, %extract.i
8327 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8328 %6 = bitcast <64 x i1> %5 to i64
8332 define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
8333 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem:
8334 ; VLX: # %bb.0: # %entry
8335 ; VLX-NEXT: kmovd %edi, %k1
8336 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
8337 ; VLX-NEXT: kmovq %k0, %rax
8340 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem:
8341 ; NoVLX: # %bb.0: # %entry
8342 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8343 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
8344 ; NoVLX-NEXT: kmovw %edi, %k1
8345 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8346 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8347 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8348 ; NoVLX-NEXT: kmovw %k0, %eax
8349 ; NoVLX-NEXT: vzeroupper
8352 %0 = bitcast <2 x i64> %__a to <2 x i64>
8353 %load = load <2 x i64>, <2 x i64>* %__b
8354 %1 = bitcast <2 x i64> %load to <2 x i64>
8355 %2 = icmp sgt <2 x i64> %0, %1
8356 %3 = bitcast i8 %__u to <8 x i1>
8357 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8358 %4 = and <2 x i1> %2, %extract.i
8359 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8360 %6 = bitcast <64 x i1> %5 to i64
8365 define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
8366 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
8367 ; VLX: # %bb.0: # %entry
8368 ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
8369 ; VLX-NEXT: kmovq %k0, %rax
8372 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
8373 ; NoVLX: # %bb.0: # %entry
8374 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8375 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
8376 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8377 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8378 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8379 ; NoVLX-NEXT: kmovw %k0, %eax
8380 ; NoVLX-NEXT: vzeroupper
8383 %0 = bitcast <2 x i64> %__a to <2 x i64>
8384 %load = load i64, i64* %__b
8385 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8386 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8387 %2 = icmp sgt <2 x i64> %0, %1
8388 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8389 %4 = bitcast <64 x i1> %3 to i64
8393 define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
8394 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
8395 ; VLX: # %bb.0: # %entry
8396 ; VLX-NEXT: kmovd %edi, %k1
8397 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
8398 ; VLX-NEXT: kmovq %k0, %rax
8401 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
8402 ; NoVLX: # %bb.0: # %entry
8403 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8404 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
8405 ; NoVLX-NEXT: kmovw %edi, %k1
8406 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8407 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8408 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8409 ; NoVLX-NEXT: kmovw %k0, %eax
8410 ; NoVLX-NEXT: vzeroupper
8413 %0 = bitcast <2 x i64> %__a to <2 x i64>
8414 %load = load i64, i64* %__b
8415 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8416 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8417 %2 = icmp sgt <2 x i64> %0, %1
8418 %3 = bitcast i8 %__u to <8 x i1>
8419 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8420 %4 = and <2 x i1> %extract.i, %2
8421 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8422 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than test on two <4 x i64> register operands.
; The <4 x i1> icmp sgt result is zero-padded to <8 x i1> (shuffle indices
; 4-7 select zeroinitializer lanes) and bitcast to i8, the declared return
; type.
8427 define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8428 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask:
8429 ; VLX: # %bb.0: # %entry
8430 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
8431 ; VLX-NEXT: kmovd %k0, %eax
8432 ; VLX-NEXT: # kill: def $al killed $al killed $eax
8433 ; VLX-NEXT: vzeroupper
8436 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask:
8437 ; NoVLX: # %bb.0: # %entry
8438 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8439 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8440 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8441 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8442 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8443 ; NoVLX-NEXT: kmovw %k0, %eax
8444 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
8445 ; NoVLX-NEXT: vzeroupper
8448 %0 = bitcast <4 x i64> %__a to <4 x i64>
8449 %1 = bitcast <4 x i64> %__b to <4 x i64>
8450 %2 = icmp sgt <4 x i64> %0, %1
8451 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8452 %4 = bitcast <8 x i1> %3 to i8
; Unmasked signed greater-than test of %__a against a <4 x i64> vector loaded
; from %__b. The <4 x i1> icmp sgt result is zero-padded to <8 x i1> (shuffle
; indices 4-7 select zeroinitializer lanes) and bitcast to i8.
8456 define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
8457 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem:
8458 ; VLX: # %bb.0: # %entry
8459 ; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
8460 ; VLX-NEXT: kmovd %k0, %eax
8461 ; VLX-NEXT: # kill: def $al killed $al killed $eax
8462 ; VLX-NEXT: vzeroupper
8465 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem:
8466 ; NoVLX: # %bb.0: # %entry
8467 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8468 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
8469 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8470 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8471 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8472 ; NoVLX-NEXT: kmovw %k0, %eax
8473 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
8474 ; NoVLX-NEXT: vzeroupper
8477 %0 = bitcast <4 x i64> %__a to <4 x i64>
8478 %load = load <4 x i64>, <4 x i64>* %__b
8479 %1 = bitcast <4 x i64> %load to <4 x i64>
8480 %2 = icmp sgt <4 x i64> %0, %1
8481 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8482 %4 = bitcast <8 x i1> %3 to i8
; Masked signed greater-than test on two <4 x i64> register operands.
; The icmp sgt result is ANDed with lanes 0-3 of %__u (viewed as <8 x i1>),
; zero-padded to <8 x i1> (shuffle indices 4-7 select zeroinitializer lanes)
; and bitcast to i8, the declared return type.
8486 define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8487 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask:
8488 ; VLX: # %bb.0: # %entry
8489 ; VLX-NEXT: kmovd %edi, %k1
8490 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
8491 ; VLX-NEXT: kmovd %k0, %eax
8492 ; VLX-NEXT: # kill: def $al killed $al killed $eax
8493 ; VLX-NEXT: vzeroupper
8496 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask:
8497 ; NoVLX: # %bb.0: # %entry
8498 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8499 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8500 ; NoVLX-NEXT: kmovw %edi, %k1
8501 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8502 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8503 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8504 ; NoVLX-NEXT: kmovw %k0, %eax
8505 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
8506 ; NoVLX-NEXT: vzeroupper
8509 %0 = bitcast <4 x i64> %__a to <4 x i64>
8510 %1 = bitcast <4 x i64> %__b to <4 x i64>
8511 %2 = icmp sgt <4 x i64> %0, %1
8512 %3 = bitcast i8 %__u to <8 x i1>
8513 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8514 %4 = and <4 x i1> %2, %extract.i
8515 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8516 %6 = bitcast <8 x i1> %5 to i8
; Masked signed greater-than compare of %__a against a <4 x i64> vector loaded
; from %__b. The compare result is ANDed with the low 4 bits of %__u, widened to
; <8 x i1> and returned as an i8 bitmask.
; Expected output in the VLX run folds the load into a masked ymm vpcmpgtq; the
; NoVLX run loads into ymm1, compares on zmm registers under %k1 and adds a
; kshiftlw/kshiftrw $12 pair so that only the low 4 mask bits survive.
8520 define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
8521 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem:
8522 ; VLX: # %bb.0: # %entry
8523 ; VLX-NEXT: kmovd %edi, %k1
8524 ; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
8525 ; VLX-NEXT: kmovd %k0, %eax
8526 ; VLX-NEXT: # kill: def $al killed $al killed $eax
8527 ; VLX-NEXT: vzeroupper
8530 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem:
8531 ; NoVLX: # %bb.0: # %entry
8532 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8533 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
8534 ; NoVLX-NEXT: kmovw %edi, %k1
8535 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8536 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8537 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8538 ; NoVLX-NEXT: kmovw %k0, %eax
8539 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
8540 ; NoVLX-NEXT: vzeroupper
8543 %0 = bitcast <4 x i64> %__a to <4 x i64>
8544 %load = load <4 x i64>, <4 x i64>* %__b
8545 %1 = bitcast <4 x i64> %load to <4 x i64>
8546 %2 = icmp sgt <4 x i64> %0, %1
; Only lanes 0-3 of the 8-bit mask %__u take part in the AND.
8547 %3 = bitcast i8 %__u to <8 x i1>
8548 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8549 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so the widened lanes are zero.
8550 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8551 %6 = bitcast <8 x i1> %5 to i8
; Signed greater-than compare of %__a against a single i64 loaded from %__b and
; splatted to all four lanes. The <4 x i1> result is widened to <8 x i1> and
; returned as an i8 bitmask.
; Expected output in the VLX run uses the {1to4} broadcast memory operand of
; vpcmpgtq; the NoVLX run uses vpbroadcastq into ymm1, compares on zmm registers
; and adds a kshiftlw/kshiftrw $12 pair so that only the low 4 mask bits survive.
8556 define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
8557 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
8558 ; VLX: # %bb.0: # %entry
8559 ; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
8560 ; VLX-NEXT: kmovd %k0, %eax
8561 ; VLX-NEXT: # kill: def $al killed $al killed $eax
8562 ; VLX-NEXT: vzeroupper
8565 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
8566 ; NoVLX: # %bb.0: # %entry
8567 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8568 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
8569 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8570 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8571 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8572 ; NoVLX-NEXT: kmovw %k0, %eax
8573 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
8574 ; NoVLX-NEXT: vzeroupper
8577 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert into lane 0, then shuffle with an all-zero index mask.
8578 %load = load i64, i64* %__b
8579 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8580 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8581 %2 = icmp sgt <4 x i64> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so the widened lanes are zero.
8582 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8583 %4 = bitcast <8 x i1> %3 to i8
; Masked signed greater-than compare of %__a against a single i64 loaded from
; %__b and splatted to all four lanes. The compare result is ANDed with the low
; 4 bits of %__u, widened to <8 x i1> and returned as an i8 bitmask.
; Expected output in the VLX run uses the {1to4} broadcast memory operand under
; %k1; the NoVLX run uses vpbroadcastq into ymm1, a masked zmm compare and a
; kshiftlw/kshiftrw $12 pair so that only the low 4 mask bits survive.
8587 define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
8588 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
8589 ; VLX: # %bb.0: # %entry
8590 ; VLX-NEXT: kmovd %edi, %k1
8591 ; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
8592 ; VLX-NEXT: kmovd %k0, %eax
8593 ; VLX-NEXT: # kill: def $al killed $al killed $eax
8594 ; VLX-NEXT: vzeroupper
8597 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
8598 ; NoVLX: # %bb.0: # %entry
8599 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8600 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
8601 ; NoVLX-NEXT: kmovw %edi, %k1
8602 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8603 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8604 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8605 ; NoVLX-NEXT: kmovw %k0, %eax
8606 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
8607 ; NoVLX-NEXT: vzeroupper
8610 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert into lane 0, then shuffle with an all-zero index mask.
8611 %load = load i64, i64* %__b
8612 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8613 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8614 %2 = icmp sgt <4 x i64> %0, %1
; Only lanes 0-3 of the 8-bit mask %__u take part in the AND.
8615 %3 = bitcast i8 %__u to <8 x i1>
8616 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8617 %4 = and <4 x i1> %extract.i, %2
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so the widened lanes are zero.
8618 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8619 %6 = bitcast <8 x i1> %5 to i8
; Signed greater-than compare of two <4 x i64> register operands. The <4 x i1>
; result is widened to <16 x i1> and returned as an i16 bitmask.
; Expected output in the VLX run is a single ymm vpcmpgtq; the NoVLX run compares
; on zmm registers and adds a kshiftlw/kshiftrw $12 pair so that only the low 4
; mask bits survive.
8624 define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8625 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask:
8626 ; VLX: # %bb.0: # %entry
8627 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
8628 ; VLX-NEXT: kmovd %k0, %eax
8629 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8630 ; VLX-NEXT: vzeroupper
8633 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask:
8634 ; NoVLX: # %bb.0: # %entry
8635 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8636 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8637 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8638 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8639 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8640 ; NoVLX-NEXT: kmovw %k0, %eax
8641 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8642 ; NoVLX-NEXT: vzeroupper
8645 %0 = bitcast <4 x i64> %__a to <4 x i64>
8646 %1 = bitcast <4 x i64> %__b to <4 x i64>
8647 %2 = icmp sgt <4 x i64> %0, %1
; Every shuffle index past the first four is in 4-7, i.e. a lane of the zeroinitializer operand.
8648 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8649 %4 = bitcast <16 x i1> %3 to i16
; Signed greater-than compare of %__a against a <4 x i64> vector loaded from
; %__b. The <4 x i1> result is widened to <16 x i1> and returned as an i16 bitmask.
; Expected output in the VLX run is a ymm vpcmpgtq with the load folded in; the
; NoVLX run loads into ymm1, compares on zmm registers and adds a
; kshiftlw/kshiftrw $12 pair so that only the low 4 mask bits survive.
8653 define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
8654 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem:
8655 ; VLX: # %bb.0: # %entry
8656 ; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
8657 ; VLX-NEXT: kmovd %k0, %eax
8658 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8659 ; VLX-NEXT: vzeroupper
8662 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem:
8663 ; NoVLX: # %bb.0: # %entry
8664 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8665 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
8666 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8667 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8668 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8669 ; NoVLX-NEXT: kmovw %k0, %eax
8670 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8671 ; NoVLX-NEXT: vzeroupper
8674 %0 = bitcast <4 x i64> %__a to <4 x i64>
8675 %load = load <4 x i64>, <4 x i64>* %__b
8676 %1 = bitcast <4 x i64> %load to <4 x i64>
8677 %2 = icmp sgt <4 x i64> %0, %1
; Every shuffle index past the first four is in 4-7, i.e. a lane of the zeroinitializer operand.
8678 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8679 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than compare of two <4 x i64> register operands. The
; compare result is ANDed with the low 4 bits of %__u, widened to <16 x i1> and
; returned as an i16 bitmask.
; Expected output in both runs moves %edi into %k1 and uses it as the write mask
; of vpcmpgtq; the NoVLX run compares on zmm registers and adds a
; kshiftlw/kshiftrw $12 pair so that only the low 4 mask bits survive.
8683 define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8684 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask:
8685 ; VLX: # %bb.0: # %entry
8686 ; VLX-NEXT: kmovd %edi, %k1
8687 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
8688 ; VLX-NEXT: kmovd %k0, %eax
8689 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8690 ; VLX-NEXT: vzeroupper
8693 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask:
8694 ; NoVLX: # %bb.0: # %entry
8695 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8696 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8697 ; NoVLX-NEXT: kmovw %edi, %k1
8698 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8699 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8700 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8701 ; NoVLX-NEXT: kmovw %k0, %eax
8702 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8703 ; NoVLX-NEXT: vzeroupper
8706 %0 = bitcast <4 x i64> %__a to <4 x i64>
8707 %1 = bitcast <4 x i64> %__b to <4 x i64>
8708 %2 = icmp sgt <4 x i64> %0, %1
; Only lanes 0-3 of the 8-bit mask %__u take part in the AND.
8709 %3 = bitcast i8 %__u to <8 x i1>
8710 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8711 %4 = and <4 x i1> %2, %extract.i
; Every shuffle index past the first four is in 4-7, i.e. a lane of the zeroinitializer operand.
8712 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8713 %6 = bitcast <16 x i1> %5 to i16
; Masked signed greater-than compare of %__a against a <4 x i64> vector loaded
; from %__b. The compare result is ANDed with the low 4 bits of %__u, widened to
; <16 x i1> and returned as an i16 bitmask.
; Expected output in the VLX run folds the load into a masked ymm vpcmpgtq; the
; NoVLX run loads into ymm1, compares on zmm registers under %k1 and adds a
; kshiftlw/kshiftrw $12 pair so that only the low 4 mask bits survive.
8717 define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
8718 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem:
8719 ; VLX: # %bb.0: # %entry
8720 ; VLX-NEXT: kmovd %edi, %k1
8721 ; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
8722 ; VLX-NEXT: kmovd %k0, %eax
8723 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8724 ; VLX-NEXT: vzeroupper
8727 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem:
8728 ; NoVLX: # %bb.0: # %entry
8729 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8730 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
8731 ; NoVLX-NEXT: kmovw %edi, %k1
8732 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8733 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8734 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8735 ; NoVLX-NEXT: kmovw %k0, %eax
8736 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8737 ; NoVLX-NEXT: vzeroupper
8740 %0 = bitcast <4 x i64> %__a to <4 x i64>
8741 %load = load <4 x i64>, <4 x i64>* %__b
8742 %1 = bitcast <4 x i64> %load to <4 x i64>
8743 %2 = icmp sgt <4 x i64> %0, %1
; Only lanes 0-3 of the 8-bit mask %__u take part in the AND.
8744 %3 = bitcast i8 %__u to <8 x i1>
8745 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8746 %4 = and <4 x i1> %2, %extract.i
; Every shuffle index past the first four is in 4-7, i.e. a lane of the zeroinitializer operand.
8747 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8748 %6 = bitcast <16 x i1> %5 to i16
; Signed greater-than compare of %__a against a single i64 loaded from %__b and
; splatted to all four lanes. The <4 x i1> result is widened to <16 x i1> and
; returned as an i16 bitmask.
; Expected output in the VLX run uses the {1to4} broadcast memory operand of
; vpcmpgtq; the NoVLX run uses vpbroadcastq into ymm1, compares on zmm registers
; and adds a kshiftlw/kshiftrw $12 pair so that only the low 4 mask bits survive.
8753 define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
8754 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
8755 ; VLX: # %bb.0: # %entry
8756 ; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
8757 ; VLX-NEXT: kmovd %k0, %eax
8758 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8759 ; VLX-NEXT: vzeroupper
8762 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
8763 ; NoVLX: # %bb.0: # %entry
8764 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8765 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
8766 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8767 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8768 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8769 ; NoVLX-NEXT: kmovw %k0, %eax
8770 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8771 ; NoVLX-NEXT: vzeroupper
8774 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert into lane 0, then shuffle with an all-zero index mask.
8775 %load = load i64, i64* %__b
8776 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8777 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8778 %2 = icmp sgt <4 x i64> %0, %1
; Every shuffle index past the first four is in 4-7, i.e. a lane of the zeroinitializer operand.
8779 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8780 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than compare of %__a against a single i64 loaded from
; %__b and splatted to all four lanes. The compare result is ANDed with the low
; 4 bits of %__u, widened to <16 x i1> and returned as an i16 bitmask.
; Expected output in the VLX run uses the {1to4} broadcast memory operand under
; %k1; the NoVLX run uses vpbroadcastq into ymm1, a masked zmm compare and a
; kshiftlw/kshiftrw $12 pair so that only the low 4 mask bits survive.
8784 define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
8785 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
8786 ; VLX: # %bb.0: # %entry
8787 ; VLX-NEXT: kmovd %edi, %k1
8788 ; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
8789 ; VLX-NEXT: kmovd %k0, %eax
8790 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8791 ; VLX-NEXT: vzeroupper
8794 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
8795 ; NoVLX: # %bb.0: # %entry
8796 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8797 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
8798 ; NoVLX-NEXT: kmovw %edi, %k1
8799 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8800 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8801 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8802 ; NoVLX-NEXT: kmovw %k0, %eax
8803 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8804 ; NoVLX-NEXT: vzeroupper
8807 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert into lane 0, then shuffle with an all-zero index mask.
8808 %load = load i64, i64* %__b
8809 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8810 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8811 %2 = icmp sgt <4 x i64> %0, %1
; Only lanes 0-3 of the 8-bit mask %__u take part in the AND.
8812 %3 = bitcast i8 %__u to <8 x i1>
8813 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8814 %4 = and <4 x i1> %extract.i, %2
; Every shuffle index past the first four is in 4-7, i.e. a lane of the zeroinitializer operand.
8815 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8816 %6 = bitcast <16 x i1> %5 to i16
; Signed greater-than compare of two <4 x i64> register operands. The <4 x i1>
; result is widened to <32 x i1> and returned as an i32 bitmask.
; Expected output in the VLX run is a single ymm vpcmpgtq followed by kmovd; the
; NoVLX run compares on zmm registers and adds a kshiftlw/kshiftrw $12 pair so
; that only the low 4 mask bits survive before kmovw.
8821 define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8822 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask:
8823 ; VLX: # %bb.0: # %entry
8824 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
8825 ; VLX-NEXT: kmovd %k0, %eax
8826 ; VLX-NEXT: vzeroupper
8829 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask:
8830 ; NoVLX: # %bb.0: # %entry
8831 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8832 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8833 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8834 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8835 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8836 ; NoVLX-NEXT: kmovw %k0, %eax
8837 ; NoVLX-NEXT: vzeroupper
8840 %0 = bitcast <4 x i64> %__a to <4 x i64>
8841 %1 = bitcast <4 x i64> %__b to <4 x i64>
8842 %2 = icmp sgt <4 x i64> %0, %1
; Every shuffle index past the first four is in 4-7, i.e. a lane of the zeroinitializer operand.
8843 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8844 %4 = bitcast <32 x i1> %3 to i32
; Signed greater-than compare of %__a against a <4 x i64> vector loaded from
; %__b. The <4 x i1> result is widened to <32 x i1> and returned as an i32 bitmask.
; Expected output in the VLX run is a ymm vpcmpgtq with the load folded in; the
; NoVLX run loads into ymm1, compares on zmm registers and adds a
; kshiftlw/kshiftrw $12 pair so that only the low 4 mask bits survive.
8848 define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
8849 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem:
8850 ; VLX: # %bb.0: # %entry
8851 ; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
8852 ; VLX-NEXT: kmovd %k0, %eax
8853 ; VLX-NEXT: vzeroupper
8856 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem:
8857 ; NoVLX: # %bb.0: # %entry
8858 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8859 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
8860 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8861 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8862 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8863 ; NoVLX-NEXT: kmovw %k0, %eax
8864 ; NoVLX-NEXT: vzeroupper
8867 %0 = bitcast <4 x i64> %__a to <4 x i64>
8868 %load = load <4 x i64>, <4 x i64>* %__b
8869 %1 = bitcast <4 x i64> %load to <4 x i64>
8870 %2 = icmp sgt <4 x i64> %0, %1
; Every shuffle index past the first four is in 4-7, i.e. a lane of the zeroinitializer operand.
8871 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8872 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of two <4 x i64> register operands. The
; compare result is ANDed with the low 4 bits of %__u, widened to <32 x i1> and
; returned as an i32 bitmask.
; Expected output in both runs moves %edi into %k1 and uses it as the write mask
; of vpcmpgtq; the NoVLX run compares on zmm registers and adds a
; kshiftlw/kshiftrw $12 pair so that only the low 4 mask bits survive.
8876 define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8877 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask:
8878 ; VLX: # %bb.0: # %entry
8879 ; VLX-NEXT: kmovd %edi, %k1
8880 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
8881 ; VLX-NEXT: kmovd %k0, %eax
8882 ; VLX-NEXT: vzeroupper
8885 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask:
8886 ; NoVLX: # %bb.0: # %entry
8887 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8888 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8889 ; NoVLX-NEXT: kmovw %edi, %k1
8890 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8891 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8892 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8893 ; NoVLX-NEXT: kmovw %k0, %eax
8894 ; NoVLX-NEXT: vzeroupper
8897 %0 = bitcast <4 x i64> %__a to <4 x i64>
8898 %1 = bitcast <4 x i64> %__b to <4 x i64>
8899 %2 = icmp sgt <4 x i64> %0, %1
; Only lanes 0-3 of the 8-bit mask %__u take part in the AND.
8900 %3 = bitcast i8 %__u to <8 x i1>
8901 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8902 %4 = and <4 x i1> %2, %extract.i
; Every shuffle index past the first four is in 4-7, i.e. a lane of the zeroinitializer operand.
8903 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8904 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-than compare of %__a against a <4 x i64> vector loaded
; from %__b. The compare result is ANDed with the low 4 bits of %__u, widened to
; <32 x i1> and returned as an i32 bitmask.
; Expected output in the VLX run folds the load into a masked ymm vpcmpgtq; the
; NoVLX run loads into ymm1, compares on zmm registers under %k1 and adds a
; kshiftlw/kshiftrw $12 pair so that only the low 4 mask bits survive.
8908 define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
8909 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem:
8910 ; VLX: # %bb.0: # %entry
8911 ; VLX-NEXT: kmovd %edi, %k1
8912 ; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
8913 ; VLX-NEXT: kmovd %k0, %eax
8914 ; VLX-NEXT: vzeroupper
8917 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem:
8918 ; NoVLX: # %bb.0: # %entry
8919 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8920 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
8921 ; NoVLX-NEXT: kmovw %edi, %k1
8922 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8923 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8924 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8925 ; NoVLX-NEXT: kmovw %k0, %eax
8926 ; NoVLX-NEXT: vzeroupper
8929 %0 = bitcast <4 x i64> %__a to <4 x i64>
8930 %load = load <4 x i64>, <4 x i64>* %__b
8931 %1 = bitcast <4 x i64> %load to <4 x i64>
8932 %2 = icmp sgt <4 x i64> %0, %1
; Only lanes 0-3 of the 8-bit mask %__u take part in the AND.
8933 %3 = bitcast i8 %__u to <8 x i1>
8934 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8935 %4 = and <4 x i1> %2, %extract.i
; Every shuffle index past the first four is in 4-7, i.e. a lane of the zeroinitializer operand.
8936 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8937 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-than compare of %__a against a single i64 loaded from %__b and
; splatted to all four lanes. The <4 x i1> result is widened to <32 x i1> and
; returned as an i32 bitmask.
; Expected output in the VLX run uses the {1to4} broadcast memory operand of
; vpcmpgtq; the NoVLX run uses vpbroadcastq into ymm1, compares on zmm registers
; and adds a kshiftlw/kshiftrw $12 pair so that only the low 4 mask bits survive.
8942 define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
8943 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
8944 ; VLX: # %bb.0: # %entry
8945 ; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
8946 ; VLX-NEXT: kmovd %k0, %eax
8947 ; VLX-NEXT: vzeroupper
8950 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
8951 ; NoVLX: # %bb.0: # %entry
8952 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8953 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
8954 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8955 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8956 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8957 ; NoVLX-NEXT: kmovw %k0, %eax
8958 ; NoVLX-NEXT: vzeroupper
8961 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert into lane 0, then shuffle with an all-zero index mask.
8962 %load = load i64, i64* %__b
8963 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8964 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8965 %2 = icmp sgt <4 x i64> %0, %1
; Every shuffle index past the first four is in 4-7, i.e. a lane of the zeroinitializer operand.
8966 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8967 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of %__a against a single i64 loaded from
; %__b and splatted to all four lanes. The compare result is ANDed with the low
; 4 bits of %__u, widened to <32 x i1> and returned as an i32 bitmask.
; Expected output in the VLX run uses the {1to4} broadcast memory operand under
; %k1; the NoVLX run uses vpbroadcastq into ymm1, a masked zmm compare and a
; kshiftlw/kshiftrw $12 pair so that only the low 4 mask bits survive.
8971 define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
8972 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
8973 ; VLX: # %bb.0: # %entry
8974 ; VLX-NEXT: kmovd %edi, %k1
8975 ; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
8976 ; VLX-NEXT: kmovd %k0, %eax
8977 ; VLX-NEXT: vzeroupper
8980 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
8981 ; NoVLX: # %bb.0: # %entry
8982 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8983 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
8984 ; NoVLX-NEXT: kmovw %edi, %k1
8985 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8986 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8987 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8988 ; NoVLX-NEXT: kmovw %k0, %eax
8989 ; NoVLX-NEXT: vzeroupper
8992 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert into lane 0, then shuffle with an all-zero index mask.
8993 %load = load i64, i64* %__b
8994 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8995 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8996 %2 = icmp sgt <4 x i64> %0, %1
; Only lanes 0-3 of the 8-bit mask %__u take part in the AND.
8997 %3 = bitcast i8 %__u to <8 x i1>
8998 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8999 %4 = and <4 x i1> %extract.i, %2
; Every shuffle index past the first four is in 4-7, i.e. a lane of the zeroinitializer operand.
9000 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9001 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-than compare of two <4 x i64> register operands. The <4 x i1>
; result is widened to <64 x i1> and returned as an i64 bitmask.
; Expected output in the VLX run is a single ymm vpcmpgtq followed by kmovq into
; %rax; the NoVLX run compares on zmm registers, adds a kshiftlw/kshiftrw $12
; pair so that only the low 4 mask bits survive, and reads the mask with kmovw.
9006 define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
9007 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask:
9008 ; VLX: # %bb.0: # %entry
9009 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
9010 ; VLX-NEXT: kmovq %k0, %rax
9011 ; VLX-NEXT: vzeroupper
9014 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask:
9015 ; NoVLX: # %bb.0: # %entry
9016 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
9017 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
9018 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9019 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
9020 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
9021 ; NoVLX-NEXT: kmovw %k0, %eax
9022 ; NoVLX-NEXT: vzeroupper
9025 %0 = bitcast <4 x i64> %__a to <4 x i64>
9026 %1 = bitcast <4 x i64> %__b to <4 x i64>
9027 %2 = icmp sgt <4 x i64> %0, %1
; Every shuffle index past the first four is in 4-7, i.e. a lane of the zeroinitializer operand.
9028 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9029 %4 = bitcast <64 x i1> %3 to i64
; Signed greater-than compare of %__a against a <4 x i64> vector loaded from
; %__b. The <4 x i1> result is widened to <64 x i1> and returned as an i64 bitmask.
; Expected output in the VLX run is a ymm vpcmpgtq with the load folded in,
; followed by kmovq into %rax; the NoVLX run loads into ymm1, compares on zmm
; registers and adds a kshiftlw/kshiftrw $12 pair so that only the low 4 mask
; bits survive.
9033 define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
9034 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem:
9035 ; VLX: # %bb.0: # %entry
9036 ; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
9037 ; VLX-NEXT: kmovq %k0, %rax
9038 ; VLX-NEXT: vzeroupper
9041 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem:
9042 ; NoVLX: # %bb.0: # %entry
9043 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
9044 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
9045 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9046 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
9047 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
9048 ; NoVLX-NEXT: kmovw %k0, %eax
9049 ; NoVLX-NEXT: vzeroupper
9052 %0 = bitcast <4 x i64> %__a to <4 x i64>
9053 %load = load <4 x i64>, <4 x i64>* %__b
9054 %1 = bitcast <4 x i64> %load to <4 x i64>
9055 %2 = icmp sgt <4 x i64> %0, %1
; Every shuffle index past the first four is in 4-7, i.e. a lane of the zeroinitializer operand.
9056 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9057 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of two <4 x i64> register operands. The
; compare result is ANDed with the low 4 bits of %__u, widened to <64 x i1> and
; returned as an i64 bitmask.
; Expected output in both runs moves %edi into %k1 and uses it as the write mask
; of vpcmpgtq; the VLX run reads the result with kmovq, while the NoVLX run
; compares on zmm registers and adds a kshiftlw/kshiftrw $12 pair so that only
; the low 4 mask bits survive.
9061 define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
9062 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask:
9063 ; VLX: # %bb.0: # %entry
9064 ; VLX-NEXT: kmovd %edi, %k1
9065 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
9066 ; VLX-NEXT: kmovq %k0, %rax
9067 ; VLX-NEXT: vzeroupper
9070 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask:
9071 ; NoVLX: # %bb.0: # %entry
9072 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
9073 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
9074 ; NoVLX-NEXT: kmovw %edi, %k1
9075 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9076 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
9077 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
9078 ; NoVLX-NEXT: kmovw %k0, %eax
9079 ; NoVLX-NEXT: vzeroupper
9082 %0 = bitcast <4 x i64> %__a to <4 x i64>
9083 %1 = bitcast <4 x i64> %__b to <4 x i64>
9084 %2 = icmp sgt <4 x i64> %0, %1
; Only lanes 0-3 of the 8-bit mask %__u take part in the AND.
9085 %3 = bitcast i8 %__u to <8 x i1>
9086 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9087 %4 = and <4 x i1> %2, %extract.i
; Every shuffle index past the first four is in 4-7, i.e. a lane of the zeroinitializer operand.
9088 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9089 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-than compare of %__a against a <4 x i64> vector loaded
; from %__b. The compare result is ANDed with the low 4 bits of %__u, widened to
; <64 x i1> and returned as an i64 bitmask.
; Expected output in the VLX run folds the load into a masked ymm vpcmpgtq and
; reads the result with kmovq; the NoVLX run loads into ymm1, compares on zmm
; registers under %k1 and adds a kshiftlw/kshiftrw $12 pair so that only the
; low 4 mask bits survive.
9093 define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
9094 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem:
9095 ; VLX: # %bb.0: # %entry
9096 ; VLX-NEXT: kmovd %edi, %k1
9097 ; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
9098 ; VLX-NEXT: kmovq %k0, %rax
9099 ; VLX-NEXT: vzeroupper
9102 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem:
9103 ; NoVLX: # %bb.0: # %entry
9104 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
9105 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
9106 ; NoVLX-NEXT: kmovw %edi, %k1
9107 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9108 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
9109 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
9110 ; NoVLX-NEXT: kmovw %k0, %eax
9111 ; NoVLX-NEXT: vzeroupper
9114 %0 = bitcast <4 x i64> %__a to <4 x i64>
9115 %load = load <4 x i64>, <4 x i64>* %__b
9116 %1 = bitcast <4 x i64> %load to <4 x i64>
9117 %2 = icmp sgt <4 x i64> %0, %1
; Only lanes 0-3 of the 8-bit mask %__u take part in the AND.
9118 %3 = bitcast i8 %__u to <8 x i1>
9119 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9120 %4 = and <4 x i1> %2, %extract.i
; Every shuffle index past the first four is in 4-7, i.e. a lane of the zeroinitializer operand.
9121 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9122 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of %__a against a single i64 loaded from %__b and
; splatted to all four lanes. The <4 x i1> result is widened to <64 x i1> and
; returned as an i64 bitmask.
; Expected output in the VLX run uses the {1to4} broadcast memory operand of
; vpcmpgtq and reads the result with kmovq; the NoVLX run uses vpbroadcastq into
; ymm1, compares on zmm registers and adds a kshiftlw/kshiftrw $12 pair so that
; only the low 4 mask bits survive.
9127 define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
9128 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
9129 ; VLX: # %bb.0: # %entry
9130 ; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
9131 ; VLX-NEXT: kmovq %k0, %rax
9132 ; VLX-NEXT: vzeroupper
9135 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
9136 ; NoVLX: # %bb.0: # %entry
9137 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
9138 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
9139 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9140 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
9141 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
9142 ; NoVLX-NEXT: kmovw %k0, %eax
9143 ; NoVLX-NEXT: vzeroupper
9146 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert into lane 0, then shuffle with an all-zero index mask.
9147 %load = load i64, i64* %__b
9148 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
9149 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
9150 %2 = icmp sgt <4 x i64> %0, %1
; Every shuffle index past the first four is in 4-7, i.e. a lane of the zeroinitializer operand.
9151 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9152 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of %__a against a single i64 loaded from
; %__b and splatted to all four lanes. The compare result is ANDed with the low
; 4 bits of %__u, widened to <64 x i1> and returned as an i64 bitmask.
; Expected output in the VLX run uses the {1to4} broadcast memory operand under
; %k1 and reads the result with kmovq; the NoVLX run uses vpbroadcastq into
; ymm1, a masked zmm compare and a kshiftlw/kshiftrw $12 pair so that only the
; low 4 mask bits survive.
9156 define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
9157 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
9158 ; VLX: # %bb.0: # %entry
9159 ; VLX-NEXT: kmovd %edi, %k1
9160 ; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
9161 ; VLX-NEXT: kmovq %k0, %rax
9162 ; VLX-NEXT: vzeroupper
9165 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
9166 ; NoVLX: # %bb.0: # %entry
9167 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
9168 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
9169 ; NoVLX-NEXT: kmovw %edi, %k1
9170 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9171 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
9172 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
9173 ; NoVLX-NEXT: kmovw %k0, %eax
9174 ; NoVLX-NEXT: vzeroupper
9177 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert into lane 0, then shuffle with an all-zero index mask.
9178 %load = load i64, i64* %__b
9179 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
9180 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
9181 %2 = icmp sgt <4 x i64> %0, %1
; Only lanes 0-3 of the 8-bit mask %__u take part in the AND.
9182 %3 = bitcast i8 %__u to <8 x i1>
9183 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9184 %4 = and <4 x i1> %extract.i, %2
; Every shuffle index past the first four is in 4-7, i.e. a lane of the zeroinitializer operand.
9185 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9186 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of two <8 x i64> register operands. The <8 x i1>
; result is widened to <16 x i1> and returned as an i16 bitmask.
; Both runs expect a single zmm vpcmpgtq into %k0 with no mask shifts; they
; differ only in the mask-to-GPR move (kmovd in the VLX run, kmovw in the NoVLX run).
9191 define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9192 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask:
9193 ; VLX: # %bb.0: # %entry
9194 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9195 ; VLX-NEXT: kmovd %k0, %eax
9196 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
9197 ; VLX-NEXT: vzeroupper
9200 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask:
9201 ; NoVLX: # %bb.0: # %entry
9202 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9203 ; NoVLX-NEXT: kmovw %k0, %eax
9204 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
9205 ; NoVLX-NEXT: vzeroupper
9208 %0 = bitcast <8 x i64> %__a to <8 x i64>
9209 %1 = bitcast <8 x i64> %__b to <8 x i64>
9210 %2 = icmp sgt <8 x i64> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so the widened lanes are zero.
9211 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9212 %4 = bitcast <16 x i1> %3 to i16
; Signed greater-than compare of an <8 x i64> register operand against a full
; vector loaded from %__b; the <8 x i1> result is widened to 16 lanes and bitcast to i16.
; Both prefixes expect the load to appear as a memory operand of vpcmpgtq.
9216 define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
9217 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem:
9218 ; VLX: # %bb.0: # %entry
9219 ; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
9220 ; VLX-NEXT: kmovd %k0, %eax
9221 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
9222 ; VLX-NEXT: vzeroupper
9225 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem:
9226 ; NoVLX: # %bb.0: # %entry
9227 ; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
9228 ; NoVLX-NEXT: kmovw %k0, %eax
9229 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
9230 ; NoVLX-NEXT: vzeroupper
9233 %0 = bitcast <8 x i64> %__a to <8 x i64>
9234 %load = load <8 x i64>, <8 x i64>* %__b
9235 %1 = bitcast <8 x i64> %load to <8 x i64>
9236 %2 = icmp sgt <8 x i64> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so the upper 8 result lanes are zero.
9237 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9238 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than compare of two <8 x i64> register operands: the
; compare result is ANDed with the 8 bits of %__u, widened to 16 lanes and bitcast to i16.
; Both prefixes expect the mask to be moved into %k1 and used as the compare's write mask.
9242 define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9243 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask:
9244 ; VLX: # %bb.0: # %entry
9245 ; VLX-NEXT: kmovd %edi, %k1
9246 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9247 ; VLX-NEXT: kmovd %k0, %eax
9248 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
9249 ; VLX-NEXT: vzeroupper
9252 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask:
9253 ; NoVLX: # %bb.0: # %entry
9254 ; NoVLX-NEXT: kmovw %edi, %k1
9255 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9256 ; NoVLX-NEXT: kmovw %k0, %eax
9257 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
9258 ; NoVLX-NEXT: vzeroupper
9261 %0 = bitcast <8 x i64> %__a to <8 x i64>
9262 %1 = bitcast <8 x i64> %__b to <8 x i64>
9263 %2 = icmp sgt <8 x i64> %0, %1
; Reinterpret the i8 mask as one i1 per lane and apply it to the compare result.
9264 %3 = bitcast i8 %__u to <8 x i1>
9265 %4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so the upper 8 result lanes are zero.
9266 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9267 %6 = bitcast <16 x i1> %5 to i16
; Masked signed greater-than compare of an <8 x i64> register operand against a
; full vector loaded from %__b: the result is ANDed with the 8 bits of %__u,
; widened to 16 lanes and bitcast to i16.
; Both prefixes expect a masked vpcmpgtq with the load as its memory operand.
9271 define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
9272 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem:
9273 ; VLX: # %bb.0: # %entry
9274 ; VLX-NEXT: kmovd %edi, %k1
9275 ; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9276 ; VLX-NEXT: kmovd %k0, %eax
9277 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
9278 ; VLX-NEXT: vzeroupper
9281 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem:
9282 ; NoVLX: # %bb.0: # %entry
9283 ; NoVLX-NEXT: kmovw %edi, %k1
9284 ; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9285 ; NoVLX-NEXT: kmovw %k0, %eax
9286 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
9287 ; NoVLX-NEXT: vzeroupper
9290 %0 = bitcast <8 x i64> %__a to <8 x i64>
9291 %load = load <8 x i64>, <8 x i64>* %__b
9292 %1 = bitcast <8 x i64> %load to <8 x i64>
9293 %2 = icmp sgt <8 x i64> %0, %1
; Reinterpret the i8 mask as one i1 per lane and apply it to the compare result.
9294 %3 = bitcast i8 %__u to <8 x i1>
9295 %4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so the upper 8 result lanes are zero.
9296 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9297 %6 = bitcast <16 x i1> %5 to i16
; Signed greater-than compare of an <8 x i64> register operand against a scalar
; i64 loaded from %__b and splatted to all 8 lanes; the <8 x i1> result is
; widened to 16 lanes and bitcast to i16.
; Both prefixes expect the splat to appear as a {1to8} broadcast memory operand.
9302 define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
9303 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
9304 ; VLX: # %bb.0: # %entry
9305 ; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9306 ; VLX-NEXT: kmovd %k0, %eax
9307 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
9308 ; VLX-NEXT: vzeroupper
9311 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
9312 ; NoVLX: # %bb.0: # %entry
9313 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9314 ; NoVLX-NEXT: kmovw %k0, %eax
9315 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
9316 ; NoVLX-NEXT: vzeroupper
9319 %0 = bitcast <8 x i64> %__a to <8 x i64>
9320 %load = load i64, i64* %__b
; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into every lane.
9321 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9322 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9323 %2 = icmp sgt <8 x i64> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so the upper 8 result lanes are zero.
9324 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9325 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than compare of an <8 x i64> register operand against a
; scalar i64 loaded from %__b and splatted to all 8 lanes: the result is ANDed
; with the 8 bits of %__u, widened to 16 lanes and bitcast to i16.
; Both prefixes expect a masked vpcmpgtq with a {1to8} broadcast memory operand.
9329 define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
9330 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
9331 ; VLX: # %bb.0: # %entry
9332 ; VLX-NEXT: kmovd %edi, %k1
9333 ; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9334 ; VLX-NEXT: kmovd %k0, %eax
9335 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
9336 ; VLX-NEXT: vzeroupper
9339 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
9340 ; NoVLX: # %bb.0: # %entry
9341 ; NoVLX-NEXT: kmovw %edi, %k1
9342 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9343 ; NoVLX-NEXT: kmovw %k0, %eax
9344 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
9345 ; NoVLX-NEXT: vzeroupper
9348 %0 = bitcast <8 x i64> %__a to <8 x i64>
9349 %load = load i64, i64* %__b
; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into every lane.
9350 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9351 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9352 %2 = icmp sgt <8 x i64> %0, %1
; Reinterpret the i8 mask as one i1 per lane and apply it to the compare result.
9353 %3 = bitcast i8 %__u to <8 x i1>
9354 %4 = and <8 x i1> %3, %2
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so the upper 8 result lanes are zero.
9355 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9356 %6 = bitcast <16 x i1> %5 to i16
; Signed greater-than compare of two <8 x i64> register operands; the <8 x i1>
; result is widened to 32 lanes and bitcast to i32.
; VLX / NoVLX check prefixes correspond to the two RUN lines at the top of the file.
9361 define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9362 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask:
9363 ; VLX: # %bb.0: # %entry
9364 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9365 ; VLX-NEXT: kmovd %k0, %eax
9366 ; VLX-NEXT: vzeroupper
9369 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask:
9370 ; NoVLX: # %bb.0: # %entry
9371 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9372 ; NoVLX-NEXT: kmovw %k0, %eax
9373 ; NoVLX-NEXT: vzeroupper
9376 %0 = bitcast <8 x i64> %__a to <8 x i64>
9377 %1 = bitcast <8 x i64> %__b to <8 x i64>
9378 %2 = icmp sgt <8 x i64> %0, %1
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so result lanes 8-31 are zero.
9379 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9380 %4 = bitcast <32 x i1> %3 to i32
; Signed greater-than compare of an <8 x i64> register operand against a full
; vector loaded from %__b; the <8 x i1> result is widened to 32 lanes and bitcast to i32.
; Both prefixes expect the load to appear as a memory operand of vpcmpgtq.
9384 define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
9385 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem:
9386 ; VLX: # %bb.0: # %entry
9387 ; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
9388 ; VLX-NEXT: kmovd %k0, %eax
9389 ; VLX-NEXT: vzeroupper
9392 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem:
9393 ; NoVLX: # %bb.0: # %entry
9394 ; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
9395 ; NoVLX-NEXT: kmovw %k0, %eax
9396 ; NoVLX-NEXT: vzeroupper
9399 %0 = bitcast <8 x i64> %__a to <8 x i64>
9400 %load = load <8 x i64>, <8 x i64>* %__b
9401 %1 = bitcast <8 x i64> %load to <8 x i64>
9402 %2 = icmp sgt <8 x i64> %0, %1
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so result lanes 8-31 are zero.
9403 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9404 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of two <8 x i64> register operands: the
; compare result is ANDed with the 8 bits of %__u, widened to 32 lanes and bitcast to i32.
; Both prefixes expect the mask to be moved into %k1 and used as the compare's write mask.
9408 define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9409 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask:
9410 ; VLX: # %bb.0: # %entry
9411 ; VLX-NEXT: kmovd %edi, %k1
9412 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9413 ; VLX-NEXT: kmovd %k0, %eax
9414 ; VLX-NEXT: vzeroupper
9417 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask:
9418 ; NoVLX: # %bb.0: # %entry
9419 ; NoVLX-NEXT: kmovw %edi, %k1
9420 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9421 ; NoVLX-NEXT: kmovw %k0, %eax
9422 ; NoVLX-NEXT: vzeroupper
9425 %0 = bitcast <8 x i64> %__a to <8 x i64>
9426 %1 = bitcast <8 x i64> %__b to <8 x i64>
9427 %2 = icmp sgt <8 x i64> %0, %1
; Reinterpret the i8 mask as one i1 per lane and apply it to the compare result.
9428 %3 = bitcast i8 %__u to <8 x i1>
9429 %4 = and <8 x i1> %2, %3
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so result lanes 8-31 are zero.
9430 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9431 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-than compare of an <8 x i64> register operand against a
; full vector loaded from %__b: the result is ANDed with the 8 bits of %__u,
; widened to 32 lanes and bitcast to i32.
; Both prefixes expect a masked vpcmpgtq with the load as its memory operand.
9435 define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
9436 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem:
9437 ; VLX: # %bb.0: # %entry
9438 ; VLX-NEXT: kmovd %edi, %k1
9439 ; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9440 ; VLX-NEXT: kmovd %k0, %eax
9441 ; VLX-NEXT: vzeroupper
9444 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem:
9445 ; NoVLX: # %bb.0: # %entry
9446 ; NoVLX-NEXT: kmovw %edi, %k1
9447 ; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9448 ; NoVLX-NEXT: kmovw %k0, %eax
9449 ; NoVLX-NEXT: vzeroupper
9452 %0 = bitcast <8 x i64> %__a to <8 x i64>
9453 %load = load <8 x i64>, <8 x i64>* %__b
9454 %1 = bitcast <8 x i64> %load to <8 x i64>
9455 %2 = icmp sgt <8 x i64> %0, %1
; Reinterpret the i8 mask as one i1 per lane and apply it to the compare result.
9456 %3 = bitcast i8 %__u to <8 x i1>
9457 %4 = and <8 x i1> %2, %3
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so result lanes 8-31 are zero.
9458 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9459 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-than compare of an <8 x i64> register operand against a scalar
; i64 loaded from %__b and splatted to all 8 lanes; the <8 x i1> result is
; widened to 32 lanes and bitcast to i32.
; Both prefixes expect the splat to appear as a {1to8} broadcast memory operand.
9464 define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
9465 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
9466 ; VLX: # %bb.0: # %entry
9467 ; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9468 ; VLX-NEXT: kmovd %k0, %eax
9469 ; VLX-NEXT: vzeroupper
9472 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
9473 ; NoVLX: # %bb.0: # %entry
9474 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9475 ; NoVLX-NEXT: kmovw %k0, %eax
9476 ; NoVLX-NEXT: vzeroupper
9479 %0 = bitcast <8 x i64> %__a to <8 x i64>
9480 %load = load i64, i64* %__b
; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into every lane.
9481 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9482 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9483 %2 = icmp sgt <8 x i64> %0, %1
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so result lanes 8-31 are zero.
9484 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9485 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of an <8 x i64> register operand against a
; scalar i64 loaded from %__b and splatted to all 8 lanes: the result is ANDed
; with the 8 bits of %__u, widened to 32 lanes and bitcast to i32.
; Both prefixes expect a masked vpcmpgtq with a {1to8} broadcast memory operand.
9489 define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
9490 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
9491 ; VLX: # %bb.0: # %entry
9492 ; VLX-NEXT: kmovd %edi, %k1
9493 ; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9494 ; VLX-NEXT: kmovd %k0, %eax
9495 ; VLX-NEXT: vzeroupper
9498 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
9499 ; NoVLX: # %bb.0: # %entry
9500 ; NoVLX-NEXT: kmovw %edi, %k1
9501 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9502 ; NoVLX-NEXT: kmovw %k0, %eax
9503 ; NoVLX-NEXT: vzeroupper
9506 %0 = bitcast <8 x i64> %__a to <8 x i64>
9507 %load = load i64, i64* %__b
; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into every lane.
9508 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9509 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9510 %2 = icmp sgt <8 x i64> %0, %1
; Reinterpret the i8 mask as one i1 per lane and apply it to the compare result.
9511 %3 = bitcast i8 %__u to <8 x i1>
9512 %4 = and <8 x i1> %3, %2
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so result lanes 8-31 are zero.
9513 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9514 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-than compare of two <8 x i64> register operands; the <8 x i1>
; result is widened to 64 lanes and bitcast to i64.
; The VLX checks read the mask with kmovq, the NoVLX checks with kmovw.
9519 define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9520 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask:
9521 ; VLX: # %bb.0: # %entry
9522 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9523 ; VLX-NEXT: kmovq %k0, %rax
9524 ; VLX-NEXT: vzeroupper
9527 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask:
9528 ; NoVLX: # %bb.0: # %entry
9529 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9530 ; NoVLX-NEXT: kmovw %k0, %eax
9531 ; NoVLX-NEXT: vzeroupper
9534 %0 = bitcast <8 x i64> %__a to <8 x i64>
9535 %1 = bitcast <8 x i64> %__b to <8 x i64>
9536 %2 = icmp sgt <8 x i64> %0, %1
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so result lanes 8-63 are zero.
9537 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9538 %4 = bitcast <64 x i1> %3 to i64
; Signed greater-than compare of an <8 x i64> register operand against a full
; vector loaded from %__b; the <8 x i1> result is widened to 64 lanes and bitcast to i64.
; Both prefixes expect the load to appear as a memory operand of vpcmpgtq.
9542 define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
9543 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem:
9544 ; VLX: # %bb.0: # %entry
9545 ; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
9546 ; VLX-NEXT: kmovq %k0, %rax
9547 ; VLX-NEXT: vzeroupper
9550 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem:
9551 ; NoVLX: # %bb.0: # %entry
9552 ; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
9553 ; NoVLX-NEXT: kmovw %k0, %eax
9554 ; NoVLX-NEXT: vzeroupper
9557 %0 = bitcast <8 x i64> %__a to <8 x i64>
9558 %load = load <8 x i64>, <8 x i64>* %__b
9559 %1 = bitcast <8 x i64> %load to <8 x i64>
9560 %2 = icmp sgt <8 x i64> %0, %1
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so result lanes 8-63 are zero.
9561 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9562 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of two <8 x i64> register operands: the
; compare result is ANDed with the 8 bits of %__u, widened to 64 lanes and bitcast to i64.
; Both prefixes expect the mask to be moved into %k1 and used as the compare's write mask.
9566 define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9567 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask:
9568 ; VLX: # %bb.0: # %entry
9569 ; VLX-NEXT: kmovd %edi, %k1
9570 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9571 ; VLX-NEXT: kmovq %k0, %rax
9572 ; VLX-NEXT: vzeroupper
9575 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask:
9576 ; NoVLX: # %bb.0: # %entry
9577 ; NoVLX-NEXT: kmovw %edi, %k1
9578 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9579 ; NoVLX-NEXT: kmovw %k0, %eax
9580 ; NoVLX-NEXT: vzeroupper
9583 %0 = bitcast <8 x i64> %__a to <8 x i64>
9584 %1 = bitcast <8 x i64> %__b to <8 x i64>
9585 %2 = icmp sgt <8 x i64> %0, %1
; Reinterpret the i8 mask as one i1 per lane and apply it to the compare result.
9586 %3 = bitcast i8 %__u to <8 x i1>
9587 %4 = and <8 x i1> %2, %3
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so result lanes 8-63 are zero.
9588 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9589 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-than compare of an <8 x i64> register operand against a
; full vector loaded from %__b: the result is ANDed with the 8 bits of %__u,
; widened to 64 lanes and bitcast to i64.
; Both prefixes expect a masked vpcmpgtq with the load as its memory operand.
9593 define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
9594 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem:
9595 ; VLX: # %bb.0: # %entry
9596 ; VLX-NEXT: kmovd %edi, %k1
9597 ; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9598 ; VLX-NEXT: kmovq %k0, %rax
9599 ; VLX-NEXT: vzeroupper
9602 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem:
9603 ; NoVLX: # %bb.0: # %entry
9604 ; NoVLX-NEXT: kmovw %edi, %k1
9605 ; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9606 ; NoVLX-NEXT: kmovw %k0, %eax
9607 ; NoVLX-NEXT: vzeroupper
9610 %0 = bitcast <8 x i64> %__a to <8 x i64>
9611 %load = load <8 x i64>, <8 x i64>* %__b
9612 %1 = bitcast <8 x i64> %load to <8 x i64>
9613 %2 = icmp sgt <8 x i64> %0, %1
; Reinterpret the i8 mask as one i1 per lane and apply it to the compare result.
9614 %3 = bitcast i8 %__u to <8 x i1>
9615 %4 = and <8 x i1> %2, %3
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so result lanes 8-63 are zero.
9616 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9617 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of an <8 x i64> register operand against a scalar
; i64 loaded from %__b and splatted to all 8 lanes; the <8 x i1> result is
; widened to 64 lanes and bitcast to i64.
; Both prefixes expect the splat to appear as a {1to8} broadcast memory operand.
9622 define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
9623 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
9624 ; VLX: # %bb.0: # %entry
9625 ; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9626 ; VLX-NEXT: kmovq %k0, %rax
9627 ; VLX-NEXT: vzeroupper
9630 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
9631 ; NoVLX: # %bb.0: # %entry
9632 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9633 ; NoVLX-NEXT: kmovw %k0, %eax
9634 ; NoVLX-NEXT: vzeroupper
9637 %0 = bitcast <8 x i64> %__a to <8 x i64>
9638 %load = load i64, i64* %__b
; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into every lane.
9639 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9640 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9641 %2 = icmp sgt <8 x i64> %0, %1
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so result lanes 8-63 are zero.
9642 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9643 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of an <8 x i64> register operand against a
; scalar i64 loaded from %__b and splatted to all 8 lanes: the result is ANDed
; with the 8 bits of %__u, widened to 64 lanes and bitcast to i64.
; Both prefixes expect a masked vpcmpgtq with a {1to8} broadcast memory operand.
9647 define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
9648 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
9649 ; VLX: # %bb.0: # %entry
9650 ; VLX-NEXT: kmovd %edi, %k1
9651 ; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9652 ; VLX-NEXT: kmovq %k0, %rax
9653 ; VLX-NEXT: vzeroupper
9656 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
9657 ; NoVLX: # %bb.0: # %entry
9658 ; NoVLX-NEXT: kmovw %edi, %k1
9659 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9660 ; NoVLX-NEXT: kmovw %k0, %eax
9661 ; NoVLX-NEXT: vzeroupper
9664 %0 = bitcast <8 x i64> %__a to <8 x i64>
9665 %load = load i64, i64* %__b
; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into every lane.
9666 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9667 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9668 %2 = icmp sgt <8 x i64> %0, %1
; Reinterpret the i8 mask as one i1 per lane and apply it to the compare result.
9669 %3 = bitcast i8 %__u to <8 x i1>
9670 %4 = and <8 x i1> %3, %2
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so result lanes 8-63 are zero.
9671 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9672 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-or-equal compare of two <2 x i64> arguments reinterpreted as
; <16 x i8>; the <16 x i1> result is widened to 32 lanes and bitcast to i32.
; VLX checks expect a single vpcmpnltb into a mask register. NoVLX checks expect
; vpcmpgtb with swapped operands followed by vpternlogq $15 (presumably the
; inversion that turns b > a into a >= b - confirm), then vpmovsxbd + vptestmd.
9677 define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
9678 ; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
9679 ; VLX: # %bb.0: # %entry
9680 ; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0
9681 ; VLX-NEXT: kmovd %k0, %eax
9684 ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
9685 ; NoVLX: # %bb.0: # %entry
9686 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9687 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9688 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9689 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9690 ; NoVLX-NEXT: kmovw %k0, %eax
9691 ; NoVLX-NEXT: vzeroupper
9694 %0 = bitcast <2 x i64> %__a to <16 x i8>
9695 %1 = bitcast <2 x i64> %__b to <16 x i8>
9696 %2 = icmp sge <16 x i8> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the upper 16 result lanes are zero.
9697 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9698 %4 = bitcast <32 x i1> %3 to i32
; Signed greater-or-equal compare of a <2 x i64> argument against a <2 x i64>
; loaded from %__b, both reinterpreted as <16 x i8>; the <16 x i1> result is
; widened to 32 lanes and bitcast to i32.
; VLX checks expect the load as a memory operand of vpcmpnltb; NoVLX checks
; expect a separate vmovdqa load before the vpcmpgtb / vpternlogq sequence.
9702 define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
9703 ; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
9704 ; VLX: # %bb.0: # %entry
9705 ; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0
9706 ; VLX-NEXT: kmovd %k0, %eax
9709 ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
9710 ; NoVLX: # %bb.0: # %entry
9711 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
9712 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9713 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9714 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9715 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9716 ; NoVLX-NEXT: kmovw %k0, %eax
9717 ; NoVLX-NEXT: vzeroupper
9720 %0 = bitcast <2 x i64> %__a to <16 x i8>
9721 %load = load <2 x i64>, <2 x i64>* %__b
9722 %1 = bitcast <2 x i64> %load to <16 x i8>
9723 %2 = icmp sge <16 x i8> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the upper 16 result lanes are zero.
9724 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9725 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-or-equal compare of two <2 x i64> arguments reinterpreted
; as <16 x i8>: the result is ANDed with the 16 bits of %__u, widened to 32 lanes
; and bitcast to i32.
; VLX checks expect a vpcmpnltb write-masked by %k1; NoVLX checks expect the mask
; to be applied with a scalar andl after the compare bits reach %eax.
9729 define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
9730 ; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
9731 ; VLX: # %bb.0: # %entry
9732 ; VLX-NEXT: kmovd %edi, %k1
9733 ; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0 {%k1}
9734 ; VLX-NEXT: kmovd %k0, %eax
9737 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
9738 ; NoVLX: # %bb.0: # %entry
9739 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9740 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9741 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9742 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9743 ; NoVLX-NEXT: kmovw %k0, %eax
9744 ; NoVLX-NEXT: andl %edi, %eax
9745 ; NoVLX-NEXT: vzeroupper
9748 %0 = bitcast <2 x i64> %__a to <16 x i8>
9749 %1 = bitcast <2 x i64> %__b to <16 x i8>
9750 %2 = icmp sge <16 x i8> %0, %1
; Reinterpret the i16 mask as one i1 per lane and apply it to the compare result.
9751 %3 = bitcast i16 %__u to <16 x i1>
9752 %4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the upper 16 result lanes are zero.
9753 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9754 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-or-equal compare of a <2 x i64> argument against a
; <2 x i64> loaded from %__b, both reinterpreted as <16 x i8>: the result is
; ANDed with the 16 bits of %__u, widened to 32 lanes and bitcast to i32.
; VLX checks expect a write-masked vpcmpnltb with a memory operand; NoVLX checks
; expect a separate vmovdqa load and a trailing scalar andl with the mask.
9758 define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
9759 ; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
9760 ; VLX: # %bb.0: # %entry
9761 ; VLX-NEXT: kmovd %edi, %k1
9762 ; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
9763 ; VLX-NEXT: kmovd %k0, %eax
9766 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
9767 ; NoVLX: # %bb.0: # %entry
9768 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
9769 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9770 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9771 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9772 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9773 ; NoVLX-NEXT: kmovw %k0, %eax
9774 ; NoVLX-NEXT: andl %edi, %eax
9775 ; NoVLX-NEXT: vzeroupper
9778 %0 = bitcast <2 x i64> %__a to <16 x i8>
9779 %load = load <2 x i64>, <2 x i64>* %__b
9780 %1 = bitcast <2 x i64> %load to <16 x i8>
9781 %2 = icmp sge <16 x i8> %0, %1
; Reinterpret the i16 mask as one i1 per lane and apply it to the compare result.
9782 %3 = bitcast i16 %__u to <16 x i1>
9783 %4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the upper 16 result lanes are zero.
9784 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9785 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-or-equal compare of two <2 x i64> arguments reinterpreted as
; <16 x i8>; the <16 x i1> result is widened to 64 lanes and bitcast to i64.
; VLX checks expect a single vpcmpnltb read back with kmovq. NoVLX checks expect
; vpcmpgtb with swapped operands, vpternlogq $15, then vpmovsxbd + vptestmd + kmovw.
9790 define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
9791 ; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
9792 ; VLX: # %bb.0: # %entry
9793 ; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0
9794 ; VLX-NEXT: kmovq %k0, %rax
9797 ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
9798 ; NoVLX: # %bb.0: # %entry
9799 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9800 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9801 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9802 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9803 ; NoVLX-NEXT: kmovw %k0, %eax
9804 ; NoVLX-NEXT: vzeroupper
9807 %0 = bitcast <2 x i64> %__a to <16 x i8>
9808 %1 = bitcast <2 x i64> %__b to <16 x i8>
9809 %2 = icmp sge <16 x i8> %0, %1
; Every shuffle index >= 16 selects a lane of the zeroinitializer operand, so result lanes 16-63 are zero.
9810 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9811 %4 = bitcast <64 x i1> %3 to i64
; Signed greater-or-equal compare of a <2 x i64> argument against a <2 x i64>
; loaded from %__b, both reinterpreted as <16 x i8>; the <16 x i1> result is
; widened to 64 lanes and bitcast to i64.
; VLX checks expect the load as a memory operand of vpcmpnltb; NoVLX checks
; expect a separate vmovdqa load before the vpcmpgtb / vpternlogq sequence.
9815 define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
9816 ; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
9817 ; VLX: # %bb.0: # %entry
9818 ; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0
9819 ; VLX-NEXT: kmovq %k0, %rax
9822 ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
9823 ; NoVLX: # %bb.0: # %entry
9824 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
9825 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9826 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9827 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9828 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9829 ; NoVLX-NEXT: kmovw %k0, %eax
9830 ; NoVLX-NEXT: vzeroupper
9833 %0 = bitcast <2 x i64> %__a to <16 x i8>
9834 %load = load <2 x i64>, <2 x i64>* %__b
9835 %1 = bitcast <2 x i64> %load to <16 x i8>
9836 %2 = icmp sge <16 x i8> %0, %1
; Every shuffle index >= 16 selects a lane of the zeroinitializer operand, so result lanes 16-63 are zero.
9837 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9838 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-or-equal compare of two <2 x i64> arguments reinterpreted
; as <16 x i8>: the result is ANDed with the 16 bits of %__u, widened to 64 lanes
; and bitcast to i64.
; VLX checks expect a vpcmpnltb write-masked by %k1; NoVLX checks expect the mask
; to be applied with a scalar andl after the compare bits reach %eax.
9842 define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
9843 ; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
9844 ; VLX: # %bb.0: # %entry
9845 ; VLX-NEXT: kmovd %edi, %k1
9846 ; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0 {%k1}
9847 ; VLX-NEXT: kmovq %k0, %rax
9850 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
9851 ; NoVLX: # %bb.0: # %entry
9852 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9853 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9854 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9855 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9856 ; NoVLX-NEXT: kmovw %k0, %eax
9857 ; NoVLX-NEXT: andl %edi, %eax
9858 ; NoVLX-NEXT: vzeroupper
9861 %0 = bitcast <2 x i64> %__a to <16 x i8>
9862 %1 = bitcast <2 x i64> %__b to <16 x i8>
9863 %2 = icmp sge <16 x i8> %0, %1
; Reinterpret the i16 mask as one i1 per lane and apply it to the compare result.
9864 %3 = bitcast i16 %__u to <16 x i1>
9865 %4 = and <16 x i1> %2, %3
; Every shuffle index >= 16 selects a lane of the zeroinitializer operand, so result lanes 16-63 are zero.
9866 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9867 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-or-equal compare of a <2 x i64> argument against a
; <2 x i64> loaded from %__b, both reinterpreted as <16 x i8>: the result is
; ANDed with the 16 bits of %__u, widened to 64 lanes and bitcast to i64.
; VLX checks expect a write-masked vpcmpnltb with a memory operand; NoVLX checks
; expect a separate vmovdqa load and a trailing scalar andl with the mask.
9871 define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
9872 ; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
9873 ; VLX: # %bb.0: # %entry
9874 ; VLX-NEXT: kmovd %edi, %k1
9875 ; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
9876 ; VLX-NEXT: kmovq %k0, %rax
9879 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
9880 ; NoVLX: # %bb.0: # %entry
9881 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
9882 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9883 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9884 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9885 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9886 ; NoVLX-NEXT: kmovw %k0, %eax
9887 ; NoVLX-NEXT: andl %edi, %eax
9888 ; NoVLX-NEXT: vzeroupper
9891 %0 = bitcast <2 x i64> %__a to <16 x i8>
9892 %load = load <2 x i64>, <2 x i64>* %__b
9893 %1 = bitcast <2 x i64> %load to <16 x i8>
9894 %2 = icmp sge <16 x i8> %0, %1
; Reinterpret the i16 mask as one i1 per lane and apply it to the compare result.
9895 %3 = bitcast i16 %__u to <16 x i1>
9896 %4 = and <16 x i1> %2, %3
; Every shuffle index >= 16 selects a lane of the zeroinitializer operand, so result lanes 16-63 are zero.
9897 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9898 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of %__a and %__b viewed as <32 x i8>. The <32 x i1>
; result is widened to <64 x i1> (shuffle indices >= 32 select
; zeroinitializer lanes) and bitcast to i64. The assembly assertions are
; autogenerated by utils/update_llc_test_checks.py; regenerate, do not hand-edit.
9903 define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
9904 ; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
9905 ; VLX: # %bb.0: # %entry
9906 ; VLX-NEXT: vpcmpnltb %ymm1, %ymm0, %k0
9907 ; VLX-NEXT: kmovq %k0, %rax
9908 ; VLX-NEXT: vzeroupper
9911 ; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
9912 ; NoVLX: # %bb.0: # %entry
9913 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
9914 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9915 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
9916 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
9917 ; NoVLX-NEXT: kmovw %k0, %ecx
9918 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
9919 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9920 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9921 ; NoVLX-NEXT: kmovw %k0, %eax
9922 ; NoVLX-NEXT: shll $16, %eax
9923 ; NoVLX-NEXT: orl %ecx, %eax
9924 ; NoVLX-NEXT: vzeroupper
9927 %0 = bitcast <4 x i64> %__a to <32 x i8>
9928 %1 = bitcast <4 x i64> %__b to <32 x i8>
9929 %2 = icmp sge <32 x i8> %0, %1
9930 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
9931 %4 = bitcast <64 x i1> %3 to i64
; Signed >= compare of %__a (viewed as <32 x i8>) against the vector loaded
; from %__b. The <32 x i1> result is widened to <64 x i1> (shuffle indices
; >= 32 select zeroinitializer lanes) and bitcast to i64. The assembly
; assertions are autogenerated by utils/update_llc_test_checks.py.
9935 define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
9936 ; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem:
9937 ; VLX: # %bb.0: # %entry
9938 ; VLX-NEXT: vpcmpnltb (%rdi), %ymm0, %k0
9939 ; VLX-NEXT: kmovq %k0, %rax
9940 ; VLX-NEXT: vzeroupper
9943 ; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem:
9944 ; NoVLX: # %bb.0: # %entry
9945 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
9946 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
9947 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9948 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
9949 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
9950 ; NoVLX-NEXT: kmovw %k0, %ecx
9951 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
9952 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9953 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9954 ; NoVLX-NEXT: kmovw %k0, %eax
9955 ; NoVLX-NEXT: shll $16, %eax
9956 ; NoVLX-NEXT: orl %ecx, %eax
9957 ; NoVLX-NEXT: vzeroupper
9960 %0 = bitcast <4 x i64> %__a to <32 x i8>
9961 %load = load <4 x i64>, <4 x i64>* %__b
9962 %1 = bitcast <4 x i64> %load to <32 x i8>
9963 %2 = icmp sge <32 x i8> %0, %1
9964 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
9965 %4 = bitcast <64 x i1> %3 to i64
; Signed >= compare of %__a and %__b viewed as <32 x i8>, ANDed lane-wise
; with the i32 mask %__u. The <32 x i1> result is widened to <64 x i1>
; (shuffle indices >= 32 select zeroinitializer lanes) and bitcast to i64.
; The assembly assertions are autogenerated by utils/update_llc_test_checks.py.
9969 define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
9970 ; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
9971 ; VLX: # %bb.0: # %entry
9972 ; VLX-NEXT: kmovd %edi, %k1
9973 ; VLX-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 {%k1}
9974 ; VLX-NEXT: kmovq %k0, %rax
9975 ; VLX-NEXT: vzeroupper
9978 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
9979 ; NoVLX: # %bb.0: # %entry
9980 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
9981 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9982 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
9983 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
9984 ; NoVLX-NEXT: kmovw %k0, %eax
9985 ; NoVLX-NEXT: andl %edi, %eax
9986 ; NoVLX-NEXT: shrl $16, %edi
9987 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
9988 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9989 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9990 ; NoVLX-NEXT: kmovw %k0, %ecx
9991 ; NoVLX-NEXT: andl %edi, %ecx
9992 ; NoVLX-NEXT: shll $16, %ecx
9993 ; NoVLX-NEXT: movzwl %ax, %eax
9994 ; NoVLX-NEXT: orl %ecx, %eax
9995 ; NoVLX-NEXT: vzeroupper
9998 %0 = bitcast <4 x i64> %__a to <32 x i8>
9999 %1 = bitcast <4 x i64> %__b to <32 x i8>
10000 %2 = icmp sge <32 x i8> %0, %1
10001 %3 = bitcast i32 %__u to <32 x i1>
10002 %4 = and <32 x i1> %2, %3
10003 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10004 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of %__a (viewed as <32 x i8>) against the vector loaded
; from %__b, ANDed lane-wise with the i32 mask %__u. The <32 x i1> result is
; widened to <64 x i1> (shuffle indices >= 32 select zeroinitializer lanes)
; and bitcast to i64. The assembly assertions are autogenerated by
; utils/update_llc_test_checks.py.
10008 define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
10009 ; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
10010 ; VLX: # %bb.0: # %entry
10011 ; VLX-NEXT: kmovd %edi, %k1
10012 ; VLX-NEXT: vpcmpnltb (%rsi), %ymm0, %k0 {%k1}
10013 ; VLX-NEXT: kmovq %k0, %rax
10014 ; VLX-NEXT: vzeroupper
10017 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
10018 ; NoVLX: # %bb.0: # %entry
10019 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
10020 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
10021 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10022 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
10023 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
10024 ; NoVLX-NEXT: kmovw %k0, %eax
10025 ; NoVLX-NEXT: andl %edi, %eax
10026 ; NoVLX-NEXT: shrl $16, %edi
10027 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
10028 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
10029 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10030 ; NoVLX-NEXT: kmovw %k0, %ecx
10031 ; NoVLX-NEXT: andl %edi, %ecx
10032 ; NoVLX-NEXT: shll $16, %ecx
10033 ; NoVLX-NEXT: movzwl %ax, %eax
10034 ; NoVLX-NEXT: orl %ecx, %eax
10035 ; NoVLX-NEXT: vzeroupper
10038 %0 = bitcast <4 x i64> %__a to <32 x i8>
10039 %load = load <4 x i64>, <4 x i64>* %__b
10040 %1 = bitcast <4 x i64> %load to <32 x i8>
10041 %2 = icmp sge <32 x i8> %0, %1
10042 %3 = bitcast i32 %__u to <32 x i1>
10043 %4 = and <32 x i1> %2, %3
10044 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10045 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of %__a and %__b viewed as <8 x i16>. The <8 x i1>
; result is widened to <16 x i1> (shuffle indices >= 8 select
; zeroinitializer lanes) and bitcast to i16. The assembly assertions are
; autogenerated by utils/update_llc_test_checks.py.
10050 define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10051 ; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask:
10052 ; VLX: # %bb.0: # %entry
10053 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0
10054 ; VLX-NEXT: kmovd %k0, %eax
10055 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
10058 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask:
10059 ; NoVLX: # %bb.0: # %entry
10060 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10061 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10062 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10063 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
10064 ; NoVLX-NEXT: kmovw %k0, %eax
10065 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
10066 ; NoVLX-NEXT: vzeroupper
10069 %0 = bitcast <2 x i64> %__a to <8 x i16>
10070 %1 = bitcast <2 x i64> %__b to <8 x i16>
10071 %2 = icmp sge <8 x i16> %0, %1
10072 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10073 %4 = bitcast <16 x i1> %3 to i16
; Signed >= compare of %__a (viewed as <8 x i16>) against the vector loaded
; from %__b. The <8 x i1> result is widened to <16 x i1> (shuffle indices
; >= 8 select zeroinitializer lanes) and bitcast to i16. The assembly
; assertions are autogenerated by utils/update_llc_test_checks.py.
10077 define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10078 ; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem:
10079 ; VLX: # %bb.0: # %entry
10080 ; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
10081 ; VLX-NEXT: kmovd %k0, %eax
10082 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
10085 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem:
10086 ; NoVLX: # %bb.0: # %entry
10087 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
10088 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10089 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10090 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10091 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
10092 ; NoVLX-NEXT: kmovw %k0, %eax
10093 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
10094 ; NoVLX-NEXT: vzeroupper
10097 %0 = bitcast <2 x i64> %__a to <8 x i16>
10098 %load = load <2 x i64>, <2 x i64>* %__b
10099 %1 = bitcast <2 x i64> %load to <8 x i16>
10100 %2 = icmp sge <8 x i16> %0, %1
10101 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10102 %4 = bitcast <16 x i1> %3 to i16
; Signed >= compare of %__a and %__b viewed as <8 x i16>, ANDed lane-wise
; with the i8 mask %__u. The <8 x i1> result is widened to <16 x i1>
; (shuffle indices >= 8 select zeroinitializer lanes) and bitcast to i16.
; The assembly assertions are autogenerated by utils/update_llc_test_checks.py.
10106 define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10107 ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask:
10108 ; VLX: # %bb.0: # %entry
10109 ; VLX-NEXT: kmovd %edi, %k1
10110 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0 {%k1}
10111 ; VLX-NEXT: kmovd %k0, %eax
10112 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
10115 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask:
10116 ; NoVLX: # %bb.0: # %entry
10117 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10118 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10119 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10120 ; NoVLX-NEXT: kmovw %edi, %k1
10121 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
10122 ; NoVLX-NEXT: kmovw %k0, %eax
10123 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
10124 ; NoVLX-NEXT: vzeroupper
10127 %0 = bitcast <2 x i64> %__a to <8 x i16>
10128 %1 = bitcast <2 x i64> %__b to <8 x i16>
10129 %2 = icmp sge <8 x i16> %0, %1
10130 %3 = bitcast i8 %__u to <8 x i1>
10131 %4 = and <8 x i1> %2, %3
10132 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10133 %6 = bitcast <16 x i1> %5 to i16
; Signed >= compare of %__a (viewed as <8 x i16>) against the vector loaded
; from %__b, ANDed lane-wise with the i8 mask %__u. The <8 x i1> result is
; widened to <16 x i1> (shuffle indices >= 8 select zeroinitializer lanes)
; and bitcast to i16. The assembly assertions are autogenerated by
; utils/update_llc_test_checks.py.
10137 define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10138 ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem:
10139 ; VLX: # %bb.0: # %entry
10140 ; VLX-NEXT: kmovd %edi, %k1
10141 ; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
10142 ; VLX-NEXT: kmovd %k0, %eax
10143 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
10146 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem:
10147 ; NoVLX: # %bb.0: # %entry
10148 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
10149 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10150 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10151 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10152 ; NoVLX-NEXT: kmovw %edi, %k1
10153 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
10154 ; NoVLX-NEXT: kmovw %k0, %eax
10155 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
10156 ; NoVLX-NEXT: vzeroupper
10159 %0 = bitcast <2 x i64> %__a to <8 x i16>
10160 %load = load <2 x i64>, <2 x i64>* %__b
10161 %1 = bitcast <2 x i64> %load to <8 x i16>
10162 %2 = icmp sge <8 x i16> %0, %1
10163 %3 = bitcast i8 %__u to <8 x i1>
10164 %4 = and <8 x i1> %2, %3
10165 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10166 %6 = bitcast <16 x i1> %5 to i16
; Signed >= compare of %__a and %__b viewed as <8 x i16>. The <8 x i1>
; result is widened to <32 x i1> (shuffle indices >= 8 select
; zeroinitializer lanes) and bitcast to i32. The assembly assertions are
; autogenerated by utils/update_llc_test_checks.py.
10171 define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10172 ; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
10173 ; VLX: # %bb.0: # %entry
10174 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0
10175 ; VLX-NEXT: kmovd %k0, %eax
10178 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
10179 ; NoVLX: # %bb.0: # %entry
10180 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10181 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10182 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10183 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
10184 ; NoVLX-NEXT: kmovw %k0, %eax
10185 ; NoVLX-NEXT: vzeroupper
10188 %0 = bitcast <2 x i64> %__a to <8 x i16>
10189 %1 = bitcast <2 x i64> %__b to <8 x i16>
10190 %2 = icmp sge <8 x i16> %0, %1
10191 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10192 %4 = bitcast <32 x i1> %3 to i32
; Signed >= compare of %__a (viewed as <8 x i16>) against the vector loaded
; from %__b. The <8 x i1> result is widened to <32 x i1> (shuffle indices
; >= 8 select zeroinitializer lanes) and bitcast to i32. The assembly
; assertions are autogenerated by utils/update_llc_test_checks.py.
10196 define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10197 ; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
10198 ; VLX: # %bb.0: # %entry
10199 ; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
10200 ; VLX-NEXT: kmovd %k0, %eax
10203 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
10204 ; NoVLX: # %bb.0: # %entry
10205 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
10206 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10207 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10208 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10209 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
10210 ; NoVLX-NEXT: kmovw %k0, %eax
10211 ; NoVLX-NEXT: vzeroupper
10214 %0 = bitcast <2 x i64> %__a to <8 x i16>
10215 %load = load <2 x i64>, <2 x i64>* %__b
10216 %1 = bitcast <2 x i64> %load to <8 x i16>
10217 %2 = icmp sge <8 x i16> %0, %1
10218 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10219 %4 = bitcast <32 x i1> %3 to i32
; Signed >= compare of %__a and %__b viewed as <8 x i16>, ANDed lane-wise
; with the i8 mask %__u. The <8 x i1> result is widened to <32 x i1>
; (shuffle indices >= 8 select zeroinitializer lanes) and bitcast to i32.
; The assembly assertions are autogenerated by utils/update_llc_test_checks.py.
10223 define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10224 ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
10225 ; VLX: # %bb.0: # %entry
10226 ; VLX-NEXT: kmovd %edi, %k1
10227 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0 {%k1}
10228 ; VLX-NEXT: kmovd %k0, %eax
10231 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
10232 ; NoVLX: # %bb.0: # %entry
10233 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10234 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10235 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10236 ; NoVLX-NEXT: kmovw %edi, %k1
10237 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
10238 ; NoVLX-NEXT: kmovw %k0, %eax
10239 ; NoVLX-NEXT: vzeroupper
10242 %0 = bitcast <2 x i64> %__a to <8 x i16>
10243 %1 = bitcast <2 x i64> %__b to <8 x i16>
10244 %2 = icmp sge <8 x i16> %0, %1
10245 %3 = bitcast i8 %__u to <8 x i1>
10246 %4 = and <8 x i1> %2, %3
10247 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10248 %6 = bitcast <32 x i1> %5 to i32
; Signed >= compare of %__a (viewed as <8 x i16>) against the vector loaded
; from %__b, ANDed lane-wise with the i8 mask %__u. The <8 x i1> result is
; widened to <32 x i1> (shuffle indices >= 8 select zeroinitializer lanes)
; and bitcast to i32. The assembly assertions are autogenerated by
; utils/update_llc_test_checks.py.
10252 define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10253 ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
10254 ; VLX: # %bb.0: # %entry
10255 ; VLX-NEXT: kmovd %edi, %k1
10256 ; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
10257 ; VLX-NEXT: kmovd %k0, %eax
10260 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
10261 ; NoVLX: # %bb.0: # %entry
10262 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
10263 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10264 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10265 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10266 ; NoVLX-NEXT: kmovw %edi, %k1
10267 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
10268 ; NoVLX-NEXT: kmovw %k0, %eax
10269 ; NoVLX-NEXT: vzeroupper
10272 %0 = bitcast <2 x i64> %__a to <8 x i16>
10273 %load = load <2 x i64>, <2 x i64>* %__b
10274 %1 = bitcast <2 x i64> %load to <8 x i16>
10275 %2 = icmp sge <8 x i16> %0, %1
10276 %3 = bitcast i8 %__u to <8 x i1>
10277 %4 = and <8 x i1> %2, %3
10278 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10279 %6 = bitcast <32 x i1> %5 to i32
; Signed >= compare of %__a and %__b viewed as <8 x i16>. The <8 x i1>
; result is widened to <64 x i1> (shuffle indices >= 8 select
; zeroinitializer lanes) and bitcast to i64. The assembly assertions are
; autogenerated by utils/update_llc_test_checks.py.
10284 define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10285 ; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
10286 ; VLX: # %bb.0: # %entry
10287 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0
10288 ; VLX-NEXT: kmovq %k0, %rax
10291 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
10292 ; NoVLX: # %bb.0: # %entry
10293 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10294 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10295 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10296 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
10297 ; NoVLX-NEXT: kmovw %k0, %eax
10298 ; NoVLX-NEXT: vzeroupper
10301 %0 = bitcast <2 x i64> %__a to <8 x i16>
10302 %1 = bitcast <2 x i64> %__b to <8 x i16>
10303 %2 = icmp sge <8 x i16> %0, %1
10304 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10305 %4 = bitcast <64 x i1> %3 to i64
; Signed >= compare of %__a (viewed as <8 x i16>) against the vector loaded
; from %__b. The <8 x i1> result is widened to <64 x i1> (shuffle indices
; >= 8 select zeroinitializer lanes) and bitcast to i64. The assembly
; assertions are autogenerated by utils/update_llc_test_checks.py.
10309 define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10310 ; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
10311 ; VLX: # %bb.0: # %entry
10312 ; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
10313 ; VLX-NEXT: kmovq %k0, %rax
10316 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
10317 ; NoVLX: # %bb.0: # %entry
10318 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
10319 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10320 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10321 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10322 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
10323 ; NoVLX-NEXT: kmovw %k0, %eax
10324 ; NoVLX-NEXT: vzeroupper
10327 %0 = bitcast <2 x i64> %__a to <8 x i16>
10328 %load = load <2 x i64>, <2 x i64>* %__b
10329 %1 = bitcast <2 x i64> %load to <8 x i16>
10330 %2 = icmp sge <8 x i16> %0, %1
10331 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10332 %4 = bitcast <64 x i1> %3 to i64
; Signed >= compare of %__a and %__b viewed as <8 x i16>, ANDed lane-wise
; with the i8 mask %__u. The <8 x i1> result is widened to <64 x i1>
; (shuffle indices >= 8 select zeroinitializer lanes) and bitcast to i64.
; The assembly assertions are autogenerated by utils/update_llc_test_checks.py.
10336 define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10337 ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
10338 ; VLX: # %bb.0: # %entry
10339 ; VLX-NEXT: kmovd %edi, %k1
10340 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0 {%k1}
10341 ; VLX-NEXT: kmovq %k0, %rax
10344 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
10345 ; NoVLX: # %bb.0: # %entry
10346 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10347 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10348 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10349 ; NoVLX-NEXT: kmovw %edi, %k1
10350 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
10351 ; NoVLX-NEXT: kmovw %k0, %eax
10352 ; NoVLX-NEXT: vzeroupper
10355 %0 = bitcast <2 x i64> %__a to <8 x i16>
10356 %1 = bitcast <2 x i64> %__b to <8 x i16>
10357 %2 = icmp sge <8 x i16> %0, %1
10358 %3 = bitcast i8 %__u to <8 x i1>
10359 %4 = and <8 x i1> %2, %3
10360 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10361 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of %__a (viewed as <8 x i16>) against the vector loaded
; from %__b, ANDed lane-wise with the i8 mask %__u. The <8 x i1> result is
; widened to <64 x i1> (shuffle indices >= 8 select zeroinitializer lanes)
; and bitcast to i64. The assembly assertions are autogenerated by
; utils/update_llc_test_checks.py.
10365 define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10366 ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
10367 ; VLX: # %bb.0: # %entry
10368 ; VLX-NEXT: kmovd %edi, %k1
10369 ; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
10370 ; VLX-NEXT: kmovq %k0, %rax
10373 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
10374 ; NoVLX: # %bb.0: # %entry
10375 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
10376 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10377 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10378 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10379 ; NoVLX-NEXT: kmovw %edi, %k1
10380 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
10381 ; NoVLX-NEXT: kmovw %k0, %eax
10382 ; NoVLX-NEXT: vzeroupper
10385 %0 = bitcast <2 x i64> %__a to <8 x i16>
10386 %load = load <2 x i64>, <2 x i64>* %__b
10387 %1 = bitcast <2 x i64> %load to <8 x i16>
10388 %2 = icmp sge <8 x i16> %0, %1
10389 %3 = bitcast i8 %__u to <8 x i1>
10390 %4 = and <8 x i1> %2, %3
10391 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10392 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of %__a and %__b viewed as <16 x i16>. The <16 x i1>
; result is widened to <32 x i1> (shuffle indices >= 16 select
; zeroinitializer lanes) and bitcast to i32. The assembly assertions are
; autogenerated by utils/update_llc_test_checks.py.
10397 define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10398 ; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
10399 ; VLX: # %bb.0: # %entry
10400 ; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0
10401 ; VLX-NEXT: kmovd %k0, %eax
10402 ; VLX-NEXT: vzeroupper
10405 ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
10406 ; NoVLX: # %bb.0: # %entry
10407 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10408 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10409 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10410 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10411 ; NoVLX-NEXT: kmovw %k0, %eax
10412 ; NoVLX-NEXT: vzeroupper
10415 %0 = bitcast <4 x i64> %__a to <16 x i16>
10416 %1 = bitcast <4 x i64> %__b to <16 x i16>
10417 %2 = icmp sge <16 x i16> %0, %1
10418 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10419 %4 = bitcast <32 x i1> %3 to i32
; Signed >= compare of %__a (viewed as <16 x i16>) against the vector loaded
; from %__b. The <16 x i1> result is widened to <32 x i1> (shuffle indices
; >= 16 select zeroinitializer lanes) and bitcast to i32. The assembly
; assertions are autogenerated by utils/update_llc_test_checks.py.
10423 define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
10424 ; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem:
10425 ; VLX: # %bb.0: # %entry
10426 ; VLX-NEXT: vpcmpnltw (%rdi), %ymm0, %k0
10427 ; VLX-NEXT: kmovd %k0, %eax
10428 ; VLX-NEXT: vzeroupper
10431 ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem:
10432 ; NoVLX: # %bb.0: # %entry
10433 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
10434 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10435 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10436 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10437 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10438 ; NoVLX-NEXT: kmovw %k0, %eax
10439 ; NoVLX-NEXT: vzeroupper
10442 %0 = bitcast <4 x i64> %__a to <16 x i16>
10443 %load = load <4 x i64>, <4 x i64>* %__b
10444 %1 = bitcast <4 x i64> %load to <16 x i16>
10445 %2 = icmp sge <16 x i16> %0, %1
10446 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10447 %4 = bitcast <32 x i1> %3 to i32
; Signed >= compare of %__a and %__b viewed as <16 x i16>, ANDed lane-wise
; with the i16 mask %__u. The <16 x i1> result is widened to <32 x i1>
; (shuffle indices >= 16 select zeroinitializer lanes) and bitcast to i32.
; The assembly assertions are autogenerated by utils/update_llc_test_checks.py.
10451 define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10452 ; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
10453 ; VLX: # %bb.0: # %entry
10454 ; VLX-NEXT: kmovd %edi, %k1
10455 ; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0 {%k1}
10456 ; VLX-NEXT: kmovd %k0, %eax
10457 ; VLX-NEXT: vzeroupper
10460 ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
10461 ; NoVLX: # %bb.0: # %entry
10462 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10463 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10464 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10465 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10466 ; NoVLX-NEXT: kmovw %k0, %eax
10467 ; NoVLX-NEXT: andl %edi, %eax
10468 ; NoVLX-NEXT: vzeroupper
10471 %0 = bitcast <4 x i64> %__a to <16 x i16>
10472 %1 = bitcast <4 x i64> %__b to <16 x i16>
10473 %2 = icmp sge <16 x i16> %0, %1
10474 %3 = bitcast i16 %__u to <16 x i1>
10475 %4 = and <16 x i1> %2, %3
10476 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10477 %6 = bitcast <32 x i1> %5 to i32
; Signed >= compare of %__a (viewed as <16 x i16>) against the vector loaded
; from %__b, ANDed lane-wise with the i16 mask %__u. The <16 x i1> result is
; widened to <32 x i1> (shuffle indices >= 16 select zeroinitializer lanes)
; and bitcast to i32. The assembly assertions are autogenerated by
; utils/update_llc_test_checks.py.
10481 define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
10482 ; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem:
10483 ; VLX: # %bb.0: # %entry
10484 ; VLX-NEXT: kmovd %edi, %k1
10485 ; VLX-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1}
10486 ; VLX-NEXT: kmovd %k0, %eax
10487 ; VLX-NEXT: vzeroupper
10490 ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem:
10491 ; NoVLX: # %bb.0: # %entry
10492 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
10493 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10494 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10495 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10496 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10497 ; NoVLX-NEXT: kmovw %k0, %eax
10498 ; NoVLX-NEXT: andl %edi, %eax
10499 ; NoVLX-NEXT: vzeroupper
10502 %0 = bitcast <4 x i64> %__a to <16 x i16>
10503 %load = load <4 x i64>, <4 x i64>* %__b
10504 %1 = bitcast <4 x i64> %load to <16 x i16>
10505 %2 = icmp sge <16 x i16> %0, %1
10506 %3 = bitcast i16 %__u to <16 x i1>
10507 %4 = and <16 x i1> %2, %3
10508 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10509 %6 = bitcast <32 x i1> %5 to i32
; Signed >= compare of %__a and %__b viewed as <16 x i16>. The <16 x i1>
; result is widened to <64 x i1> (shuffle indices >= 16 select
; zeroinitializer lanes) and bitcast to i64. The assembly assertions are
; autogenerated by utils/update_llc_test_checks.py.
10514 define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10515 ; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
10516 ; VLX: # %bb.0: # %entry
10517 ; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0
10518 ; VLX-NEXT: kmovq %k0, %rax
10519 ; VLX-NEXT: vzeroupper
10522 ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
10523 ; NoVLX: # %bb.0: # %entry
10524 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10525 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10526 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10527 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10528 ; NoVLX-NEXT: kmovw %k0, %eax
10529 ; NoVLX-NEXT: vzeroupper
10532 %0 = bitcast <4 x i64> %__a to <16 x i16>
10533 %1 = bitcast <4 x i64> %__b to <16 x i16>
10534 %2 = icmp sge <16 x i16> %0, %1
10535 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10536 %4 = bitcast <64 x i1> %3 to i64
; Signed >= compare of %__a (viewed as <16 x i16>) against the vector loaded
; from %__b. The <16 x i1> result is widened to <64 x i1> (shuffle indices
; >= 16 select zeroinitializer lanes) and bitcast to i64. The assembly
; assertions are autogenerated by utils/update_llc_test_checks.py.
10540 define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
10541 ; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
10542 ; VLX: # %bb.0: # %entry
10543 ; VLX-NEXT: vpcmpnltw (%rdi), %ymm0, %k0
10544 ; VLX-NEXT: kmovq %k0, %rax
10545 ; VLX-NEXT: vzeroupper
10548 ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
10549 ; NoVLX: # %bb.0: # %entry
10550 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
10551 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10552 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10553 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10554 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10555 ; NoVLX-NEXT: kmovw %k0, %eax
10556 ; NoVLX-NEXT: vzeroupper
10559 %0 = bitcast <4 x i64> %__a to <16 x i16>
10560 %load = load <4 x i64>, <4 x i64>* %__b
10561 %1 = bitcast <4 x i64> %load to <16 x i16>
10562 %2 = icmp sge <16 x i16> %0, %1
10563 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10564 %4 = bitcast <64 x i1> %3 to i64
; Signed >= compare of %__a and %__b viewed as <16 x i16>, ANDed lane-wise
; with the i16 mask %__u. The <16 x i1> result is widened to <64 x i1>
; (shuffle indices >= 16 select zeroinitializer lanes) and bitcast to i64.
; The assembly assertions are autogenerated by utils/update_llc_test_checks.py.
10568 define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10569 ; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
10570 ; VLX: # %bb.0: # %entry
10571 ; VLX-NEXT: kmovd %edi, %k1
10572 ; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0 {%k1}
10573 ; VLX-NEXT: kmovq %k0, %rax
10574 ; VLX-NEXT: vzeroupper
10577 ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
10578 ; NoVLX: # %bb.0: # %entry
10579 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10580 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10581 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10582 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10583 ; NoVLX-NEXT: kmovw %k0, %eax
10584 ; NoVLX-NEXT: andl %edi, %eax
10585 ; NoVLX-NEXT: vzeroupper
10588 %0 = bitcast <4 x i64> %__a to <16 x i16>
10589 %1 = bitcast <4 x i64> %__b to <16 x i16>
10590 %2 = icmp sge <16 x i16> %0, %1
10591 %3 = bitcast i16 %__u to <16 x i1>
10592 %4 = and <16 x i1> %2, %3
10593 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10594 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare of <16 x i16> lanes: %__a against a vector loaded
; from %__b. The compare result is ANDed with %__u (bitcast to <16 x i1>),
; widened to <64 x i1> (indices 16-31 select zeroinitializer lanes, so bits
; 16-63 are zero) and bitcast to i64.
; The VLX configuration expects a write-masked vpcmpnltw with the load folded
; as (%rsi); the NoVLX configuration expects a separate vmovdqa load, the
; vpcmpgtw / vpternlogq $15 sequence, and a scalar andl with %edi.
10598 define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
10599 ; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
10600 ; VLX: # %bb.0: # %entry
10601 ; VLX-NEXT: kmovd %edi, %k1
10602 ; VLX-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1}
10603 ; VLX-NEXT: kmovq %k0, %rax
10604 ; VLX-NEXT: vzeroupper
10607 ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
10608 ; NoVLX: # %bb.0: # %entry
10609 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
10610 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10611 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10612 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10613 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10614 ; NoVLX-NEXT: kmovw %k0, %eax
10615 ; NoVLX-NEXT: andl %edi, %eax
10616 ; NoVLX-NEXT: vzeroupper
10619 %0 = bitcast <4 x i64> %__a to <16 x i16>
10620 %load = load <4 x i64>, <4 x i64>* %__b
10621 %1 = bitcast <4 x i64> %load to <16 x i16>
10622 %2 = icmp sge <16 x i16> %0, %1
10623 %3 = bitcast i16 %__u to <16 x i1>
10624 %4 = and <16 x i1> %2, %3
10625 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10626 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of <32 x i16> lanes (%__a vs. %__b). The <32 x i1> result
; is widened to <64 x i1> and bitcast to i64; shuffle indices 32-63 select
; lanes of the zeroinitializer operand, so result bits 32-63 are zero.
; The VLX configuration expects a single zmm vpcmpnltw. The NoVLX
; configuration expects the upper 256-bit halves extracted with vextracti64x4,
; each half compared via vpcmpgtw + vpternlogq $15, and the two 16-bit masks
; joined with shll $16 / orl.
10631 define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
10632 ; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
10633 ; VLX: # %bb.0: # %entry
10634 ; VLX-NEXT: vpcmpnltw %zmm1, %zmm0, %k0
10635 ; VLX-NEXT: kmovq %k0, %rax
10636 ; VLX-NEXT: vzeroupper
10639 ; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
10640 ; NoVLX: # %bb.0: # %entry
10641 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
10642 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3
10643 ; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
10644 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10645 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10646 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10647 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10648 ; NoVLX-NEXT: kmovw %k0, %ecx
10649 ; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
10650 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm0
10651 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10652 ; NoVLX-NEXT: kmovw %k0, %eax
10653 ; NoVLX-NEXT: shll $16, %eax
10654 ; NoVLX-NEXT: orl %ecx, %eax
10655 ; NoVLX-NEXT: vzeroupper
10658 %0 = bitcast <8 x i64> %__a to <32 x i16>
10659 %1 = bitcast <8 x i64> %__b to <32 x i16>
10660 %2 = icmp sge <32 x i16> %0, %1
10661 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10662 %4 = bitcast <64 x i1> %3 to i64
; Signed >= compare of <32 x i16> lanes: %__a against a vector loaded from
; %__b. The <32 x i1> result is widened to <64 x i1> (indices 32-63 select
; zeroinitializer lanes, so bits 32-63 are zero) and bitcast to i64.
; The VLX configuration expects the load folded into one zmm vpcmpnltw. The
; NoVLX configuration expects two 256-bit loads at (%rdi) and 32(%rdi), a
; vpcmpgtw + vpternlogq $15 sequence per half, and the two 16-bit masks joined
; with shll $16 / orl.
10666 define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
10667 ; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
10668 ; VLX: # %bb.0: # %entry
10669 ; VLX-NEXT: vpcmpnltw (%rdi), %zmm0, %k0
10670 ; VLX-NEXT: kmovq %k0, %rax
10671 ; VLX-NEXT: vzeroupper
10674 ; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
10675 ; NoVLX: # %bb.0: # %entry
10676 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1
10677 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm2
10678 ; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm3
10679 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
10680 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
10681 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10682 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10683 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10684 ; NoVLX-NEXT: kmovw %k0, %ecx
10685 ; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
10686 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm0
10687 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10688 ; NoVLX-NEXT: kmovw %k0, %eax
10689 ; NoVLX-NEXT: shll $16, %eax
10690 ; NoVLX-NEXT: orl %ecx, %eax
10691 ; NoVLX-NEXT: vzeroupper
10694 %0 = bitcast <8 x i64> %__a to <32 x i16>
10695 %load = load <8 x i64>, <8 x i64>* %__b
10696 %1 = bitcast <8 x i64> %load to <32 x i16>
10697 %2 = icmp sge <32 x i16> %0, %1
10698 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10699 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of <32 x i16> lanes (%__a vs. %__b). The i32 %__u
; is bitcast to <32 x i1> and ANDed with the compare result, which is then
; widened to <64 x i1> (indices 32-63 select zeroinitializer lanes, so bits
; 32-63 are zero) and bitcast to i64.
; The VLX configuration expects one write-masked zmm vpcmpnltw. The NoVLX
; configuration expects each 256-bit half compared separately, with the mask
; applied by scalar andl (shrl $16 on %edi for the upper half) and the halves
; combined with shll $16 / movzwl / orl.
10703 define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
10704 ; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
10705 ; VLX: # %bb.0: # %entry
10706 ; VLX-NEXT: kmovd %edi, %k1
10707 ; VLX-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1}
10708 ; VLX-NEXT: kmovq %k0, %rax
10709 ; VLX-NEXT: vzeroupper
10712 ; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
10713 ; NoVLX: # %bb.0: # %entry
10714 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm2
10715 ; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
10716 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
10717 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
10718 ; NoVLX-NEXT: kmovw %k0, %eax
10719 ; NoVLX-NEXT: andl %edi, %eax
10720 ; NoVLX-NEXT: shrl $16, %edi
10721 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
10722 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
10723 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10724 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10725 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10726 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10727 ; NoVLX-NEXT: kmovw %k0, %ecx
10728 ; NoVLX-NEXT: andl %edi, %ecx
10729 ; NoVLX-NEXT: shll $16, %ecx
10730 ; NoVLX-NEXT: movzwl %ax, %eax
10731 ; NoVLX-NEXT: orl %ecx, %eax
10732 ; NoVLX-NEXT: vzeroupper
10735 %0 = bitcast <8 x i64> %__a to <32 x i16>
10736 %1 = bitcast <8 x i64> %__b to <32 x i16>
10737 %2 = icmp sge <32 x i16> %0, %1
10738 %3 = bitcast i32 %__u to <32 x i1>
10739 %4 = and <32 x i1> %2, %3
10740 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10741 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare of <32 x i16> lanes: %__a against a vector loaded
; from %__b. The compare result is ANDed with %__u (bitcast to <32 x i1>),
; widened to <64 x i1> (indices 32-63 select zeroinitializer lanes, so bits
; 32-63 are zero) and bitcast to i64.
; The VLX configuration expects one write-masked zmm vpcmpnltw with the load
; folded as (%rsi). The NoVLX configuration expects 256-bit loads at (%rsi)
; and 32(%rsi), a per-half vpcmpgtw + vpternlogq $15 sequence, scalar andl
; masking, and the halves combined with shll $16 / movzwl / orl.
10745 define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
10746 ; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
10747 ; VLX: # %bb.0: # %entry
10748 ; VLX-NEXT: kmovd %edi, %k1
10749 ; VLX-NEXT: vpcmpnltw (%rsi), %zmm0, %k0 {%k1}
10750 ; VLX-NEXT: kmovq %k0, %rax
10751 ; VLX-NEXT: vzeroupper
10754 ; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
10755 ; NoVLX: # %bb.0: # %entry
10756 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
10757 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm1
10758 ; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
10759 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
10760 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
10761 ; NoVLX-NEXT: kmovw %k0, %eax
10762 ; NoVLX-NEXT: andl %edi, %eax
10763 ; NoVLX-NEXT: shrl $16, %edi
10764 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
10765 ; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm1
10766 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10767 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10768 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10769 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10770 ; NoVLX-NEXT: kmovw %k0, %ecx
10771 ; NoVLX-NEXT: andl %edi, %ecx
10772 ; NoVLX-NEXT: shll $16, %ecx
10773 ; NoVLX-NEXT: movzwl %ax, %eax
10774 ; NoVLX-NEXT: orl %ecx, %eax
10775 ; NoVLX-NEXT: vzeroupper
10778 %0 = bitcast <8 x i64> %__a to <32 x i16>
10779 %load = load <8 x i64>, <8 x i64>* %__b
10780 %1 = bitcast <8 x i64> %load to <32 x i16>
10781 %2 = icmp sge <32 x i16> %0, %1
10782 %3 = bitcast i32 %__u to <32 x i1>
10783 %4 = and <32 x i1> %2, %3
10784 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10785 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of <4 x i32> lanes (%__a vs. %__b). The <4 x i1> result is
; widened to <8 x i1> and bitcast to i8; shuffle indices 4-7 select lanes of
; the zeroinitializer operand, so result bits 4-7 are zero.
; The VLX configuration expects an xmm vpcmpnltd. The NoVLX configuration
; expects the compare done on zmm registers, followed by kshiftlw/kshiftrw by
; 12 to clear mask bits 4-15.
10790 define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10791 ; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask:
10792 ; VLX: # %bb.0: # %entry
10793 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0
10794 ; VLX-NEXT: kmovd %k0, %eax
10795 ; VLX-NEXT: # kill: def $al killed $al killed $eax
10798 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask:
10799 ; NoVLX: # %bb.0: # %entry
10800 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
10801 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10802 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
10803 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10804 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10805 ; NoVLX-NEXT: kmovw %k0, %eax
10806 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
10807 ; NoVLX-NEXT: vzeroupper
10810 %0 = bitcast <2 x i64> %__a to <4 x i32>
10811 %1 = bitcast <2 x i64> %__b to <4 x i32>
10812 %2 = icmp sge <4 x i32> %0, %1
10813 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10814 %4 = bitcast <8 x i1> %3 to i8
; Signed >= compare of <4 x i32> lanes: %__a against a vector loaded from
; %__b. The <4 x i1> result is widened to <8 x i1> (indices 4-7 select
; zeroinitializer lanes, so bits 4-7 are zero) and bitcast to i8.
; The VLX configuration expects the load folded into an xmm vpcmpnltd. The
; NoVLX configuration expects a separate vmovdqa load, a zmm vpcmpnltd, and
; kshiftlw/kshiftrw by 12 to clear mask bits 4-15.
10818 define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10819 ; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem:
10820 ; VLX: # %bb.0: # %entry
10821 ; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
10822 ; VLX-NEXT: kmovd %k0, %eax
10823 ; VLX-NEXT: # kill: def $al killed $al killed $eax
10826 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem:
10827 ; NoVLX: # %bb.0: # %entry
10828 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10829 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
10830 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
10831 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10832 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10833 ; NoVLX-NEXT: kmovw %k0, %eax
10834 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
10835 ; NoVLX-NEXT: vzeroupper
10838 %0 = bitcast <2 x i64> %__a to <4 x i32>
10839 %load = load <2 x i64>, <2 x i64>* %__b
10840 %1 = bitcast <2 x i64> %load to <4 x i32>
10841 %2 = icmp sge <4 x i32> %0, %1
10842 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10843 %4 = bitcast <8 x i1> %3 to i8
; Masked signed >= compare of <4 x i32> lanes (%__a vs. %__b). The i8 %__u is
; bitcast to <8 x i1>, its low four lanes are extracted (%extract.i) and ANDed
; with the compare result, which is then widened to <8 x i1> (indices 4-7
; select zeroinitializer lanes, so bits 4-7 are zero) and bitcast to i8.
; Both configurations expect %edi moved into %k1 and used as the vpcmpnltd
; write-mask; the NoVLX configuration additionally expects zmm operands and
; kshiftlw/kshiftrw by 12 to clear mask bits 4-15.
10847 define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10848 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
10849 ; VLX: # %bb.0: # %entry
10850 ; VLX-NEXT: kmovd %edi, %k1
10851 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
10852 ; VLX-NEXT: kmovd %k0, %eax
10853 ; VLX-NEXT: # kill: def $al killed $al killed $eax
10856 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
10857 ; NoVLX: # %bb.0: # %entry
10858 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
10859 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10860 ; NoVLX-NEXT: kmovw %edi, %k1
10861 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
10862 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10863 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10864 ; NoVLX-NEXT: kmovw %k0, %eax
10865 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
10866 ; NoVLX-NEXT: vzeroupper
10869 %0 = bitcast <2 x i64> %__a to <4 x i32>
10870 %1 = bitcast <2 x i64> %__b to <4 x i32>
10871 %2 = icmp sge <4 x i32> %0, %1
10872 %3 = bitcast i8 %__u to <8 x i1>
10873 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
10874 %4 = and <4 x i1> %2, %extract.i
10875 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10876 %6 = bitcast <8 x i1> %5 to i8
; Masked signed >= compare of <4 x i32> lanes: %__a against a vector loaded
; from %__b. The low four lanes of %__u (bitcast to <8 x i1>) are ANDed with
; the compare result, which is widened to <8 x i1> (indices 4-7 select
; zeroinitializer lanes, so bits 4-7 are zero) and bitcast to i8.
; The VLX configuration expects a write-masked xmm vpcmpnltd with the load
; folded as (%rsi). The NoVLX configuration expects a separate vmovdqa load,
; a write-masked zmm vpcmpnltd, and kshiftlw/kshiftrw by 12.
10880 define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10881 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
10882 ; VLX: # %bb.0: # %entry
10883 ; VLX-NEXT: kmovd %edi, %k1
10884 ; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
10885 ; VLX-NEXT: kmovd %k0, %eax
10886 ; VLX-NEXT: # kill: def $al killed $al killed $eax
10889 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
10890 ; NoVLX: # %bb.0: # %entry
10891 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10892 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
10893 ; NoVLX-NEXT: kmovw %edi, %k1
10894 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
10895 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10896 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10897 ; NoVLX-NEXT: kmovw %k0, %eax
10898 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
10899 ; NoVLX-NEXT: vzeroupper
10902 %0 = bitcast <2 x i64> %__a to <4 x i32>
10903 %load = load <2 x i64>, <2 x i64>* %__b
10904 %1 = bitcast <2 x i64> %load to <4 x i32>
10905 %2 = icmp sge <4 x i32> %0, %1
10906 %3 = bitcast i8 %__u to <8 x i1>
10907 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
10908 %4 = and <4 x i1> %2, %extract.i
10909 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10910 %6 = bitcast <8 x i1> %5 to i8
; Signed >= compare of <4 x i32> %__a against a broadcast scalar: the i32
; loaded from %__b is inserted into lane 0 and splat to all four lanes by an
; all-zero shuffle mask. The <4 x i1> result is widened to <8 x i1> (indices
; 4-7 select zeroinitializer lanes, so bits 4-7 are zero) and bitcast to i8.
; The VLX configuration expects the broadcast folded as a {1to4} memory
; operand of vpcmpnltd. The NoVLX configuration expects an explicit
; vpbroadcastd, a zmm vpcmpnltd, and kshiftlw/kshiftrw by 12.
10915 define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
10916 ; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b:
10917 ; VLX: # %bb.0: # %entry
10918 ; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0
10919 ; VLX-NEXT: kmovd %k0, %eax
10920 ; VLX-NEXT: # kill: def $al killed $al killed $eax
10923 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b:
10924 ; NoVLX: # %bb.0: # %entry
10925 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10926 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
10927 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
10928 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10929 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10930 ; NoVLX-NEXT: kmovw %k0, %eax
10931 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
10932 ; NoVLX-NEXT: vzeroupper
10935 %0 = bitcast <2 x i64> %__a to <4 x i32>
10936 %load = load i32, i32* %__b
10937 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
10938 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
10939 %2 = icmp sge <4 x i32> %0, %1
10940 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10941 %4 = bitcast <8 x i1> %3 to i8
; Masked signed >= compare of <4 x i32> %__a against a broadcast scalar: the
; i32 loaded from %__b is splat to all four lanes. The low four lanes of %__u
; (bitcast to <8 x i1>) are ANDed with the compare result (mask operand
; first), which is widened to <8 x i1> (indices 4-7 select zeroinitializer
; lanes, so bits 4-7 are zero) and bitcast to i8.
; The VLX configuration expects a write-masked vpcmpnltd with a {1to4} memory
; operand. The NoVLX configuration expects vpbroadcastd, a write-masked zmm
; vpcmpnltd, and kshiftlw/kshiftrw by 12.
10945 define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
10946 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
10947 ; VLX: # %bb.0: # %entry
10948 ; VLX-NEXT: kmovd %edi, %k1
10949 ; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
10950 ; VLX-NEXT: kmovd %k0, %eax
10951 ; VLX-NEXT: # kill: def $al killed $al killed $eax
10954 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
10955 ; NoVLX: # %bb.0: # %entry
10956 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10957 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
10958 ; NoVLX-NEXT: kmovw %edi, %k1
10959 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
10960 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10961 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10962 ; NoVLX-NEXT: kmovw %k0, %eax
10963 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
10964 ; NoVLX-NEXT: vzeroupper
10967 %0 = bitcast <2 x i64> %__a to <4 x i32>
10968 %load = load i32, i32* %__b
10969 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
10970 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
10971 %2 = icmp sge <4 x i32> %0, %1
10972 %3 = bitcast i8 %__u to <8 x i1>
10973 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
10974 %4 = and <4 x i1> %extract.i, %2
10975 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10976 %6 = bitcast <8 x i1> %5 to i8
; Signed >= compare of <4 x i32> lanes (%__a vs. %__b). The <4 x i1> result is
; widened to <16 x i1> and bitcast to i16; every shuffle index above 3 is in
; the range 4-7, which selects the zeroinitializer operand, so result bits
; 4-15 are zero.
; The VLX configuration expects an xmm vpcmpnltd. The NoVLX configuration
; expects a zmm vpcmpnltd followed by kshiftlw/kshiftrw by 12 to clear mask
; bits 4-15.
10981 define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10982 ; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask:
10983 ; VLX: # %bb.0: # %entry
10984 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0
10985 ; VLX-NEXT: kmovd %k0, %eax
10986 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
10989 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask:
10990 ; NoVLX: # %bb.0: # %entry
10991 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
10992 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10993 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
10994 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10995 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10996 ; NoVLX-NEXT: kmovw %k0, %eax
10997 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
10998 ; NoVLX-NEXT: vzeroupper
11001 %0 = bitcast <2 x i64> %__a to <4 x i32>
11002 %1 = bitcast <2 x i64> %__b to <4 x i32>
11003 %2 = icmp sge <4 x i32> %0, %1
11004 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11005 %4 = bitcast <16 x i1> %3 to i16
; Signed >= compare of <4 x i32> lanes: %__a against a vector loaded from
; %__b. The <4 x i1> result is widened to <16 x i1> (all indices above 3 are
; in 4-7 and select zeroinitializer lanes, so bits 4-15 are zero) and bitcast
; to i16.
; The VLX configuration expects the load folded into an xmm vpcmpnltd. The
; NoVLX configuration expects a separate vmovdqa load, a zmm vpcmpnltd, and
; kshiftlw/kshiftrw by 12.
11009 define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
11010 ; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem:
11011 ; VLX: # %bb.0: # %entry
11012 ; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
11013 ; VLX-NEXT: kmovd %k0, %eax
11014 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11017 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem:
11018 ; NoVLX: # %bb.0: # %entry
11019 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11020 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
11021 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11022 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11023 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11024 ; NoVLX-NEXT: kmovw %k0, %eax
11025 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11026 ; NoVLX-NEXT: vzeroupper
11029 %0 = bitcast <2 x i64> %__a to <4 x i32>
11030 %load = load <2 x i64>, <2 x i64>* %__b
11031 %1 = bitcast <2 x i64> %load to <4 x i32>
11032 %2 = icmp sge <4 x i32> %0, %1
11033 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11034 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of <4 x i32> lanes (%__a vs. %__b). The low four
; lanes of %__u (bitcast to <8 x i1>) are ANDed with the compare result, which
; is widened to <16 x i1> (all indices above 3 are in 4-7 and select
; zeroinitializer lanes, so bits 4-15 are zero) and bitcast to i16.
; Both configurations expect %edi moved into %k1 and used as the vpcmpnltd
; write-mask; the NoVLX configuration additionally expects zmm operands and
; kshiftlw/kshiftrw by 12.
11038 define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11039 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
11040 ; VLX: # %bb.0: # %entry
11041 ; VLX-NEXT: kmovd %edi, %k1
11042 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
11043 ; VLX-NEXT: kmovd %k0, %eax
11044 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11047 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
11048 ; NoVLX: # %bb.0: # %entry
11049 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
11050 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11051 ; NoVLX-NEXT: kmovw %edi, %k1
11052 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11053 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11054 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11055 ; NoVLX-NEXT: kmovw %k0, %eax
11056 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11057 ; NoVLX-NEXT: vzeroupper
11060 %0 = bitcast <2 x i64> %__a to <4 x i32>
11061 %1 = bitcast <2 x i64> %__b to <4 x i32>
11062 %2 = icmp sge <4 x i32> %0, %1
11063 %3 = bitcast i8 %__u to <8 x i1>
11064 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11065 %4 = and <4 x i1> %2, %extract.i
11066 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11067 %6 = bitcast <16 x i1> %5 to i16
; Masked signed >= compare of <4 x i32> lanes: %__a against a vector loaded
; from %__b. The low four lanes of %__u (bitcast to <8 x i1>) are ANDed with
; the compare result, which is widened to <16 x i1> (all indices above 3 are
; in 4-7 and select zeroinitializer lanes, so bits 4-15 are zero) and bitcast
; to i16.
; The VLX configuration expects a write-masked xmm vpcmpnltd with the load
; folded as (%rsi). The NoVLX configuration expects a separate vmovdqa load,
; a write-masked zmm vpcmpnltd, and kshiftlw/kshiftrw by 12.
11071 define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
11072 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
11073 ; VLX: # %bb.0: # %entry
11074 ; VLX-NEXT: kmovd %edi, %k1
11075 ; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
11076 ; VLX-NEXT: kmovd %k0, %eax
11077 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11080 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
11081 ; NoVLX: # %bb.0: # %entry
11082 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11083 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
11084 ; NoVLX-NEXT: kmovw %edi, %k1
11085 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11086 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11087 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11088 ; NoVLX-NEXT: kmovw %k0, %eax
11089 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11090 ; NoVLX-NEXT: vzeroupper
11093 %0 = bitcast <2 x i64> %__a to <4 x i32>
11094 %load = load <2 x i64>, <2 x i64>* %__b
11095 %1 = bitcast <2 x i64> %load to <4 x i32>
11096 %2 = icmp sge <4 x i32> %0, %1
11097 %3 = bitcast i8 %__u to <8 x i1>
11098 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11099 %4 = and <4 x i1> %2, %extract.i
11100 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11101 %6 = bitcast <16 x i1> %5 to i16
; Signed >= compare of <4 x i32> %__a against a broadcast scalar: the i32
; loaded from %__b is inserted into lane 0 and splat to all four lanes by an
; all-zero shuffle mask. The <4 x i1> result is widened to <16 x i1> (all
; indices above 3 are in 4-7 and select zeroinitializer lanes, so bits 4-15
; are zero) and bitcast to i16.
; The VLX configuration expects a {1to4} memory operand on vpcmpnltd. The
; NoVLX configuration expects vpbroadcastd, a zmm vpcmpnltd, and
; kshiftlw/kshiftrw by 12.
11106 define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
11107 ; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b:
11108 ; VLX: # %bb.0: # %entry
11109 ; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0
11110 ; VLX-NEXT: kmovd %k0, %eax
11111 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11114 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b:
11115 ; NoVLX: # %bb.0: # %entry
11116 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11117 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
11118 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11119 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11120 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11121 ; NoVLX-NEXT: kmovw %k0, %eax
11122 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11123 ; NoVLX-NEXT: vzeroupper
11126 %0 = bitcast <2 x i64> %__a to <4 x i32>
11127 %load = load i32, i32* %__b
11128 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11129 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11130 %2 = icmp sge <4 x i32> %0, %1
11131 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11132 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of <4 x i32> %__a against a broadcast scalar: the
; i32 loaded from %__b is splat to all four lanes. The low four lanes of %__u
; (bitcast to <8 x i1>) are ANDed with the compare result (mask operand
; first), which is widened to <16 x i1> (all indices above 3 are in 4-7 and
; select zeroinitializer lanes, so bits 4-15 are zero) and bitcast to i16.
; The VLX configuration expects a write-masked vpcmpnltd with a {1to4} memory
; operand. The NoVLX configuration expects vpbroadcastd, a write-masked zmm
; vpcmpnltd, and kshiftlw/kshiftrw by 12.
11136 define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
11137 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
11138 ; VLX: # %bb.0: # %entry
11139 ; VLX-NEXT: kmovd %edi, %k1
11140 ; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
11141 ; VLX-NEXT: kmovd %k0, %eax
11142 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11145 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
11146 ; NoVLX: # %bb.0: # %entry
11147 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11148 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
11149 ; NoVLX-NEXT: kmovw %edi, %k1
11150 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11151 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11152 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11153 ; NoVLX-NEXT: kmovw %k0, %eax
11154 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11155 ; NoVLX-NEXT: vzeroupper
11158 %0 = bitcast <2 x i64> %__a to <4 x i32>
11159 %load = load i32, i32* %__b
11160 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11161 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11162 %2 = icmp sge <4 x i32> %0, %1
11163 %3 = bitcast i8 %__u to <8 x i1>
11164 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11165 %4 = and <4 x i1> %extract.i, %2
11166 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11167 %6 = bitcast <16 x i1> %5 to i16
; Signed >= compare of <4 x i32> lanes (%__a vs. %__b). The <4 x i1> result is
; widened to <32 x i1> and bitcast to i32; every shuffle index above 3 is in
; the range 4-7, which selects the zeroinitializer operand, so result bits
; 4-31 are zero.
; The VLX configuration expects an xmm vpcmpnltd. The NoVLX configuration
; expects a zmm vpcmpnltd followed by kshiftlw/kshiftrw by 12 to clear mask
; bits 4-15.
11172 define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11173 ; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
11174 ; VLX: # %bb.0: # %entry
11175 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0
11176 ; VLX-NEXT: kmovd %k0, %eax
11179 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
11180 ; NoVLX: # %bb.0: # %entry
11181 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
11182 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11183 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11184 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11185 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11186 ; NoVLX-NEXT: kmovw %k0, %eax
11187 ; NoVLX-NEXT: vzeroupper
11190 %0 = bitcast <2 x i64> %__a to <4 x i32>
11191 %1 = bitcast <2 x i64> %__b to <4 x i32>
11192 %2 = icmp sge <4 x i32> %0, %1
11193 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11194 %4 = bitcast <32 x i1> %3 to i32
; Signed >= compare of <4 x i32> lanes: %__a against a vector loaded from
; %__b. The <4 x i1> result is widened to <32 x i1> (all indices above 3 are
; in 4-7 and select zeroinitializer lanes, so bits 4-31 are zero) and bitcast
; to i32.
; The VLX configuration expects the load folded into an xmm vpcmpnltd. The
; NoVLX configuration expects a separate vmovdqa load, a zmm vpcmpnltd, and
; kshiftlw/kshiftrw by 12.
11198 define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
11199 ; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
11200 ; VLX: # %bb.0: # %entry
11201 ; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
11202 ; VLX-NEXT: kmovd %k0, %eax
11205 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
11206 ; NoVLX: # %bb.0: # %entry
11207 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11208 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
11209 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11210 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11211 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11212 ; NoVLX-NEXT: kmovw %k0, %eax
11213 ; NoVLX-NEXT: vzeroupper
11216 %0 = bitcast <2 x i64> %__a to <4 x i32>
11217 %load = load <2 x i64>, <2 x i64>* %__b
11218 %1 = bitcast <2 x i64> %load to <4 x i32>
11219 %2 = icmp sge <4 x i32> %0, %1
11220 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11221 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of <4 x i32> lanes (%__a vs. %__b). The low four
; lanes of %__u (bitcast to <8 x i1>) are ANDed with the compare result, which
; is widened to <32 x i1> (all indices above 3 are in 4-7 and select
; zeroinitializer lanes, so bits 4-31 are zero) and bitcast to i32.
; Both configurations expect %edi moved into %k1 and used as the vpcmpnltd
; write-mask; the NoVLX configuration additionally expects zmm operands and
; kshiftlw/kshiftrw by 12.
11225 define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11226 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
11227 ; VLX: # %bb.0: # %entry
11228 ; VLX-NEXT: kmovd %edi, %k1
11229 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
11230 ; VLX-NEXT: kmovd %k0, %eax
11233 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
11234 ; NoVLX: # %bb.0: # %entry
11235 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
11236 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11237 ; NoVLX-NEXT: kmovw %edi, %k1
11238 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11239 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11240 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11241 ; NoVLX-NEXT: kmovw %k0, %eax
11242 ; NoVLX-NEXT: vzeroupper
11245 %0 = bitcast <2 x i64> %__a to <4 x i32>
11246 %1 = bitcast <2 x i64> %__b to <4 x i32>
11247 %2 = icmp sge <4 x i32> %0, %1
11248 %3 = bitcast i8 %__u to <8 x i1>
11249 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11250 %4 = and <4 x i1> %2, %extract.i
11251 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11252 %6 = bitcast <32 x i1> %5 to i32
; Masked signed >= compare of <4 x i32> lanes: %__a against a vector loaded
; from %__b. The low four lanes of %__u (bitcast to <8 x i1>) are ANDed with
; the compare result, which is widened to <32 x i1> (all indices above 3 are
; in 4-7 and select zeroinitializer lanes, so bits 4-31 are zero) and bitcast
; to i32.
; The VLX configuration expects a write-masked xmm vpcmpnltd with the load
; folded as (%rsi). The NoVLX configuration expects a separate vmovdqa load,
; a write-masked zmm vpcmpnltd, and kshiftlw/kshiftrw by 12.
11256 define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
11257 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
11258 ; VLX: # %bb.0: # %entry
11259 ; VLX-NEXT: kmovd %edi, %k1
11260 ; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
11261 ; VLX-NEXT: kmovd %k0, %eax
11264 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
11265 ; NoVLX: # %bb.0: # %entry
11266 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11267 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
11268 ; NoVLX-NEXT: kmovw %edi, %k1
11269 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11270 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11271 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11272 ; NoVLX-NEXT: kmovw %k0, %eax
11273 ; NoVLX-NEXT: vzeroupper
11276 %0 = bitcast <2 x i64> %__a to <4 x i32>
11277 %load = load <2 x i64>, <2 x i64>* %__b
11278 %1 = bitcast <2 x i64> %load to <4 x i32>
11279 %2 = icmp sge <4 x i32> %0, %1
11280 %3 = bitcast i8 %__u to <8 x i1>
11281 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11282 %4 = and <4 x i1> %2, %extract.i
11283 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11284 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed >= compare (icmp sge) of %__a, viewed as <4 x i32>, against
; a broadcast operand: one i32 is loaded from %__b and splatted to all four
; lanes (insertelement into lane 0, then a shuffle with an all-zero index
; mask). The <4 x i1> result is widened to <32 x i1> by a shuffle whose lanes
; 4 and above all select from the zeroinitializer operand, and bitcast to i32.
; Expected asm: the +avx512vl run uses the {1to4} embedded-broadcast form of
; vpcmpnltd; the +avx512f-only run uses vpbroadcastd into xmm1, a zmm compare
; and kshiftlw/kshiftrw $12.
11289 define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
11290 ; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
11291 ; VLX: # %bb.0: # %entry
11292 ; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0
11293 ; VLX-NEXT: kmovd %k0, %eax
11296 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
11297 ; NoVLX: # %bb.0: # %entry
11298 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11299 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
11300 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11301 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11302 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11303 ; NoVLX-NEXT: kmovw %k0, %eax
11304 ; NoVLX-NEXT: vzeroupper
11307 %0 = bitcast <2 x i64> %__a to <4 x i32>
11308 %load = load i32, i32* %__b
11309 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11310 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11311 %2 = icmp sge <4 x i32> %0, %1
11312 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11313 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare (icmp sge) of %__a, viewed as <4 x i32>, against a
; broadcast operand: one i32 is loaded from %__b and splatted to all four
; lanes. The <4 x i1> result is ANDed with lanes 0-3 of %__u (bitcast to
; <8 x i1>); note the mask is the first AND operand here. The result is
; widened to <32 x i1> by a shuffle whose lanes 4 and above all select from
; the zeroinitializer operand, and bitcast to i32.
; Expected asm: the +avx512vl run uses a masked {1to4} vpcmpnltd; the
; +avx512f-only run uses vpbroadcastd into xmm1, a masked zmm compare and
; kshiftlw/kshiftrw $12.
11317 define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
11318 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
11319 ; VLX: # %bb.0: # %entry
11320 ; VLX-NEXT: kmovd %edi, %k1
11321 ; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
11322 ; VLX-NEXT: kmovd %k0, %eax
11325 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
11326 ; NoVLX: # %bb.0: # %entry
11327 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11328 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
11329 ; NoVLX-NEXT: kmovw %edi, %k1
11330 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11331 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11332 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11333 ; NoVLX-NEXT: kmovw %k0, %eax
11334 ; NoVLX-NEXT: vzeroupper
11337 %0 = bitcast <2 x i64> %__a to <4 x i32>
11338 %load = load i32, i32* %__b
11339 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11340 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11341 %2 = icmp sge <4 x i32> %0, %1
11342 %3 = bitcast i8 %__u to <8 x i1>
11343 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11344 %4 = and <4 x i1> %extract.i, %2
11345 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11346 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed >= compare (icmp sge) of %__a and %__b, both viewed as
; <4 x i32>. The <4 x i1> result is widened to <64 x i1> by a shuffle whose
; lanes 4 and above all select from the zeroinitializer operand, and bitcast
; to i64.
; Expected asm: the +avx512vl run uses an xmm vpcmpnltd and reads the mask
; with kmovq; the +avx512f-only run compares at zmm width, applies
; kshiftlw/kshiftrw $12 and reads the mask with kmovw into eax.
11351 define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11352 ; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask:
11353 ; VLX: # %bb.0: # %entry
11354 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0
11355 ; VLX-NEXT: kmovq %k0, %rax
11358 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask:
11359 ; NoVLX: # %bb.0: # %entry
11360 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
11361 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11362 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11363 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11364 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11365 ; NoVLX-NEXT: kmovw %k0, %eax
11366 ; NoVLX-NEXT: vzeroupper
11369 %0 = bitcast <2 x i64> %__a to <4 x i32>
11370 %1 = bitcast <2 x i64> %__b to <4 x i32>
11371 %2 = icmp sge <4 x i32> %0, %1
11372 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11373 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed >= compare (icmp sge) of %__a, viewed as <4 x i32>, against
; a <2 x i64> vector loaded from %__b. The <4 x i1> result is widened to
; <64 x i1> by a shuffle whose lanes 4 and above all select from the
; zeroinitializer operand, and bitcast to i64.
; Expected asm: the +avx512vl run folds the load into an xmm vpcmpnltd and
; uses kmovq; the +avx512f-only run loads into xmm1 with vmovdqa, compares at
; zmm width and applies kshiftlw/kshiftrw $12.
11377 define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
11378 ; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem:
11379 ; VLX: # %bb.0: # %entry
11380 ; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
11381 ; VLX-NEXT: kmovq %k0, %rax
11384 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem:
11385 ; NoVLX: # %bb.0: # %entry
11386 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11387 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
11388 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11389 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11390 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11391 ; NoVLX-NEXT: kmovw %k0, %eax
11392 ; NoVLX-NEXT: vzeroupper
11395 %0 = bitcast <2 x i64> %__a to <4 x i32>
11396 %load = load <2 x i64>, <2 x i64>* %__b
11397 %1 = bitcast <2 x i64> %load to <4 x i32>
11398 %2 = icmp sge <4 x i32> %0, %1
11399 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11400 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare (icmp sge) of %__a and %__b, both viewed as
; <4 x i32>. The <4 x i1> result is ANDed with lanes 0-3 of %__u (bitcast to
; <8 x i1>), widened to <64 x i1> by a shuffle whose lanes 4 and above all
; select from the zeroinitializer operand, and bitcast to i64.
; Expected asm: the +avx512vl run moves %edi into %k1 and uses a masked xmm
; vpcmpnltd plus kmovq; the +avx512f-only run uses a masked zmm compare
; followed by kshiftlw/kshiftrw $12 and kmovw.
11404 define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11405 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask:
11406 ; VLX: # %bb.0: # %entry
11407 ; VLX-NEXT: kmovd %edi, %k1
11408 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
11409 ; VLX-NEXT: kmovq %k0, %rax
11412 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask:
11413 ; NoVLX: # %bb.0: # %entry
11414 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
11415 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11416 ; NoVLX-NEXT: kmovw %edi, %k1
11417 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11418 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11419 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11420 ; NoVLX-NEXT: kmovw %k0, %eax
11421 ; NoVLX-NEXT: vzeroupper
11424 %0 = bitcast <2 x i64> %__a to <4 x i32>
11425 %1 = bitcast <2 x i64> %__b to <4 x i32>
11426 %2 = icmp sge <4 x i32> %0, %1
11427 %3 = bitcast i8 %__u to <8 x i1>
11428 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11429 %4 = and <4 x i1> %2, %extract.i
11430 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11431 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare (icmp sge) of %__a, viewed as <4 x i32>, against a
; <2 x i64> vector loaded from %__b. The <4 x i1> result is ANDed with lanes
; 0-3 of %__u (bitcast to <8 x i1>), widened to <64 x i1> by a shuffle whose
; lanes 4 and above all select from the zeroinitializer operand, and bitcast
; to i64.
; Expected asm: the +avx512vl run folds the load into a masked xmm vpcmpnltd
; and uses kmovq; the +avx512f-only run loads into xmm1, does a masked zmm
; compare and applies kshiftlw/kshiftrw $12.
11435 define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
11436 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem:
11437 ; VLX: # %bb.0: # %entry
11438 ; VLX-NEXT: kmovd %edi, %k1
11439 ; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
11440 ; VLX-NEXT: kmovq %k0, %rax
11443 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem:
11444 ; NoVLX: # %bb.0: # %entry
11445 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11446 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
11447 ; NoVLX-NEXT: kmovw %edi, %k1
11448 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11449 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11450 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11451 ; NoVLX-NEXT: kmovw %k0, %eax
11452 ; NoVLX-NEXT: vzeroupper
11455 %0 = bitcast <2 x i64> %__a to <4 x i32>
11456 %load = load <2 x i64>, <2 x i64>* %__b
11457 %1 = bitcast <2 x i64> %load to <4 x i32>
11458 %2 = icmp sge <4 x i32> %0, %1
11459 %3 = bitcast i8 %__u to <8 x i1>
11460 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11461 %4 = and <4 x i1> %2, %extract.i
11462 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11463 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed >= compare (icmp sge) of %__a, viewed as <4 x i32>, against
; a broadcast operand: one i32 is loaded from %__b and splatted to all four
; lanes. The <4 x i1> result is widened to <64 x i1> by a shuffle whose lanes
; 4 and above all select from the zeroinitializer operand, and bitcast to i64.
; Expected asm: the +avx512vl run uses the {1to4} embedded-broadcast form of
; vpcmpnltd and kmovq; the +avx512f-only run uses vpbroadcastd into xmm1, a
; zmm compare and kshiftlw/kshiftrw $12.
11468 define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
11469 ; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
11470 ; VLX: # %bb.0: # %entry
11471 ; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0
11472 ; VLX-NEXT: kmovq %k0, %rax
11475 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
11476 ; NoVLX: # %bb.0: # %entry
11477 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11478 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
11479 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11480 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11481 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11482 ; NoVLX-NEXT: kmovw %k0, %eax
11483 ; NoVLX-NEXT: vzeroupper
11486 %0 = bitcast <2 x i64> %__a to <4 x i32>
11487 %load = load i32, i32* %__b
11488 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11489 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11490 %2 = icmp sge <4 x i32> %0, %1
11491 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11492 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare (icmp sge) of %__a, viewed as <4 x i32>, against a
; broadcast operand: one i32 is loaded from %__b and splatted to all four
; lanes. The <4 x i1> result is ANDed with lanes 0-3 of %__u (bitcast to
; <8 x i1>; the mask is the first AND operand here), widened to <64 x i1> by
; a shuffle whose lanes 4 and above all select from the zeroinitializer
; operand, and bitcast to i64.
; Expected asm: the +avx512vl run uses a masked {1to4} vpcmpnltd and kmovq;
; the +avx512f-only run uses vpbroadcastd into xmm1, a masked zmm compare and
; kshiftlw/kshiftrw $12.
11496 define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
11497 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
11498 ; VLX: # %bb.0: # %entry
11499 ; VLX-NEXT: kmovd %edi, %k1
11500 ; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
11501 ; VLX-NEXT: kmovq %k0, %rax
11504 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
11505 ; NoVLX: # %bb.0: # %entry
11506 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11507 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
11508 ; NoVLX-NEXT: kmovw %edi, %k1
11509 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11510 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11511 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11512 ; NoVLX-NEXT: kmovw %k0, %eax
11513 ; NoVLX-NEXT: vzeroupper
11516 %0 = bitcast <2 x i64> %__a to <4 x i32>
11517 %load = load i32, i32* %__b
11518 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11519 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11520 %2 = icmp sge <4 x i32> %0, %1
11521 %3 = bitcast i8 %__u to <8 x i1>
11522 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11523 %4 = and <4 x i1> %extract.i, %2
11524 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11525 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed >= compare (icmp sge) of %__a and %__b, both viewed as
; <8 x i32>. The <8 x i1> result is widened to <16 x i1> by a shuffle whose
; lanes 8-15 select from the zeroinitializer operand, and bitcast to i16.
; Expected asm: the +avx512vl run uses a ymm vpcmpnltd and kmovd; the
; +avx512f-only run compares at zmm width and applies kshiftlw/kshiftrw $8
; before kmovw. Both runs expect vzeroupper.
11530 define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11531 ; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask:
11532 ; VLX: # %bb.0: # %entry
11533 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0
11534 ; VLX-NEXT: kmovd %k0, %eax
11535 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11536 ; VLX-NEXT: vzeroupper
11539 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask:
11540 ; NoVLX: # %bb.0: # %entry
11541 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
11542 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11543 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11544 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11545 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11546 ; NoVLX-NEXT: kmovw %k0, %eax
11547 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11548 ; NoVLX-NEXT: vzeroupper
11551 %0 = bitcast <4 x i64> %__a to <8 x i32>
11552 %1 = bitcast <4 x i64> %__b to <8 x i32>
11553 %2 = icmp sge <8 x i32> %0, %1
11554 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11555 %4 = bitcast <16 x i1> %3 to i16
; Unmasked signed >= compare (icmp sge) of %__a, viewed as <8 x i32>, against
; a <4 x i64> vector loaded from %__b. The <8 x i1> result is widened to
; <16 x i1> by a shuffle whose lanes 8-15 select from the zeroinitializer
; operand, and bitcast to i16.
; Expected asm: the +avx512vl run folds the load into a ymm vpcmpnltd; the
; +avx512f-only run loads into ymm1 with vmovdqa, compares at zmm width and
; applies kshiftlw/kshiftrw $8.
11559 define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
11560 ; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem:
11561 ; VLX: # %bb.0: # %entry
11562 ; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
11563 ; VLX-NEXT: kmovd %k0, %eax
11564 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11565 ; VLX-NEXT: vzeroupper
11568 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem:
11569 ; NoVLX: # %bb.0: # %entry
11570 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11571 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
11572 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11573 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11574 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11575 ; NoVLX-NEXT: kmovw %k0, %eax
11576 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11577 ; NoVLX-NEXT: vzeroupper
11580 %0 = bitcast <4 x i64> %__a to <8 x i32>
11581 %load = load <4 x i64>, <4 x i64>* %__b
11582 %1 = bitcast <4 x i64> %load to <8 x i32>
11583 %2 = icmp sge <8 x i32> %0, %1
11584 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11585 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare (icmp sge) of %__a and %__b, both viewed as
; <8 x i32>. The <8 x i1> result is ANDed with %__u bitcast to <8 x i1>,
; widened to <16 x i1> by a shuffle whose lanes 8-15 select from the
; zeroinitializer operand, and bitcast to i16.
; Expected asm: the +avx512vl run moves %edi into %k1 and uses a masked ymm
; vpcmpnltd; the +avx512f-only run uses a masked zmm compare followed by
; kshiftlw/kshiftrw $8.
11589 define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11590 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask:
11591 ; VLX: # %bb.0: # %entry
11592 ; VLX-NEXT: kmovd %edi, %k1
11593 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 {%k1}
11594 ; VLX-NEXT: kmovd %k0, %eax
11595 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11596 ; VLX-NEXT: vzeroupper
11599 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask:
11600 ; NoVLX: # %bb.0: # %entry
11601 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
11602 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11603 ; NoVLX-NEXT: kmovw %edi, %k1
11604 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11605 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11606 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11607 ; NoVLX-NEXT: kmovw %k0, %eax
11608 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11609 ; NoVLX-NEXT: vzeroupper
11612 %0 = bitcast <4 x i64> %__a to <8 x i32>
11613 %1 = bitcast <4 x i64> %__b to <8 x i32>
11614 %2 = icmp sge <8 x i32> %0, %1
11615 %3 = bitcast i8 %__u to <8 x i1>
11616 %4 = and <8 x i1> %2, %3
11617 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11618 %6 = bitcast <16 x i1> %5 to i16
; Masked signed >= compare (icmp sge) of %__a, viewed as <8 x i32>, against a
; <4 x i64> vector loaded from %__b. The <8 x i1> result is ANDed with %__u
; bitcast to <8 x i1>, widened to <16 x i1> by a shuffle whose lanes 8-15
; select from the zeroinitializer operand, and bitcast to i16.
; Expected asm: the +avx512vl run folds the load into a masked ymm vpcmpnltd;
; the +avx512f-only run loads into ymm1, does a masked zmm compare and applies
; kshiftlw/kshiftrw $8.
11622 define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
11623 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem:
11624 ; VLX: # %bb.0: # %entry
11625 ; VLX-NEXT: kmovd %edi, %k1
11626 ; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
11627 ; VLX-NEXT: kmovd %k0, %eax
11628 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11629 ; VLX-NEXT: vzeroupper
11632 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem:
11633 ; NoVLX: # %bb.0: # %entry
11634 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11635 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
11636 ; NoVLX-NEXT: kmovw %edi, %k1
11637 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11638 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11639 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11640 ; NoVLX-NEXT: kmovw %k0, %eax
11641 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11642 ; NoVLX-NEXT: vzeroupper
11645 %0 = bitcast <4 x i64> %__a to <8 x i32>
11646 %load = load <4 x i64>, <4 x i64>* %__b
11647 %1 = bitcast <4 x i64> %load to <8 x i32>
11648 %2 = icmp sge <8 x i32> %0, %1
11649 %3 = bitcast i8 %__u to <8 x i1>
11650 %4 = and <8 x i1> %2, %3
11651 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11652 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed >= compare (icmp sge) of %__a, viewed as <8 x i32>, against
; a broadcast operand: one i32 is loaded from %__b and splatted to all eight
; lanes (insertelement into lane 0, then a shuffle with an all-zero index
; mask). The <8 x i1> result is widened to <16 x i1> by a shuffle whose lanes
; 8-15 select from the zeroinitializer operand, and bitcast to i16.
; Expected asm: the +avx512vl run uses the {1to8} embedded-broadcast form of
; vpcmpnltd; the +avx512f-only run uses vpbroadcastd into ymm1, a zmm compare
; and kshiftlw/kshiftrw $8.
11657 define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
11658 ; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
11659 ; VLX: # %bb.0: # %entry
11660 ; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0
11661 ; VLX-NEXT: kmovd %k0, %eax
11662 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11663 ; VLX-NEXT: vzeroupper
11666 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
11667 ; NoVLX: # %bb.0: # %entry
11668 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11669 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
11670 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11671 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11672 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11673 ; NoVLX-NEXT: kmovw %k0, %eax
11674 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11675 ; NoVLX-NEXT: vzeroupper
11678 %0 = bitcast <4 x i64> %__a to <8 x i32>
11679 %load = load i32, i32* %__b
11680 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
11681 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
11682 %2 = icmp sge <8 x i32> %0, %1
11683 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11684 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare (icmp sge) of %__a, viewed as <8 x i32>, against a
; broadcast operand: one i32 is loaded from %__b and splatted to all eight
; lanes. The <8 x i1> result is ANDed with %__u bitcast to <8 x i1> (the mask
; is the first AND operand here), widened to <16 x i1> by a shuffle whose
; lanes 8-15 select from the zeroinitializer operand, and bitcast to i16.
; Expected asm: the +avx512vl run uses a masked {1to8} vpcmpnltd; the
; +avx512f-only run uses vpbroadcastd into ymm1, a masked zmm compare and
; kshiftlw/kshiftrw $8.
11688 define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
11689 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
11690 ; VLX: # %bb.0: # %entry
11691 ; VLX-NEXT: kmovd %edi, %k1
11692 ; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
11693 ; VLX-NEXT: kmovd %k0, %eax
11694 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11695 ; VLX-NEXT: vzeroupper
11698 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
11699 ; NoVLX: # %bb.0: # %entry
11700 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11701 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
11702 ; NoVLX-NEXT: kmovw %edi, %k1
11703 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11704 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11705 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11706 ; NoVLX-NEXT: kmovw %k0, %eax
11707 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11708 ; NoVLX-NEXT: vzeroupper
11711 %0 = bitcast <4 x i64> %__a to <8 x i32>
11712 %load = load i32, i32* %__b
11713 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
11714 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
11715 %2 = icmp sge <8 x i32> %0, %1
11716 %3 = bitcast i8 %__u to <8 x i1>
11717 %4 = and <8 x i1> %3, %2
11718 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11719 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed >= compare (icmp sge) of %__a and %__b, both viewed as
; <8 x i32>. The <8 x i1> result is widened to <32 x i1> by a shuffle whose
; lanes 8 and above all select from the zeroinitializer operand, and bitcast
; to i32.
; Expected asm: the +avx512vl run uses a ymm vpcmpnltd and kmovd; the
; +avx512f-only run compares at zmm width and applies kshiftlw/kshiftrw $8
; before kmovw.
11724 define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11725 ; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
11726 ; VLX: # %bb.0: # %entry
11727 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0
11728 ; VLX-NEXT: kmovd %k0, %eax
11729 ; VLX-NEXT: vzeroupper
11732 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
11733 ; NoVLX: # %bb.0: # %entry
11734 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
11735 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11736 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11737 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11738 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11739 ; NoVLX-NEXT: kmovw %k0, %eax
11740 ; NoVLX-NEXT: vzeroupper
11743 %0 = bitcast <4 x i64> %__a to <8 x i32>
11744 %1 = bitcast <4 x i64> %__b to <8 x i32>
11745 %2 = icmp sge <8 x i32> %0, %1
11746 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11747 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed >= compare (icmp sge) of %__a, viewed as <8 x i32>, against
; a <4 x i64> vector loaded from %__b. The <8 x i1> result is widened to
; <32 x i1> by a shuffle whose lanes 8 and above all select from the
; zeroinitializer operand, and bitcast to i32.
; Expected asm: the +avx512vl run folds the load into a ymm vpcmpnltd; the
; +avx512f-only run loads into ymm1 with vmovdqa, compares at zmm width and
; applies kshiftlw/kshiftrw $8.
11751 define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
11752 ; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
11753 ; VLX: # %bb.0: # %entry
11754 ; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
11755 ; VLX-NEXT: kmovd %k0, %eax
11756 ; VLX-NEXT: vzeroupper
11759 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
11760 ; NoVLX: # %bb.0: # %entry
11761 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11762 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
11763 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11764 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11765 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11766 ; NoVLX-NEXT: kmovw %k0, %eax
11767 ; NoVLX-NEXT: vzeroupper
11770 %0 = bitcast <4 x i64> %__a to <8 x i32>
11771 %load = load <4 x i64>, <4 x i64>* %__b
11772 %1 = bitcast <4 x i64> %load to <8 x i32>
11773 %2 = icmp sge <8 x i32> %0, %1
11774 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11775 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare (icmp sge) of %__a and %__b, both viewed as
; <8 x i32>. The <8 x i1> result is ANDed with %__u bitcast to <8 x i1>,
; widened to <32 x i1> by a shuffle whose lanes 8 and above all select from
; the zeroinitializer operand, and bitcast to i32.
; Expected asm: the +avx512vl run moves %edi into %k1 and uses a masked ymm
; vpcmpnltd; the +avx512f-only run uses a masked zmm compare followed by
; kshiftlw/kshiftrw $8.
11779 define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11780 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
11781 ; VLX: # %bb.0: # %entry
11782 ; VLX-NEXT: kmovd %edi, %k1
11783 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 {%k1}
11784 ; VLX-NEXT: kmovd %k0, %eax
11785 ; VLX-NEXT: vzeroupper
11788 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
11789 ; NoVLX: # %bb.0: # %entry
11790 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
11791 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11792 ; NoVLX-NEXT: kmovw %edi, %k1
11793 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11794 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11795 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11796 ; NoVLX-NEXT: kmovw %k0, %eax
11797 ; NoVLX-NEXT: vzeroupper
11800 %0 = bitcast <4 x i64> %__a to <8 x i32>
11801 %1 = bitcast <4 x i64> %__b to <8 x i32>
11802 %2 = icmp sge <8 x i32> %0, %1
11803 %3 = bitcast i8 %__u to <8 x i1>
11804 %4 = and <8 x i1> %2, %3
11805 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11806 %6 = bitcast <32 x i1> %5 to i32
; Masked signed >= compare (icmp sge) of %__a, viewed as <8 x i32>, against a
; <4 x i64> vector loaded from %__b. The <8 x i1> result is ANDed with %__u
; bitcast to <8 x i1>, widened to <32 x i1> by a shuffle whose lanes 8 and
; above all select from the zeroinitializer operand, and bitcast to i32.
; Expected asm: the +avx512vl run folds the load into a masked ymm vpcmpnltd;
; the +avx512f-only run loads into ymm1, does a masked zmm compare and applies
; kshiftlw/kshiftrw $8.
11810 define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
11811 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
11812 ; VLX: # %bb.0: # %entry
11813 ; VLX-NEXT: kmovd %edi, %k1
11814 ; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
11815 ; VLX-NEXT: kmovd %k0, %eax
11816 ; VLX-NEXT: vzeroupper
11819 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
11820 ; NoVLX: # %bb.0: # %entry
11821 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11822 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
11823 ; NoVLX-NEXT: kmovw %edi, %k1
11824 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11825 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11826 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11827 ; NoVLX-NEXT: kmovw %k0, %eax
11828 ; NoVLX-NEXT: vzeroupper
11831 %0 = bitcast <4 x i64> %__a to <8 x i32>
11832 %load = load <4 x i64>, <4 x i64>* %__b
11833 %1 = bitcast <4 x i64> %load to <8 x i32>
11834 %2 = icmp sge <8 x i32> %0, %1
11835 %3 = bitcast i8 %__u to <8 x i1>
11836 %4 = and <8 x i1> %2, %3
11837 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11838 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed >= compare (icmp sge) of %__a, viewed as <8 x i32>, against
; a broadcast operand: one i32 is loaded from %__b and splatted to all eight
; lanes. The <8 x i1> result is widened to <32 x i1> by a shuffle whose lanes
; 8 and above all select from the zeroinitializer operand, and bitcast to i32.
; Expected asm: the +avx512vl run uses the {1to8} embedded-broadcast form of
; vpcmpnltd; the +avx512f-only run uses vpbroadcastd into ymm1, a zmm compare
; and kshiftlw/kshiftrw $8.
11843 define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
11844 ; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
11845 ; VLX: # %bb.0: # %entry
11846 ; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0
11847 ; VLX-NEXT: kmovd %k0, %eax
11848 ; VLX-NEXT: vzeroupper
11851 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
11852 ; NoVLX: # %bb.0: # %entry
11853 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11854 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
11855 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11856 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11857 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11858 ; NoVLX-NEXT: kmovw %k0, %eax
11859 ; NoVLX-NEXT: vzeroupper
11862 %0 = bitcast <4 x i64> %__a to <8 x i32>
11863 %load = load i32, i32* %__b
11864 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
11865 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
11866 %2 = icmp sge <8 x i32> %0, %1
11867 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11868 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare (icmp sge) of %__a, viewed as <8 x i32>, against a
; broadcast operand: one i32 is loaded from %__b and splatted to all eight
; lanes. The <8 x i1> result is ANDed with %__u bitcast to <8 x i1> (the mask
; is the first AND operand here), widened to <32 x i1> by a shuffle whose
; lanes 8 and above all select from the zeroinitializer operand, and bitcast
; to i32.
; Expected asm: the +avx512vl run uses a masked {1to8} vpcmpnltd; the
; +avx512f-only run uses vpbroadcastd into ymm1, a masked zmm compare and
; kshiftlw/kshiftrw $8.
11872 define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
11873 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
11874 ; VLX: # %bb.0: # %entry
11875 ; VLX-NEXT: kmovd %edi, %k1
11876 ; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
11877 ; VLX-NEXT: kmovd %k0, %eax
11878 ; VLX-NEXT: vzeroupper
11881 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
11882 ; NoVLX: # %bb.0: # %entry
11883 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11884 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
11885 ; NoVLX-NEXT: kmovw %edi, %k1
11886 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11887 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11888 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11889 ; NoVLX-NEXT: kmovw %k0, %eax
11890 ; NoVLX-NEXT: vzeroupper
11893 %0 = bitcast <4 x i64> %__a to <8 x i32>
11894 %load = load i32, i32* %__b
11895 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
11896 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
11897 %2 = icmp sge <8 x i32> %0, %1
11898 %3 = bitcast i8 %__u to <8 x i1>
11899 %4 = and <8 x i1> %3, %2
11900 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11901 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed >= compare (icmp sge) of %__a and %__b, both viewed as
; <8 x i32>. The <8 x i1> result is widened to <64 x i1> by a shuffle whose
; lanes 8 and above all select from the zeroinitializer operand, and bitcast
; to i64.
; Expected asm: the +avx512vl run uses a ymm vpcmpnltd and reads the mask
; with kmovq; the +avx512f-only run compares at zmm width, applies
; kshiftlw/kshiftrw $8 and reads the mask with kmovw into eax.
11906 define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11907 ; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
11908 ; VLX: # %bb.0: # %entry
11909 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0
11910 ; VLX-NEXT: kmovq %k0, %rax
11911 ; VLX-NEXT: vzeroupper
11914 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
11915 ; NoVLX: # %bb.0: # %entry
11916 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
11917 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11918 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11919 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11920 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11921 ; NoVLX-NEXT: kmovw %k0, %eax
11922 ; NoVLX-NEXT: vzeroupper
11925 %0 = bitcast <4 x i64> %__a to <8 x i32>
11926 %1 = bitcast <4 x i64> %__b to <8 x i32>
11927 %2 = icmp sge <8 x i32> %0, %1
11928 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11929 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed >= compare (icmp sge) of %__a, viewed as <8 x i32>, against
; a <4 x i64> vector loaded from %__b. The <8 x i1> result is widened to
; <64 x i1> by a shuffle whose lanes 8 and above all select from the
; zeroinitializer operand, and bitcast to i64.
; Expected asm: the +avx512vl run folds the load into a ymm vpcmpnltd and
; uses kmovq; the +avx512f-only run loads into ymm1 with vmovdqa, compares at
; zmm width and applies kshiftlw/kshiftrw $8.
11933 define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
11934 ; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
11935 ; VLX: # %bb.0: # %entry
11936 ; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
11937 ; VLX-NEXT: kmovq %k0, %rax
11938 ; VLX-NEXT: vzeroupper
11941 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
11942 ; NoVLX: # %bb.0: # %entry
11943 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11944 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
11945 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11946 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11947 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11948 ; NoVLX-NEXT: kmovw %k0, %eax
11949 ; NoVLX-NEXT: vzeroupper
11952 %0 = bitcast <4 x i64> %__a to <8 x i32>
11953 %load = load <4 x i64>, <4 x i64>* %__b
11954 %1 = bitcast <4 x i64> %load to <8 x i32>
11955 %2 = icmp sge <8 x i32> %0, %1
11956 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11957 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare (icmp sge) of %__a and %__b, both viewed as
; <8 x i32>. The <8 x i1> result is ANDed with %__u bitcast to <8 x i1>,
; widened to <64 x i1> by a shuffle whose lanes 8 and above all select from
; the zeroinitializer operand, and bitcast to i64.
; Expected asm: the +avx512vl run moves %edi into %k1 and uses a masked ymm
; vpcmpnltd plus kmovq; the +avx512f-only run uses a masked zmm compare
; followed by kshiftlw/kshiftrw $8 and kmovw.
11961 define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11962 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
11963 ; VLX: # %bb.0: # %entry
11964 ; VLX-NEXT: kmovd %edi, %k1
11965 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 {%k1}
11966 ; VLX-NEXT: kmovq %k0, %rax
11967 ; VLX-NEXT: vzeroupper
11970 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
11971 ; NoVLX: # %bb.0: # %entry
11972 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
11973 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11974 ; NoVLX-NEXT: kmovw %edi, %k1
11975 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11976 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11977 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11978 ; NoVLX-NEXT: kmovw %k0, %eax
11979 ; NoVLX-NEXT: vzeroupper
11982 %0 = bitcast <4 x i64> %__a to <8 x i32>
11983 %1 = bitcast <4 x i64> %__b to <8 x i32>
11984 %2 = icmp sge <8 x i32> %0, %1
11985 %3 = bitcast i8 %__u to <8 x i1>
11986 %4 = and <8 x i1> %2, %3
11987 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11988 %6 = bitcast <64 x i1> %5 to i64
; Masked memory-operand variant: icmp sge of %__a against a <4 x i64> loaded
; from %__b (both as <8 x i32>), ANDed with %__u bitcast to <8 x i1>, widened
; to <64 x i1> with zeros from the zeroinitializer shuffle operand and bitcast
; to i64.
; The VLX checks expect the load folded into a write-masked ymm vpcmpnltd; the
; NoVLX checks expect a separate vmovdqa, a write-masked zmm compare and a
; kshiftlw/kshiftrw $8 pair.
11992 define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
11993 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
11994 ; VLX: # %bb.0: # %entry
11995 ; VLX-NEXT: kmovd %edi, %k1
11996 ; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
11997 ; VLX-NEXT: kmovq %k0, %rax
11998 ; VLX-NEXT: vzeroupper
12001 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
12002 ; NoVLX: # %bb.0: # %entry
12003 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
12004 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
12005 ; NoVLX-NEXT: kmovw %edi, %k1
12006 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
12007 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
12008 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
12009 ; NoVLX-NEXT: kmovw %k0, %eax
12010 ; NoVLX-NEXT: vzeroupper
12013 %0 = bitcast <4 x i64> %__a to <8 x i32>
12014 %load = load <4 x i64>, <4 x i64>* %__b
12015 %1 = bitcast <4 x i64> %load to <8 x i32>
12016 %2 = icmp sge <8 x i32> %0, %1
12017 %3 = bitcast i8 %__u to <8 x i1>
12018 %4 = and <8 x i1> %2, %3
12019 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
12020 %6 = bitcast <64 x i1> %5 to i64
; Broadcast-operand variant: a scalar i32 loaded from %__b is splatted to all
; 8 lanes (insertelement into lane 0 + all-zero shuffle mask) and used as the
; right-hand side of icmp sge against %__a viewed as <8 x i32>. The <8 x i1>
; result is widened to <64 x i1> with zeros from the zeroinitializer shuffle
; operand and bitcast to i64.
; The VLX checks expect an embedded {1to8} broadcast memory operand; the NoVLX
; checks expect an explicit vpbroadcastd, a zmm compare and a
; kshiftlw/kshiftrw $8 pair.
12025 define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
12026 ; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
12027 ; VLX: # %bb.0: # %entry
12028 ; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0
12029 ; VLX-NEXT: kmovq %k0, %rax
12030 ; VLX-NEXT: vzeroupper
12033 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
12034 ; NoVLX: # %bb.0: # %entry
12035 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
12036 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
12037 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12038 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
12039 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
12040 ; NoVLX-NEXT: kmovw %k0, %eax
12041 ; NoVLX-NEXT: vzeroupper
12044 %0 = bitcast <4 x i64> %__a to <8 x i32>
12045 %load = load i32, i32* %__b
12046 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
12047 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12048 %2 = icmp sge <8 x i32> %0, %1
12049 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
12050 %4 = bitcast <64 x i1> %3 to i64
; Masked broadcast-operand variant: a scalar i32 loaded from %__b is splatted
; to 8 lanes, compared with icmp sge against %__a viewed as <8 x i32>, and the
; result is ANDed with %__u bitcast to <8 x i1> (mask is the first `and`
; operand here). The <8 x i1> is widened to <64 x i1> with zeros from the
; zeroinitializer shuffle operand and bitcast to i64.
; The VLX checks expect a write-masked compare with a {1to8} memory operand;
; the NoVLX checks expect vpbroadcastd, a write-masked zmm compare and a
; kshiftlw/kshiftrw $8 pair.
12054 define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
12055 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
12056 ; VLX: # %bb.0: # %entry
12057 ; VLX-NEXT: kmovd %edi, %k1
12058 ; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
12059 ; VLX-NEXT: kmovq %k0, %rax
12060 ; VLX-NEXT: vzeroupper
12063 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
12064 ; NoVLX: # %bb.0: # %entry
12065 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
12066 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
12067 ; NoVLX-NEXT: kmovw %edi, %k1
12068 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
12069 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
12070 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
12071 ; NoVLX-NEXT: kmovw %k0, %eax
12072 ; NoVLX-NEXT: vzeroupper
12075 %0 = bitcast <4 x i64> %__a to <8 x i32>
12076 %load = load i32, i32* %__b
12077 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
12078 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12079 %2 = icmp sge <8 x i32> %0, %1
12080 %3 = bitcast i8 %__u to <8 x i1>
12081 %4 = and <8 x i1> %3, %2
12082 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
12083 %6 = bitcast <64 x i1> %5 to i64
; Register-register icmp sge on %__a and %__b viewed as <16 x i32>. The
; <16 x i1> result is widened to <32 x i1> by a shufflevector whose indices
; 16-31 select from the zeroinitializer operand, then bitcast to i32.
; Both check prefixes expect a single zmm vpcmpnltd; the mask is moved out
; with kmovd under VLX and kmovw under NoVLX, with no k-register shifts.
12088 define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
12089 ; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
12090 ; VLX: # %bb.0: # %entry
12091 ; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12092 ; VLX-NEXT: kmovd %k0, %eax
12093 ; VLX-NEXT: vzeroupper
12096 ; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
12097 ; NoVLX: # %bb.0: # %entry
12098 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12099 ; NoVLX-NEXT: kmovw %k0, %eax
12100 ; NoVLX-NEXT: vzeroupper
12103 %0 = bitcast <8 x i64> %__a to <16 x i32>
12104 %1 = bitcast <8 x i64> %__b to <16 x i32>
12105 %2 = icmp sge <16 x i32> %0, %1
12106 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12107 %4 = bitcast <32 x i1> %3 to i32
; Memory-operand variant: icmp sge of %__a against an <8 x i64> loaded from
; %__b, both viewed as <16 x i32>. The <16 x i1> result is widened to
; <32 x i1> with zeros from the zeroinitializer shuffle operand and bitcast to
; i32.
; Both check prefixes expect the load folded into a zmm vpcmpnltd.
12111 define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
12112 ; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
12113 ; VLX: # %bb.0: # %entry
12114 ; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
12115 ; VLX-NEXT: kmovd %k0, %eax
12116 ; VLX-NEXT: vzeroupper
12119 ; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
12120 ; NoVLX: # %bb.0: # %entry
12121 ; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
12122 ; NoVLX-NEXT: kmovw %k0, %eax
12123 ; NoVLX-NEXT: vzeroupper
12126 %0 = bitcast <8 x i64> %__a to <16 x i32>
12127 %load = load <8 x i64>, <8 x i64>* %__b
12128 %1 = bitcast <8 x i64> %load to <16 x i32>
12129 %2 = icmp sge <16 x i32> %0, %1
12130 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12131 %4 = bitcast <32 x i1> %3 to i32
; Masked register-register variant: icmp sge on %__a and %__b viewed as
; <16 x i32>, ANDed with %__u bitcast to <16 x i1>, widened to <32 x i1> with
; zeros from the zeroinitializer shuffle operand and bitcast to i32.
; The VLX checks expect %edi moved into %k1 and used as the compare's
; write-mask; the NoVLX checks expect an unmasked compare with the mask
; applied afterwards by `andl %edi, %eax`.
12135 define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
12136 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
12137 ; VLX: # %bb.0: # %entry
12138 ; VLX-NEXT: kmovd %edi, %k1
12139 ; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
12140 ; VLX-NEXT: kmovd %k0, %eax
12141 ; VLX-NEXT: vzeroupper
12144 ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
12145 ; NoVLX: # %bb.0: # %entry
12146 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12147 ; NoVLX-NEXT: kmovw %k0, %eax
12148 ; NoVLX-NEXT: andl %edi, %eax
12149 ; NoVLX-NEXT: vzeroupper
12152 %0 = bitcast <8 x i64> %__a to <16 x i32>
12153 %1 = bitcast <8 x i64> %__b to <16 x i32>
12154 %2 = icmp sge <16 x i32> %0, %1
12155 %3 = bitcast i16 %__u to <16 x i1>
12156 %4 = and <16 x i1> %2, %3
12157 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12158 %6 = bitcast <32 x i1> %5 to i32
; Masked memory-operand variant: icmp sge of %__a against an <8 x i64> loaded
; from %__b (both as <16 x i32>), ANDed with %__u bitcast to <16 x i1>,
; widened to <32 x i1> with zeros from the zeroinitializer shuffle operand and
; bitcast to i32.
; The VLX checks expect a write-masked compare with the load folded in; the
; NoVLX checks expect an unmasked folded-load compare followed by
; `andl %edi, %eax`.
12162 define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
12163 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
12164 ; VLX: # %bb.0: # %entry
12165 ; VLX-NEXT: kmovd %edi, %k1
12166 ; VLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
12167 ; VLX-NEXT: kmovd %k0, %eax
12168 ; VLX-NEXT: vzeroupper
12171 ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
12172 ; NoVLX: # %bb.0: # %entry
12173 ; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0
12174 ; NoVLX-NEXT: kmovw %k0, %eax
12175 ; NoVLX-NEXT: andl %edi, %eax
12176 ; NoVLX-NEXT: vzeroupper
12179 %0 = bitcast <8 x i64> %__a to <16 x i32>
12180 %load = load <8 x i64>, <8 x i64>* %__b
12181 %1 = bitcast <8 x i64> %load to <16 x i32>
12182 %2 = icmp sge <16 x i32> %0, %1
12183 %3 = bitcast i16 %__u to <16 x i1>
12184 %4 = and <16 x i1> %2, %3
12185 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12186 %6 = bitcast <32 x i1> %5 to i32
; Broadcast-operand variant: a scalar i32 loaded from %__b is splatted to all
; 16 lanes (insertelement into lane 0 + all-zero shuffle mask) and compared
; with icmp sge against %__a viewed as <16 x i32>. The <16 x i1> result is
; widened to <32 x i1> with zeros from the zeroinitializer shuffle operand and
; bitcast to i32.
; Both check prefixes expect a zmm vpcmpnltd with a {1to16} memory operand.
12191 define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
12192 ; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
12193 ; VLX: # %bb.0: # %entry
12194 ; VLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
12195 ; VLX-NEXT: kmovd %k0, %eax
12196 ; VLX-NEXT: vzeroupper
12199 ; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
12200 ; NoVLX: # %bb.0: # %entry
12201 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
12202 ; NoVLX-NEXT: kmovw %k0, %eax
12203 ; NoVLX-NEXT: vzeroupper
12206 %0 = bitcast <8 x i64> %__a to <16 x i32>
12207 %load = load i32, i32* %__b
12208 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
12209 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12210 %2 = icmp sge <16 x i32> %0, %1
12211 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12212 %4 = bitcast <32 x i1> %3 to i32
; Masked broadcast-operand variant: a scalar i32 loaded from %__b is splatted
; to 16 lanes, compared with icmp sge against %__a viewed as <16 x i32>, and
; ANDed with %__u bitcast to <16 x i1> (mask is the first `and` operand here).
; The result is widened to <32 x i1> with zeros from the zeroinitializer
; shuffle operand and bitcast to i32.
; The VLX checks expect a write-masked {1to16} compare; the NoVLX checks
; expect an unmasked {1to16} compare followed by `andl %edi, %eax`.
12216 define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
12217 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
12218 ; VLX: # %bb.0: # %entry
12219 ; VLX-NEXT: kmovd %edi, %k1
12220 ; VLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
12221 ; VLX-NEXT: kmovd %k0, %eax
12222 ; VLX-NEXT: vzeroupper
12225 ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
12226 ; NoVLX: # %bb.0: # %entry
12227 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0
12228 ; NoVLX-NEXT: kmovw %k0, %eax
12229 ; NoVLX-NEXT: andl %edi, %eax
12230 ; NoVLX-NEXT: vzeroupper
12233 %0 = bitcast <8 x i64> %__a to <16 x i32>
12234 %load = load i32, i32* %__b
12235 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
12236 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12237 %2 = icmp sge <16 x i32> %0, %1
12238 %3 = bitcast i16 %__u to <16 x i1>
12239 %4 = and <16 x i1> %3, %2
12240 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12241 %6 = bitcast <32 x i1> %5 to i32
; Register-register icmp sge on %__a and %__b viewed as <16 x i32>. The
; <16 x i1> result is widened to <64 x i1> by a shufflevector whose indices
; 16-31 (repeated for the upper lanes) select from the zeroinitializer
; operand, then bitcast to i64.
; Both check prefixes expect a single zmm vpcmpnltd; the mask is moved out
; with kmovq under VLX and kmovw under NoVLX.
12246 define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
12247 ; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
12248 ; VLX: # %bb.0: # %entry
12249 ; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12250 ; VLX-NEXT: kmovq %k0, %rax
12251 ; VLX-NEXT: vzeroupper
12254 ; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
12255 ; NoVLX: # %bb.0: # %entry
12256 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12257 ; NoVLX-NEXT: kmovw %k0, %eax
12258 ; NoVLX-NEXT: vzeroupper
12261 %0 = bitcast <8 x i64> %__a to <16 x i32>
12262 %1 = bitcast <8 x i64> %__b to <16 x i32>
12263 %2 = icmp sge <16 x i32> %0, %1
12264 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12265 %4 = bitcast <64 x i1> %3 to i64
; Memory-operand variant: icmp sge of %__a against an <8 x i64> loaded from
; %__b, both viewed as <16 x i32>. The <16 x i1> result is widened to
; <64 x i1> with zeros from the zeroinitializer shuffle operand and bitcast to
; i64.
; Both check prefixes expect the load folded into a zmm vpcmpnltd.
12269 define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
12270 ; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
12271 ; VLX: # %bb.0: # %entry
12272 ; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
12273 ; VLX-NEXT: kmovq %k0, %rax
12274 ; VLX-NEXT: vzeroupper
12277 ; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
12278 ; NoVLX: # %bb.0: # %entry
12279 ; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
12280 ; NoVLX-NEXT: kmovw %k0, %eax
12281 ; NoVLX-NEXT: vzeroupper
12284 %0 = bitcast <8 x i64> %__a to <16 x i32>
12285 %load = load <8 x i64>, <8 x i64>* %__b
12286 %1 = bitcast <8 x i64> %load to <16 x i32>
12287 %2 = icmp sge <16 x i32> %0, %1
12288 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12289 %4 = bitcast <64 x i1> %3 to i64
; Masked register-register variant: icmp sge on %__a and %__b viewed as
; <16 x i32>, ANDed with %__u bitcast to <16 x i1>, widened to <64 x i1> with
; zeros from the zeroinitializer shuffle operand and bitcast to i64.
; The VLX checks expect %edi moved into %k1 and used as the compare's
; write-mask; the NoVLX checks expect an unmasked compare with the mask
; applied afterwards by `andl %edi, %eax`.
12293 define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
12294 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
12295 ; VLX: # %bb.0: # %entry
12296 ; VLX-NEXT: kmovd %edi, %k1
12297 ; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
12298 ; VLX-NEXT: kmovq %k0, %rax
12299 ; VLX-NEXT: vzeroupper
12302 ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
12303 ; NoVLX: # %bb.0: # %entry
12304 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12305 ; NoVLX-NEXT: kmovw %k0, %eax
12306 ; NoVLX-NEXT: andl %edi, %eax
12307 ; NoVLX-NEXT: vzeroupper
12310 %0 = bitcast <8 x i64> %__a to <16 x i32>
12311 %1 = bitcast <8 x i64> %__b to <16 x i32>
12312 %2 = icmp sge <16 x i32> %0, %1
12313 %3 = bitcast i16 %__u to <16 x i1>
12314 %4 = and <16 x i1> %2, %3
12315 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12316 %6 = bitcast <64 x i1> %5 to i64
; Masked memory-operand variant: icmp sge of %__a against an <8 x i64> loaded
; from %__b (both as <16 x i32>), ANDed with %__u bitcast to <16 x i1>,
; widened to <64 x i1> with zeros from the zeroinitializer shuffle operand and
; bitcast to i64.
; The VLX checks expect a write-masked compare with the load folded in; the
; NoVLX checks expect an unmasked folded-load compare followed by
; `andl %edi, %eax`.
12320 define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
12321 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
12322 ; VLX: # %bb.0: # %entry
12323 ; VLX-NEXT: kmovd %edi, %k1
12324 ; VLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
12325 ; VLX-NEXT: kmovq %k0, %rax
12326 ; VLX-NEXT: vzeroupper
12329 ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
12330 ; NoVLX: # %bb.0: # %entry
12331 ; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0
12332 ; NoVLX-NEXT: kmovw %k0, %eax
12333 ; NoVLX-NEXT: andl %edi, %eax
12334 ; NoVLX-NEXT: vzeroupper
12337 %0 = bitcast <8 x i64> %__a to <16 x i32>
12338 %load = load <8 x i64>, <8 x i64>* %__b
12339 %1 = bitcast <8 x i64> %load to <16 x i32>
12340 %2 = icmp sge <16 x i32> %0, %1
12341 %3 = bitcast i16 %__u to <16 x i1>
12342 %4 = and <16 x i1> %2, %3
12343 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12344 %6 = bitcast <64 x i1> %5 to i64
; Broadcast-operand variant: a scalar i32 loaded from %__b is splatted to all
; 16 lanes (insertelement into lane 0 + all-zero shuffle mask) and compared
; with icmp sge against %__a viewed as <16 x i32>. The <16 x i1> result is
; widened to <64 x i1> with zeros from the zeroinitializer shuffle operand and
; bitcast to i64.
; Both check prefixes expect a zmm vpcmpnltd with a {1to16} memory operand.
12349 define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
12350 ; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
12351 ; VLX: # %bb.0: # %entry
12352 ; VLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
12353 ; VLX-NEXT: kmovq %k0, %rax
12354 ; VLX-NEXT: vzeroupper
12357 ; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
12358 ; NoVLX: # %bb.0: # %entry
12359 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
12360 ; NoVLX-NEXT: kmovw %k0, %eax
12361 ; NoVLX-NEXT: vzeroupper
12364 %0 = bitcast <8 x i64> %__a to <16 x i32>
12365 %load = load i32, i32* %__b
12366 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
12367 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12368 %2 = icmp sge <16 x i32> %0, %1
12369 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12370 %4 = bitcast <64 x i1> %3 to i64
; Masked broadcast-operand variant: a scalar i32 loaded from %__b is splatted
; to 16 lanes, compared with icmp sge against %__a viewed as <16 x i32>, and
; ANDed with %__u bitcast to <16 x i1> (mask is the first `and` operand here).
; The result is widened to <64 x i1> with zeros from the zeroinitializer
; shuffle operand and bitcast to i64.
; The VLX checks expect a write-masked {1to16} compare; the NoVLX checks
; expect an unmasked {1to16} compare followed by `andl %edi, %eax`.
12374 define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
12375 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
12376 ; VLX: # %bb.0: # %entry
12377 ; VLX-NEXT: kmovd %edi, %k1
12378 ; VLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
12379 ; VLX-NEXT: kmovq %k0, %rax
12380 ; VLX-NEXT: vzeroupper
12383 ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
12384 ; NoVLX: # %bb.0: # %entry
12385 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0
12386 ; NoVLX-NEXT: kmovw %k0, %eax
12387 ; NoVLX-NEXT: andl %edi, %eax
12388 ; NoVLX-NEXT: vzeroupper
12391 %0 = bitcast <8 x i64> %__a to <16 x i32>
12392 %load = load i32, i32* %__b
12393 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
12394 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12395 %2 = icmp sge <16 x i32> %0, %1
12396 %3 = bitcast i16 %__u to <16 x i1>
12397 %4 = and <16 x i1> %3, %2
12398 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12399 %6 = bitcast <64 x i1> %5 to i64
; Register-register icmp sge on <2 x i64> %__a and %__b (the two bitcasts are
; identity casts). The <2 x i1> result is widened to <4 x i1> by a
; shufflevector whose indices 2-3 select from the zeroinitializer operand,
; then bitcast to i4.
; The VLX checks expect an xmm vpcmpnltq and kmovb; the NoVLX checks expect a
; zmm compare followed by a kshiftlw/kshiftrw $14 pair.
12404 define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12405 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
12406 ; VLX: # %bb.0: # %entry
12407 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0
12408 ; VLX-NEXT: kmovb %k0, %eax
12411 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
12412 ; NoVLX: # %bb.0: # %entry
12413 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12414 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12415 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12416 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12417 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12418 ; NoVLX-NEXT: kmovw %k0, %eax
12419 ; NoVLX-NEXT: vzeroupper
12422 %0 = bitcast <2 x i64> %__a to <2 x i64>
12423 %1 = bitcast <2 x i64> %__b to <2 x i64>
12424 %2 = icmp sge <2 x i64> %0, %1
12425 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12426 %4 = bitcast <4 x i1> %3 to i4
; Memory-operand variant: icmp sge of <2 x i64> %__a against a <2 x i64>
; loaded from %__b. The <2 x i1> result is widened to <4 x i1> with zeros from
; the zeroinitializer shuffle operand and bitcast to i4.
; The VLX checks expect the load folded into an xmm vpcmpnltq; the NoVLX
; checks expect a separate vmovdqa, a zmm compare and a
; kshiftlw/kshiftrw $14 pair.
12430 define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12431 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
12432 ; VLX: # %bb.0: # %entry
12433 ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
12434 ; VLX-NEXT: kmovb %k0, %eax
12437 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
12438 ; NoVLX: # %bb.0: # %entry
12439 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12440 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
12441 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12442 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12443 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12444 ; NoVLX-NEXT: kmovw %k0, %eax
12445 ; NoVLX-NEXT: vzeroupper
12448 %0 = bitcast <2 x i64> %__a to <2 x i64>
12449 %load = load <2 x i64>, <2 x i64>* %__b
12450 %1 = bitcast <2 x i64> %load to <2 x i64>
12451 %2 = icmp sge <2 x i64> %0, %1
12452 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12453 %4 = bitcast <4 x i1> %3 to i4
; Masked register-register variant: icmp sge on <2 x i64> %__a and %__b, ANDed
; with the low two elements of %__u (bitcast to <8 x i1>, then elements 0-1
; extracted by %extract.i). The <2 x i1> is widened to <4 x i1> with zeros
; from the zeroinitializer shuffle operand and bitcast to i4.
; Both check prefixes expect %edi moved into %k1 and used as the compare's
; write-mask; the NoVLX checks additionally expect the zmm widening and a
; kshiftlw/kshiftrw $14 pair.
12457 define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12458 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask:
12459 ; VLX: # %bb.0: # %entry
12460 ; VLX-NEXT: kmovd %edi, %k1
12461 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
12462 ; VLX-NEXT: kmovb %k0, %eax
12465 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask:
12466 ; NoVLX: # %bb.0: # %entry
12467 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12468 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12469 ; NoVLX-NEXT: kmovw %edi, %k1
12470 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12471 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12472 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12473 ; NoVLX-NEXT: kmovw %k0, %eax
12474 ; NoVLX-NEXT: vzeroupper
12477 %0 = bitcast <2 x i64> %__a to <2 x i64>
12478 %1 = bitcast <2 x i64> %__b to <2 x i64>
12479 %2 = icmp sge <2 x i64> %0, %1
12480 %3 = bitcast i8 %__u to <8 x i1>
12481 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12482 %4 = and <2 x i1> %2, %extract.i
12483 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12484 %6 = bitcast <4 x i1> %5 to i4
; Masked memory-operand variant: icmp sge of <2 x i64> %__a against a
; <2 x i64> loaded from %__b, ANDed with the low two elements of %__u (bitcast
; to <8 x i1>, elements 0-1 extracted). The <2 x i1> is widened to <4 x i1>
; with zeros from the zeroinitializer shuffle operand and bitcast to i4.
; The VLX checks expect a write-masked xmm compare with the load folded in;
; the NoVLX checks expect a separate vmovdqa, a write-masked zmm compare and a
; kshiftlw/kshiftrw $14 pair.
12488 define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12489 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem:
12490 ; VLX: # %bb.0: # %entry
12491 ; VLX-NEXT: kmovd %edi, %k1
12492 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
12493 ; VLX-NEXT: kmovb %k0, %eax
12496 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem:
12497 ; NoVLX: # %bb.0: # %entry
12498 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12499 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
12500 ; NoVLX-NEXT: kmovw %edi, %k1
12501 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12502 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12503 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12504 ; NoVLX-NEXT: kmovw %k0, %eax
12505 ; NoVLX-NEXT: vzeroupper
12508 %0 = bitcast <2 x i64> %__a to <2 x i64>
12509 %load = load <2 x i64>, <2 x i64>* %__b
12510 %1 = bitcast <2 x i64> %load to <2 x i64>
12511 %2 = icmp sge <2 x i64> %0, %1
12512 %3 = bitcast i8 %__u to <8 x i1>
12513 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12514 %4 = and <2 x i1> %2, %extract.i
12515 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12516 %6 = bitcast <4 x i1> %5 to i4
; Broadcast-operand variant: a scalar i64 loaded from %__b is splatted to both
; lanes (insertelement into lane 0 + all-zero shuffle mask) and compared with
; icmp sge against <2 x i64> %__a. The <2 x i1> result is widened to <4 x i1>
; with zeros from the zeroinitializer shuffle operand and bitcast to i4.
; The VLX checks expect an embedded {1to2} broadcast memory operand; the NoVLX
; checks expect an explicit vpbroadcastq, a zmm compare and a
; kshiftlw/kshiftrw $14 pair.
12521 define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
12522 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
12523 ; VLX: # %bb.0: # %entry
12524 ; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
12525 ; VLX-NEXT: kmovb %k0, %eax
12528 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
12529 ; NoVLX: # %bb.0: # %entry
12530 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12531 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
12532 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12533 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12534 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12535 ; NoVLX-NEXT: kmovw %k0, %eax
12536 ; NoVLX-NEXT: vzeroupper
12539 %0 = bitcast <2 x i64> %__a to <2 x i64>
12540 %load = load i64, i64* %__b
12541 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12542 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12543 %2 = icmp sge <2 x i64> %0, %1
12544 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12545 %4 = bitcast <4 x i1> %3 to i4
; Masked broadcast-operand variant: a scalar i64 loaded from %__b is splatted
; to both lanes, compared with icmp sge against <2 x i64> %__a, and ANDed with
; the low two elements of %__u (mask is the first `and` operand here). The
; <2 x i1> is widened to <4 x i1> with zeros from the zeroinitializer shuffle
; operand and bitcast to i4.
; The VLX checks expect a write-masked {1to2} compare; the NoVLX checks expect
; vpbroadcastq, a write-masked zmm compare and a kshiftlw/kshiftrw $14 pair.
12549 define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
12550 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
12551 ; VLX: # %bb.0: # %entry
12552 ; VLX-NEXT: kmovd %edi, %k1
12553 ; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
12554 ; VLX-NEXT: kmovb %k0, %eax
12557 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
12558 ; NoVLX: # %bb.0: # %entry
12559 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12560 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
12561 ; NoVLX-NEXT: kmovw %edi, %k1
12562 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12563 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12564 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12565 ; NoVLX-NEXT: kmovw %k0, %eax
12566 ; NoVLX-NEXT: vzeroupper
12569 %0 = bitcast <2 x i64> %__a to <2 x i64>
12570 %load = load i64, i64* %__b
12571 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12572 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12573 %2 = icmp sge <2 x i64> %0, %1
12574 %3 = bitcast i8 %__u to <8 x i1>
12575 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12576 %4 = and <2 x i1> %extract.i, %2
12577 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12578 %6 = bitcast <4 x i1> %5 to i4
; Register-register icmp sge on <2 x i64> %__a and %__b (the two bitcasts are
; identity casts). The <2 x i1> result is widened to <8 x i1> by a
; shufflevector whose indices 2-3 (repeated for the upper lanes) select from
; the zeroinitializer operand, then bitcast to i8.
; The VLX checks expect an xmm vpcmpnltq and kmovd; the NoVLX checks expect a
; zmm compare followed by a kshiftlw/kshiftrw $14 pair.
12583 define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12584 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask:
12585 ; VLX: # %bb.0: # %entry
12586 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0
12587 ; VLX-NEXT: kmovd %k0, %eax
12588 ; VLX-NEXT: # kill: def $al killed $al killed $eax
12591 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask:
12592 ; NoVLX: # %bb.0: # %entry
12593 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12594 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12595 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12596 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12597 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12598 ; NoVLX-NEXT: kmovw %k0, %eax
12599 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
12600 ; NoVLX-NEXT: vzeroupper
12603 %0 = bitcast <2 x i64> %__a to <2 x i64>
12604 %1 = bitcast <2 x i64> %__b to <2 x i64>
12605 %2 = icmp sge <2 x i64> %0, %1
12606 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12607 %4 = bitcast <8 x i1> %3 to i8
; Memory-operand variant: icmp sge of <2 x i64> %__a against a <2 x i64>
; loaded from %__b. The <2 x i1> result is widened to <8 x i1> with zeros from
; the zeroinitializer shuffle operand and bitcast to i8.
; The VLX checks expect the load folded into an xmm vpcmpnltq; the NoVLX
; checks expect a separate vmovdqa, a zmm compare and a
; kshiftlw/kshiftrw $14 pair.
12611 define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12612 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem:
12613 ; VLX: # %bb.0: # %entry
12614 ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
12615 ; VLX-NEXT: kmovd %k0, %eax
12616 ; VLX-NEXT: # kill: def $al killed $al killed $eax
12619 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem:
12620 ; NoVLX: # %bb.0: # %entry
12621 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12622 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
12623 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12624 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12625 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12626 ; NoVLX-NEXT: kmovw %k0, %eax
12627 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
12628 ; NoVLX-NEXT: vzeroupper
12631 %0 = bitcast <2 x i64> %__a to <2 x i64>
12632 %load = load <2 x i64>, <2 x i64>* %__b
12633 %1 = bitcast <2 x i64> %load to <2 x i64>
12634 %2 = icmp sge <2 x i64> %0, %1
12635 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12636 %4 = bitcast <8 x i1> %3 to i8
; Masked register-register variant: icmp sge on <2 x i64> %__a and %__b, ANDed
; with the low two elements of %__u (bitcast to <8 x i1>, then elements 0-1
; extracted by %extract.i). The <2 x i1> is widened to <8 x i1> with zeros
; from the zeroinitializer shuffle operand and bitcast to i8.
; Both check prefixes expect %edi moved into %k1 and used as the compare's
; write-mask; the NoVLX checks additionally expect the zmm widening and a
; kshiftlw/kshiftrw $14 pair.
12640 define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12641 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask:
12642 ; VLX: # %bb.0: # %entry
12643 ; VLX-NEXT: kmovd %edi, %k1
12644 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
12645 ; VLX-NEXT: kmovd %k0, %eax
12646 ; VLX-NEXT: # kill: def $al killed $al killed $eax
12649 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask:
12650 ; NoVLX: # %bb.0: # %entry
12651 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12652 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12653 ; NoVLX-NEXT: kmovw %edi, %k1
12654 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12655 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12656 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12657 ; NoVLX-NEXT: kmovw %k0, %eax
12658 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
12659 ; NoVLX-NEXT: vzeroupper
12662 %0 = bitcast <2 x i64> %__a to <2 x i64>
12663 %1 = bitcast <2 x i64> %__b to <2 x i64>
12664 %2 = icmp sge <2 x i64> %0, %1
12665 %3 = bitcast i8 %__u to <8 x i1>
12666 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12667 %4 = and <2 x i1> %2, %extract.i
12668 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12669 %6 = bitcast <8 x i1> %5 to i8
; Masked signed greater-or-equal compare (icmp sge) of %__a against a <2 x i64> loaded from %__b.
; The low two bits of %__u are ANDed with the compare result, which is then padded with zero
; lanes to <8 x i1> and returned as an i8 bitmask.
; Expected codegen: with AVX512VL the load and the {%k1} predicate are folded into vpcmpnltq;
; without it the operand is loaded with vmovdqa, compared on zmm, and kshiftlw/kshiftrw $14
; keeps only the low two mask bits.
12673 define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12674 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem:
12675 ; VLX: # %bb.0: # %entry
12676 ; VLX-NEXT: kmovd %edi, %k1
12677 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
12678 ; VLX-NEXT: kmovd %k0, %eax
12679 ; VLX-NEXT: # kill: def $al killed $al killed $eax
12682 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem:
12683 ; NoVLX: # %bb.0: # %entry
12684 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12685 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
12686 ; NoVLX-NEXT: kmovw %edi, %k1
12687 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12688 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12689 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12690 ; NoVLX-NEXT: kmovw %k0, %eax
12691 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
12692 ; NoVLX-NEXT: vzeroupper
12695 %0 = bitcast <2 x i64> %__a to <2 x i64>
12696 %load = load <2 x i64>, <2 x i64>* %__b
12697 %1 = bitcast <2 x i64> %load to <2 x i64>
12698 %2 = icmp sge <2 x i64> %0, %1
12699 %3 = bitcast i8 %__u to <8 x i1>
12700 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12701 %4 = and <2 x i1> %2, %extract.i
12702 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12703 %6 = bitcast <8 x i1> %5 to i8
; Signed greater-or-equal compare (icmp sge) of %__a against a scalar i64 loaded from %__b
; and splatted to both lanes (insertelement + shufflevector <0, 0>).
; The <2 x i1> result is padded with zero lanes to <8 x i1> and returned as an i8 bitmask.
; Expected codegen: with AVX512VL an embedded {1to2} broadcast operand on vpcmpnltq; without it
; a separate vpbroadcastq, a zmm compare, and kshiftlw/kshiftrw $14 to keep the low two mask bits.
12708 define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
12709 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
12710 ; VLX: # %bb.0: # %entry
12711 ; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
12712 ; VLX-NEXT: kmovd %k0, %eax
12713 ; VLX-NEXT: # kill: def $al killed $al killed $eax
12716 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
12717 ; NoVLX: # %bb.0: # %entry
12718 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12719 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
12720 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12721 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12722 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12723 ; NoVLX-NEXT: kmovw %k0, %eax
12724 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
12725 ; NoVLX-NEXT: vzeroupper
12728 %0 = bitcast <2 x i64> %__a to <2 x i64>
12729 %load = load i64, i64* %__b
12730 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12731 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12732 %2 = icmp sge <2 x i64> %0, %1
12733 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12734 %4 = bitcast <8 x i1> %3 to i8
; Masked signed greater-or-equal compare (icmp sge) of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes. The low two bits of %__u are ANDed with the compare result
; (mask is the first AND operand here), which is then padded with zero lanes to <8 x i1> and
; returned as an i8 bitmask.
; Expected codegen: with AVX512VL a {1to2} broadcast operand plus {%k1} predicate on vpcmpnltq;
; without it vpbroadcastq, a predicated zmm compare, and kshiftlw/kshiftrw $14.
12738 define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
12739 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
12740 ; VLX: # %bb.0: # %entry
12741 ; VLX-NEXT: kmovd %edi, %k1
12742 ; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
12743 ; VLX-NEXT: kmovd %k0, %eax
12744 ; VLX-NEXT: # kill: def $al killed $al killed $eax
12747 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
12748 ; NoVLX: # %bb.0: # %entry
12749 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12750 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
12751 ; NoVLX-NEXT: kmovw %edi, %k1
12752 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12753 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12754 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12755 ; NoVLX-NEXT: kmovw %k0, %eax
12756 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
12757 ; NoVLX-NEXT: vzeroupper
12760 %0 = bitcast <2 x i64> %__a to <2 x i64>
12761 %load = load i64, i64* %__b
12762 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12763 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12764 %2 = icmp sge <2 x i64> %0, %1
12765 %3 = bitcast i8 %__u to <8 x i1>
12766 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12767 %4 = and <2 x i1> %extract.i, %2
12768 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12769 %6 = bitcast <8 x i1> %5 to i8
; Signed greater-or-equal compare (icmp sge) of two <2 x i64> register operands.
; The <2 x i1> result is padded with zero lanes to <16 x i1> and returned as an i16 bitmask.
; Expected codegen: vpcmpnltq on xmm with AVX512VL; without it the operands are widened to zmm
; and kshiftlw/kshiftrw $14 keeps only the low two mask bits.
12774 define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12775 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask:
12776 ; VLX: # %bb.0: # %entry
12777 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0
12778 ; VLX-NEXT: kmovd %k0, %eax
12779 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
12782 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask:
12783 ; NoVLX: # %bb.0: # %entry
12784 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12785 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12786 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12787 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12788 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12789 ; NoVLX-NEXT: kmovw %k0, %eax
12790 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
12791 ; NoVLX-NEXT: vzeroupper
12794 %0 = bitcast <2 x i64> %__a to <2 x i64>
12795 %1 = bitcast <2 x i64> %__b to <2 x i64>
12796 %2 = icmp sge <2 x i64> %0, %1
12797 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12798 %4 = bitcast <16 x i1> %3 to i16
; Signed greater-or-equal compare (icmp sge) of %__a against a <2 x i64> loaded from %__b.
; The <2 x i1> result is padded with zero lanes to <16 x i1> and returned as an i16 bitmask.
; Expected codegen: with AVX512VL the load is folded into vpcmpnltq on xmm; without it the
; operand is loaded with vmovdqa, compared on zmm, and kshiftlw/kshiftrw $14 keeps only the
; low two mask bits.
12802 define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12803 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem:
12804 ; VLX: # %bb.0: # %entry
12805 ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
12806 ; VLX-NEXT: kmovd %k0, %eax
12807 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
12810 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem:
12811 ; NoVLX: # %bb.0: # %entry
12812 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12813 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
12814 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12815 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12816 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12817 ; NoVLX-NEXT: kmovw %k0, %eax
12818 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
12819 ; NoVLX-NEXT: vzeroupper
12822 %0 = bitcast <2 x i64> %__a to <2 x i64>
12823 %load = load <2 x i64>, <2 x i64>* %__b
12824 %1 = bitcast <2 x i64> %load to <2 x i64>
12825 %2 = icmp sge <2 x i64> %0, %1
12826 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12827 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-or-equal compare (icmp sge) of two <2 x i64> register operands.
; The low two bits of the i8 mask %__u are ANDed with the compare result, which is then padded
; with zero lanes to <16 x i1> and returned as an i16 bitmask.
; Expected codegen: the AND is folded into the {%k1} predicate of vpcmpnltq; without AVX512VL
; the compare runs on zmm and kshiftlw/kshiftrw $14 keeps only the low two mask bits.
12831 define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12832 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask:
12833 ; VLX: # %bb.0: # %entry
12834 ; VLX-NEXT: kmovd %edi, %k1
12835 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
12836 ; VLX-NEXT: kmovd %k0, %eax
12837 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
12840 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask:
12841 ; NoVLX: # %bb.0: # %entry
12842 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12843 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12844 ; NoVLX-NEXT: kmovw %edi, %k1
12845 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12846 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12847 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12848 ; NoVLX-NEXT: kmovw %k0, %eax
12849 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
12850 ; NoVLX-NEXT: vzeroupper
12853 %0 = bitcast <2 x i64> %__a to <2 x i64>
12854 %1 = bitcast <2 x i64> %__b to <2 x i64>
12855 %2 = icmp sge <2 x i64> %0, %1
12856 %3 = bitcast i8 %__u to <8 x i1>
12857 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12858 %4 = and <2 x i1> %2, %extract.i
12859 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12860 %6 = bitcast <16 x i1> %5 to i16
; Masked signed greater-or-equal compare (icmp sge) of %__a against a <2 x i64> loaded from %__b.
; The low two bits of the i8 mask %__u are ANDed with the compare result, which is then padded
; with zero lanes to <16 x i1> and returned as an i16 bitmask.
; Expected codegen: with AVX512VL the load and the {%k1} predicate are folded into vpcmpnltq;
; without it vmovdqa, a predicated zmm compare, and kshiftlw/kshiftrw $14.
12864 define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12865 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem:
12866 ; VLX: # %bb.0: # %entry
12867 ; VLX-NEXT: kmovd %edi, %k1
12868 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
12869 ; VLX-NEXT: kmovd %k0, %eax
12870 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
12873 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem:
12874 ; NoVLX: # %bb.0: # %entry
12875 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12876 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
12877 ; NoVLX-NEXT: kmovw %edi, %k1
12878 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12879 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12880 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12881 ; NoVLX-NEXT: kmovw %k0, %eax
12882 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
12883 ; NoVLX-NEXT: vzeroupper
12886 %0 = bitcast <2 x i64> %__a to <2 x i64>
12887 %load = load <2 x i64>, <2 x i64>* %__b
12888 %1 = bitcast <2 x i64> %load to <2 x i64>
12889 %2 = icmp sge <2 x i64> %0, %1
12890 %3 = bitcast i8 %__u to <8 x i1>
12891 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12892 %4 = and <2 x i1> %2, %extract.i
12893 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12894 %6 = bitcast <16 x i1> %5 to i16
; Signed greater-or-equal compare (icmp sge) of %__a against a scalar i64 loaded from %__b
; and splatted to both lanes (insertelement + shufflevector <0, 0>).
; The <2 x i1> result is padded with zero lanes to <16 x i1> and returned as an i16 bitmask.
; Expected codegen: with AVX512VL an embedded {1to2} broadcast operand on vpcmpnltq; without it
; a separate vpbroadcastq, a zmm compare, and kshiftlw/kshiftrw $14 to keep the low two mask bits.
12899 define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
12900 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
12901 ; VLX: # %bb.0: # %entry
12902 ; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
12903 ; VLX-NEXT: kmovd %k0, %eax
12904 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
12907 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
12908 ; NoVLX: # %bb.0: # %entry
12909 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12910 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
12911 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12912 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12913 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12914 ; NoVLX-NEXT: kmovw %k0, %eax
12915 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
12916 ; NoVLX-NEXT: vzeroupper
12919 %0 = bitcast <2 x i64> %__a to <2 x i64>
12920 %load = load i64, i64* %__b
12921 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12922 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12923 %2 = icmp sge <2 x i64> %0, %1
12924 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12925 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-or-equal compare (icmp sge) of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes. The low two bits of the i8 mask %__u are ANDed with the
; compare result (mask is the first AND operand here), which is then padded with zero lanes to
; <16 x i1> and returned as an i16 bitmask.
; Expected codegen: with AVX512VL a {1to2} broadcast operand plus {%k1} predicate on vpcmpnltq;
; without it vpbroadcastq, a predicated zmm compare, and kshiftlw/kshiftrw $14.
12929 define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
12930 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
12931 ; VLX: # %bb.0: # %entry
12932 ; VLX-NEXT: kmovd %edi, %k1
12933 ; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
12934 ; VLX-NEXT: kmovd %k0, %eax
12935 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
12938 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
12939 ; NoVLX: # %bb.0: # %entry
12940 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12941 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
12942 ; NoVLX-NEXT: kmovw %edi, %k1
12943 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12944 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12945 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12946 ; NoVLX-NEXT: kmovw %k0, %eax
12947 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
12948 ; NoVLX-NEXT: vzeroupper
12951 %0 = bitcast <2 x i64> %__a to <2 x i64>
12952 %load = load i64, i64* %__b
12953 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12954 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12955 %2 = icmp sge <2 x i64> %0, %1
12956 %3 = bitcast i8 %__u to <8 x i1>
12957 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12958 %4 = and <2 x i1> %extract.i, %2
12959 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12960 %6 = bitcast <16 x i1> %5 to i16
; Signed greater-or-equal compare (icmp sge) of two <2 x i64> register operands.
; The <2 x i1> result is padded with zero lanes to <32 x i1> and returned as an i32 bitmask.
; Expected codegen: vpcmpnltq on xmm with AVX512VL; without it the operands are widened to zmm
; and kshiftlw/kshiftrw $14 keeps only the low two mask bits before kmovw.
12965 define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12966 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask:
12967 ; VLX: # %bb.0: # %entry
12968 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0
12969 ; VLX-NEXT: kmovd %k0, %eax
12972 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask:
12973 ; NoVLX: # %bb.0: # %entry
12974 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12975 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12976 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12977 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12978 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12979 ; NoVLX-NEXT: kmovw %k0, %eax
12980 ; NoVLX-NEXT: vzeroupper
12983 %0 = bitcast <2 x i64> %__a to <2 x i64>
12984 %1 = bitcast <2 x i64> %__b to <2 x i64>
12985 %2 = icmp sge <2 x i64> %0, %1
12986 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12987 %4 = bitcast <32 x i1> %3 to i32
; Signed greater-or-equal compare (icmp sge) of %__a against a <2 x i64> loaded from %__b.
; The <2 x i1> result is padded with zero lanes to <32 x i1> and returned as an i32 bitmask.
; Expected codegen: with AVX512VL the load is folded into vpcmpnltq on xmm; without it the
; operand is loaded with vmovdqa, compared on zmm, and kshiftlw/kshiftrw $14 keeps only the
; low two mask bits.
12991 define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12992 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem:
12993 ; VLX: # %bb.0: # %entry
12994 ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
12995 ; VLX-NEXT: kmovd %k0, %eax
12998 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem:
12999 ; NoVLX: # %bb.0: # %entry
13000 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13001 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
13002 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13003 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13004 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13005 ; NoVLX-NEXT: kmovw %k0, %eax
13006 ; NoVLX-NEXT: vzeroupper
13009 %0 = bitcast <2 x i64> %__a to <2 x i64>
13010 %load = load <2 x i64>, <2 x i64>* %__b
13011 %1 = bitcast <2 x i64> %load to <2 x i64>
13012 %2 = icmp sge <2 x i64> %0, %1
13013 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13014 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-or-equal compare (icmp sge) of two <2 x i64> register operands.
; The low two bits of the i8 mask %__u are ANDed with the compare result, which is then padded
; with zero lanes to <32 x i1> and returned as an i32 bitmask.
; Expected codegen: the AND is folded into the {%k1} predicate of vpcmpnltq; without AVX512VL
; the compare runs on zmm and kshiftlw/kshiftrw $14 keeps only the low two mask bits.
13018 define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
13019 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask:
13020 ; VLX: # %bb.0: # %entry
13021 ; VLX-NEXT: kmovd %edi, %k1
13022 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
13023 ; VLX-NEXT: kmovd %k0, %eax
13026 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask:
13027 ; NoVLX: # %bb.0: # %entry
13028 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
13029 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13030 ; NoVLX-NEXT: kmovw %edi, %k1
13031 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13032 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13033 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13034 ; NoVLX-NEXT: kmovw %k0, %eax
13035 ; NoVLX-NEXT: vzeroupper
13038 %0 = bitcast <2 x i64> %__a to <2 x i64>
13039 %1 = bitcast <2 x i64> %__b to <2 x i64>
13040 %2 = icmp sge <2 x i64> %0, %1
13041 %3 = bitcast i8 %__u to <8 x i1>
13042 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13043 %4 = and <2 x i1> %2, %extract.i
13044 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13045 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-or-equal compare (icmp sge) of %__a against a <2 x i64> loaded from %__b.
; The low two bits of the i8 mask %__u are ANDed with the compare result, which is then padded
; with zero lanes to <32 x i1> and returned as an i32 bitmask.
; Expected codegen: with AVX512VL the load and the {%k1} predicate are folded into vpcmpnltq;
; without it vmovdqa, a predicated zmm compare, and kshiftlw/kshiftrw $14.
13049 define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
13050 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem:
13051 ; VLX: # %bb.0: # %entry
13052 ; VLX-NEXT: kmovd %edi, %k1
13053 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
13054 ; VLX-NEXT: kmovd %k0, %eax
13057 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem:
13058 ; NoVLX: # %bb.0: # %entry
13059 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13060 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
13061 ; NoVLX-NEXT: kmovw %edi, %k1
13062 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13063 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13064 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13065 ; NoVLX-NEXT: kmovw %k0, %eax
13066 ; NoVLX-NEXT: vzeroupper
13069 %0 = bitcast <2 x i64> %__a to <2 x i64>
13070 %load = load <2 x i64>, <2 x i64>* %__b
13071 %1 = bitcast <2 x i64> %load to <2 x i64>
13072 %2 = icmp sge <2 x i64> %0, %1
13073 %3 = bitcast i8 %__u to <8 x i1>
13074 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13075 %4 = and <2 x i1> %2, %extract.i
13076 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13077 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-or-equal compare (icmp sge) of %__a against a scalar i64 loaded from %__b
; and splatted to both lanes (insertelement + shufflevector <0, 0>).
; The <2 x i1> result is padded with zero lanes to <32 x i1> and returned as an i32 bitmask.
; Expected codegen: with AVX512VL an embedded {1to2} broadcast operand on vpcmpnltq; without it
; a separate vpbroadcastq, a zmm compare, and kshiftlw/kshiftrw $14 to keep the low two mask bits.
13082 define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
13083 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
13084 ; VLX: # %bb.0: # %entry
13085 ; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
13086 ; VLX-NEXT: kmovd %k0, %eax
13089 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
13090 ; NoVLX: # %bb.0: # %entry
13091 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13092 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
13093 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13094 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13095 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13096 ; NoVLX-NEXT: kmovw %k0, %eax
13097 ; NoVLX-NEXT: vzeroupper
13100 %0 = bitcast <2 x i64> %__a to <2 x i64>
13101 %load = load i64, i64* %__b
13102 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13103 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13104 %2 = icmp sge <2 x i64> %0, %1
13105 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13106 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-or-equal compare (icmp sge) of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes. The low two bits of the i8 mask %__u are ANDed with the
; compare result (mask is the first AND operand here), which is then padded with zero lanes to
; <32 x i1> and returned as an i32 bitmask.
; Expected codegen: with AVX512VL a {1to2} broadcast operand plus {%k1} predicate on vpcmpnltq;
; without it vpbroadcastq, a predicated zmm compare, and kshiftlw/kshiftrw $14.
13110 define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
13111 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
13112 ; VLX: # %bb.0: # %entry
13113 ; VLX-NEXT: kmovd %edi, %k1
13114 ; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
13115 ; VLX-NEXT: kmovd %k0, %eax
13118 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
13119 ; NoVLX: # %bb.0: # %entry
13120 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13121 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
13122 ; NoVLX-NEXT: kmovw %edi, %k1
13123 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13124 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13125 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13126 ; NoVLX-NEXT: kmovw %k0, %eax
13127 ; NoVLX-NEXT: vzeroupper
13130 %0 = bitcast <2 x i64> %__a to <2 x i64>
13131 %load = load i64, i64* %__b
13132 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13133 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13134 %2 = icmp sge <2 x i64> %0, %1
13135 %3 = bitcast i8 %__u to <8 x i1>
13136 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13137 %4 = and <2 x i1> %extract.i, %2
13138 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13139 %6 = bitcast <32 x i1> %5 to i32
13144 define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
13145 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
13146 ; VLX: # %bb.0: # %entry
13147 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0
13148 ; VLX-NEXT: kmovq %k0, %rax
13151 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
13152 ; NoVLX: # %bb.0: # %entry
13153 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
13154 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13155 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13156 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13157 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13158 ; NoVLX-NEXT: kmovw %k0, %eax
13159 ; NoVLX-NEXT: vzeroupper
13162 %0 = bitcast <2 x i64> %__a to <2 x i64>
13163 %1 = bitcast <2 x i64> %__b to <2 x i64>
13164 %2 = icmp sge <2 x i64> %0, %1
13165 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13166 %4 = bitcast <64 x i1> %3 to i64
13170 define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
13171 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
13172 ; VLX: # %bb.0: # %entry
13173 ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
13174 ; VLX-NEXT: kmovq %k0, %rax
13177 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
13178 ; NoVLX: # %bb.0: # %entry
13179 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13180 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
13181 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13182 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13183 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13184 ; NoVLX-NEXT: kmovw %k0, %eax
13185 ; NoVLX-NEXT: vzeroupper
13188 %0 = bitcast <2 x i64> %__a to <2 x i64>
13189 %load = load <2 x i64>, <2 x i64>* %__b
13190 %1 = bitcast <2 x i64> %load to <2 x i64>
13191 %2 = icmp sge <2 x i64> %0, %1
13192 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13193 %4 = bitcast <64 x i1> %3 to i64
13197 define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
13198 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
13199 ; VLX: # %bb.0: # %entry
13200 ; VLX-NEXT: kmovd %edi, %k1
13201 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
13202 ; VLX-NEXT: kmovq %k0, %rax
13205 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
13206 ; NoVLX: # %bb.0: # %entry
13207 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
13208 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13209 ; NoVLX-NEXT: kmovw %edi, %k1
13210 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13211 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13212 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13213 ; NoVLX-NEXT: kmovw %k0, %eax
13214 ; NoVLX-NEXT: vzeroupper
13217 %0 = bitcast <2 x i64> %__a to <2 x i64>
13218 %1 = bitcast <2 x i64> %__b to <2 x i64>
13219 %2 = icmp sge <2 x i64> %0, %1
13220 %3 = bitcast i8 %__u to <8 x i1>
13221 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13222 %4 = and <2 x i1> %2, %extract.i
13223 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13224 %6 = bitcast <64 x i1> %5 to i64
13228 define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
13229 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
13230 ; VLX: # %bb.0: # %entry
13231 ; VLX-NEXT: kmovd %edi, %k1
13232 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
13233 ; VLX-NEXT: kmovq %k0, %rax
13236 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
13237 ; NoVLX: # %bb.0: # %entry
13238 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13239 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
13240 ; NoVLX-NEXT: kmovw %edi, %k1
13241 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13242 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13243 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13244 ; NoVLX-NEXT: kmovw %k0, %eax
13245 ; NoVLX-NEXT: vzeroupper
13248 %0 = bitcast <2 x i64> %__a to <2 x i64>
13249 %load = load <2 x i64>, <2 x i64>* %__b
13250 %1 = bitcast <2 x i64> %load to <2 x i64>
13251 %2 = icmp sge <2 x i64> %0, %1
13252 %3 = bitcast i8 %__u to <8 x i1>
13253 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13254 %4 = and <2 x i1> %2, %extract.i
13255 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13256 %6 = bitcast <64 x i1> %5 to i64
13261 define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
13262 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
13263 ; VLX: # %bb.0: # %entry
13264 ; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
13265 ; VLX-NEXT: kmovq %k0, %rax
13268 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
13269 ; NoVLX: # %bb.0: # %entry
13270 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13271 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
13272 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13273 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13274 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13275 ; NoVLX-NEXT: kmovw %k0, %eax
13276 ; NoVLX-NEXT: vzeroupper
13279 %0 = bitcast <2 x i64> %__a to <2 x i64>
13280 %load = load i64, i64* %__b
13281 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13282 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13283 %2 = icmp sge <2 x i64> %0, %1
13284 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13285 %4 = bitcast <64 x i1> %3 to i64
13289 define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
13290 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
13291 ; VLX: # %bb.0: # %entry
13292 ; VLX-NEXT: kmovd %edi, %k1
13293 ; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
13294 ; VLX-NEXT: kmovq %k0, %rax
13297 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
13298 ; NoVLX: # %bb.0: # %entry
13299 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13300 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
13301 ; NoVLX-NEXT: kmovw %edi, %k1
13302 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13303 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13304 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13305 ; NoVLX-NEXT: kmovw %k0, %eax
13306 ; NoVLX-NEXT: vzeroupper
13309 %0 = bitcast <2 x i64> %__a to <2 x i64>
13310 %load = load i64, i64* %__b
13311 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13312 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13313 %2 = icmp sge <2 x i64> %0, %1
13314 %3 = bitcast i8 %__u to <8 x i1>
13315 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13316 %4 = and <2 x i1> %extract.i, %2
13317 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13318 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-or-equal compare (icmp sge) of two <4 x i64> register operands.
; The <4 x i1> result is concatenated with four zero lanes (shuffle indices 4-7 select from
; zeroinitializer) to form <8 x i1>, returned as an i8 bitmask.
; Expected codegen: vpcmpnltq on ymm with AVX512VL; without it the operands are widened to zmm
; and kshiftlw/kshiftrw $12 keeps only the low four mask bits.
13323 define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13324 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask:
13325 ; VLX: # %bb.0: # %entry
13326 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0
13327 ; VLX-NEXT: kmovd %k0, %eax
13328 ; VLX-NEXT: # kill: def $al killed $al killed $eax
13329 ; VLX-NEXT: vzeroupper
13332 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask:
13333 ; NoVLX: # %bb.0: # %entry
13334 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13335 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13336 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13337 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13338 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13339 ; NoVLX-NEXT: kmovw %k0, %eax
13340 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
13341 ; NoVLX-NEXT: vzeroupper
13344 %0 = bitcast <4 x i64> %__a to <4 x i64>
13345 %1 = bitcast <4 x i64> %__b to <4 x i64>
13346 %2 = icmp sge <4 x i64> %0, %1
13347 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13348 %4 = bitcast <8 x i1> %3 to i8
; Signed greater-or-equal compare (icmp sge) of %__a against a <4 x i64> loaded from %__b.
; The <4 x i1> result is concatenated with four zero lanes (shuffle indices 4-7 select from
; zeroinitializer) to form <8 x i1>, returned as an i8 bitmask.
; Expected codegen: with AVX512VL the load is folded into vpcmpnltq on ymm; without it the
; operand is loaded with vmovdqa, compared on zmm, and kshiftlw/kshiftrw $12 keeps only the
; low four mask bits.
13352 define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13353 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem:
13354 ; VLX: # %bb.0: # %entry
13355 ; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
13356 ; VLX-NEXT: kmovd %k0, %eax
13357 ; VLX-NEXT: # kill: def $al killed $al killed $eax
13358 ; VLX-NEXT: vzeroupper
13361 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem:
13362 ; NoVLX: # %bb.0: # %entry
13363 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13364 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
13365 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13366 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13367 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13368 ; NoVLX-NEXT: kmovw %k0, %eax
13369 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
13370 ; NoVLX-NEXT: vzeroupper
13373 %0 = bitcast <4 x i64> %__a to <4 x i64>
13374 %load = load <4 x i64>, <4 x i64>* %__b
13375 %1 = bitcast <4 x i64> %load to <4 x i64>
13376 %2 = icmp sge <4 x i64> %0, %1
13377 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13378 %4 = bitcast <8 x i1> %3 to i8
; Masked signed >= (icmp sge) of two <4 x i64> register operands. The compare
; result is ANDed with lanes 0-3 of %__u (bitcast to <8 x i1>), zero-filled to
; 8 lanes and returned as an i8 bitmask.
; Both runs expect %edi moved into k1 and used as the write-mask {%k1} of
; vpcmpnltq; the AVX512F-only run additionally expects the zmm form and a
; kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
13382 define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13383 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask:
13384 ; VLX: # %bb.0: # %entry
13385 ; VLX-NEXT: kmovd %edi, %k1
13386 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
13387 ; VLX-NEXT: kmovd %k0, %eax
13388 ; VLX-NEXT: # kill: def $al killed $al killed $eax
13389 ; VLX-NEXT: vzeroupper
13392 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask:
13393 ; NoVLX: # %bb.0: # %entry
13394 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13395 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13396 ; NoVLX-NEXT: kmovw %edi, %k1
13397 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13398 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13399 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13400 ; NoVLX-NEXT: kmovw %k0, %eax
13401 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
13402 ; NoVLX-NEXT: vzeroupper
13405 %0 = bitcast <4 x i64> %__a to <4 x i64>
13406 %1 = bitcast <4 x i64> %__b to <4 x i64>
13407 %2 = icmp sge <4 x i64> %0, %1
13408 %3 = bitcast i8 %__u to <8 x i1>
13409 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13410 %4 = and <4 x i1> %2, %extract.i
13411 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13412 %6 = bitcast <8 x i1> %5 to i8
; Masked signed >= (icmp sge) of <4 x i64> %__a against a <4 x i64> loaded
; from %__b. The compare result is ANDed with lanes 0-3 of %__u (bitcast to
; <8 x i1>), zero-filled to 8 lanes and returned as an i8 bitmask.
; AVX512VL run expects a write-masked vpcmpnltq with a folded (%rsi) operand;
; the AVX512F-only run expects vmovdqa into ymm1, a write-masked zmm compare,
; and a kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
13416 define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13417 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem:
13418 ; VLX: # %bb.0: # %entry
13419 ; VLX-NEXT: kmovd %edi, %k1
13420 ; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
13421 ; VLX-NEXT: kmovd %k0, %eax
13422 ; VLX-NEXT: # kill: def $al killed $al killed $eax
13423 ; VLX-NEXT: vzeroupper
13426 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem:
13427 ; NoVLX: # %bb.0: # %entry
13428 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13429 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
13430 ; NoVLX-NEXT: kmovw %edi, %k1
13431 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13432 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13433 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13434 ; NoVLX-NEXT: kmovw %k0, %eax
13435 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
13436 ; NoVLX-NEXT: vzeroupper
13439 %0 = bitcast <4 x i64> %__a to <4 x i64>
13440 %load = load <4 x i64>, <4 x i64>* %__b
13441 %1 = bitcast <4 x i64> %load to <4 x i64>
13442 %2 = icmp sge <4 x i64> %0, %1
13443 %3 = bitcast i8 %__u to <8 x i1>
13444 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13445 %4 = and <4 x i1> %2, %extract.i
13446 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13447 %6 = bitcast <8 x i1> %5 to i8
; Signed >= (icmp sge) of <4 x i64> %__a against a scalar i64 loaded from %__b
; and splatted to all four lanes (insertelement + all-zero shuffle mask). The
; <4 x i1> result is zero-filled to 8 lanes and returned as an i8 bitmask.
; AVX512VL run expects an embedded-broadcast operand (%rdi){1to4}; the
; AVX512F-only run expects vpbroadcastq into ymm1, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
13452 define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
13453 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
13454 ; VLX: # %bb.0: # %entry
13455 ; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0
13456 ; VLX-NEXT: kmovd %k0, %eax
13457 ; VLX-NEXT: # kill: def $al killed $al killed $eax
13458 ; VLX-NEXT: vzeroupper
13461 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
13462 ; NoVLX: # %bb.0: # %entry
13463 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13464 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
13465 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13466 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13467 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13468 ; NoVLX-NEXT: kmovw %k0, %eax
13469 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
13470 ; NoVLX-NEXT: vzeroupper
13473 %0 = bitcast <4 x i64> %__a to <4 x i64>
13474 %load = load i64, i64* %__b
13475 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13476 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13477 %2 = icmp sge <4 x i64> %0, %1
13478 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13479 %4 = bitcast <8 x i1> %3 to i8
; Masked signed >= (icmp sge) of <4 x i64> %__a against a scalar i64 loaded
; from %__b and splatted to all four lanes. The compare result is ANDed with
; lanes 0-3 of %__u (bitcast to <8 x i1>), zero-filled to 8 lanes and returned
; as an i8 bitmask.
; AVX512VL run expects a write-masked vpcmpnltq with (%rsi){1to4}; the
; AVX512F-only run expects vpbroadcastq into ymm1, a write-masked zmm compare,
; and a kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
13483 define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
13484 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
13485 ; VLX: # %bb.0: # %entry
13486 ; VLX-NEXT: kmovd %edi, %k1
13487 ; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
13488 ; VLX-NEXT: kmovd %k0, %eax
13489 ; VLX-NEXT: # kill: def $al killed $al killed $eax
13490 ; VLX-NEXT: vzeroupper
13493 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
13494 ; NoVLX: # %bb.0: # %entry
13495 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13496 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
13497 ; NoVLX-NEXT: kmovw %edi, %k1
13498 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13499 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13500 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13501 ; NoVLX-NEXT: kmovw %k0, %eax
13502 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
13503 ; NoVLX-NEXT: vzeroupper
13506 %0 = bitcast <4 x i64> %__a to <4 x i64>
13507 %load = load i64, i64* %__b
13508 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13509 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13510 %2 = icmp sge <4 x i64> %0, %1
13511 %3 = bitcast i8 %__u to <8 x i1>
13512 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13513 %4 = and <4 x i1> %extract.i, %2
13514 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13515 %6 = bitcast <8 x i1> %5 to i8
; Signed >= (icmp sge) of two <4 x i64> register operands. The <4 x i1> result
; is widened to 16 lanes (every shuffle index above 3 is in 4-7, i.e. picks
; the zeroinitializer operand) and returned as an i16 bitmask.
; AVX512VL run expects a ymm vpcmpnltq; the AVX512F-only run expects a zmm
; compare plus a kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
13520 define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13521 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask:
13522 ; VLX: # %bb.0: # %entry
13523 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0
13524 ; VLX-NEXT: kmovd %k0, %eax
13525 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13526 ; VLX-NEXT: vzeroupper
13529 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask:
13530 ; NoVLX: # %bb.0: # %entry
13531 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13532 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13533 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13534 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13535 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13536 ; NoVLX-NEXT: kmovw %k0, %eax
13537 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13538 ; NoVLX-NEXT: vzeroupper
13541 %0 = bitcast <4 x i64> %__a to <4 x i64>
13542 %1 = bitcast <4 x i64> %__b to <4 x i64>
13543 %2 = icmp sge <4 x i64> %0, %1
13544 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13545 %4 = bitcast <16 x i1> %3 to i16
; Signed >= (icmp sge) of <4 x i64> %__a against a <4 x i64> loaded from %__b.
; The <4 x i1> result is zero-filled to 16 lanes and returned as an i16
; bitmask.
; AVX512VL run expects the load folded into vpcmpnltq as a (%rdi) operand; the
; AVX512F-only run expects a separate vmovdqa into ymm1, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
13549 define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13550 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem:
13551 ; VLX: # %bb.0: # %entry
13552 ; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
13553 ; VLX-NEXT: kmovd %k0, %eax
13554 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13555 ; VLX-NEXT: vzeroupper
13558 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem:
13559 ; NoVLX: # %bb.0: # %entry
13560 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13561 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
13562 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13563 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13564 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13565 ; NoVLX-NEXT: kmovw %k0, %eax
13566 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13567 ; NoVLX-NEXT: vzeroupper
13570 %0 = bitcast <4 x i64> %__a to <4 x i64>
13571 %load = load <4 x i64>, <4 x i64>* %__b
13572 %1 = bitcast <4 x i64> %load to <4 x i64>
13573 %2 = icmp sge <4 x i64> %0, %1
13574 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13575 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= (icmp sge) of two <4 x i64> register operands. The compare
; result is ANDed with lanes 0-3 of %__u (bitcast to <8 x i1>), zero-filled to
; 16 lanes and returned as an i16 bitmask.
; Both runs expect %edi moved into k1 and used as the write-mask {%k1} of
; vpcmpnltq; the AVX512F-only run additionally expects the zmm form and a
; kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
13579 define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13580 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask:
13581 ; VLX: # %bb.0: # %entry
13582 ; VLX-NEXT: kmovd %edi, %k1
13583 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
13584 ; VLX-NEXT: kmovd %k0, %eax
13585 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13586 ; VLX-NEXT: vzeroupper
13589 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask:
13590 ; NoVLX: # %bb.0: # %entry
13591 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13592 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13593 ; NoVLX-NEXT: kmovw %edi, %k1
13594 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13595 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13596 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13597 ; NoVLX-NEXT: kmovw %k0, %eax
13598 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13599 ; NoVLX-NEXT: vzeroupper
13602 %0 = bitcast <4 x i64> %__a to <4 x i64>
13603 %1 = bitcast <4 x i64> %__b to <4 x i64>
13604 %2 = icmp sge <4 x i64> %0, %1
13605 %3 = bitcast i8 %__u to <8 x i1>
13606 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13607 %4 = and <4 x i1> %2, %extract.i
13608 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13609 %6 = bitcast <16 x i1> %5 to i16
; Masked signed >= (icmp sge) of <4 x i64> %__a against a <4 x i64> loaded
; from %__b. The compare result is ANDed with lanes 0-3 of %__u (bitcast to
; <8 x i1>), zero-filled to 16 lanes and returned as an i16 bitmask.
; AVX512VL run expects a write-masked vpcmpnltq with a folded (%rsi) operand;
; the AVX512F-only run expects vmovdqa into ymm1, a write-masked zmm compare,
; and a kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
13613 define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13614 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem:
13615 ; VLX: # %bb.0: # %entry
13616 ; VLX-NEXT: kmovd %edi, %k1
13617 ; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
13618 ; VLX-NEXT: kmovd %k0, %eax
13619 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13620 ; VLX-NEXT: vzeroupper
13623 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem:
13624 ; NoVLX: # %bb.0: # %entry
13625 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13626 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
13627 ; NoVLX-NEXT: kmovw %edi, %k1
13628 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13629 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13630 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13631 ; NoVLX-NEXT: kmovw %k0, %eax
13632 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13633 ; NoVLX-NEXT: vzeroupper
13636 %0 = bitcast <4 x i64> %__a to <4 x i64>
13637 %load = load <4 x i64>, <4 x i64>* %__b
13638 %1 = bitcast <4 x i64> %load to <4 x i64>
13639 %2 = icmp sge <4 x i64> %0, %1
13640 %3 = bitcast i8 %__u to <8 x i1>
13641 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13642 %4 = and <4 x i1> %2, %extract.i
13643 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13644 %6 = bitcast <16 x i1> %5 to i16
; Signed >= (icmp sge) of <4 x i64> %__a against a scalar i64 loaded from %__b
; and splatted to all four lanes (insertelement + all-zero shuffle mask). The
; <4 x i1> result is zero-filled to 16 lanes and returned as an i16 bitmask.
; AVX512VL run expects an embedded-broadcast operand (%rdi){1to4}; the
; AVX512F-only run expects vpbroadcastq into ymm1, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
13649 define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
13650 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
13651 ; VLX: # %bb.0: # %entry
13652 ; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0
13653 ; VLX-NEXT: kmovd %k0, %eax
13654 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13655 ; VLX-NEXT: vzeroupper
13658 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
13659 ; NoVLX: # %bb.0: # %entry
13660 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13661 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
13662 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13663 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13664 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13665 ; NoVLX-NEXT: kmovw %k0, %eax
13666 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13667 ; NoVLX-NEXT: vzeroupper
13670 %0 = bitcast <4 x i64> %__a to <4 x i64>
13671 %load = load i64, i64* %__b
13672 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13673 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13674 %2 = icmp sge <4 x i64> %0, %1
13675 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13676 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= (icmp sge) of <4 x i64> %__a against a scalar i64 loaded
; from %__b and splatted to all four lanes. The compare result is ANDed with
; lanes 0-3 of %__u (bitcast to <8 x i1>), zero-filled to 16 lanes and
; returned as an i16 bitmask.
; AVX512VL run expects a write-masked vpcmpnltq with (%rsi){1to4}; the
; AVX512F-only run expects vpbroadcastq into ymm1, a write-masked zmm compare,
; and a kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
13680 define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
13681 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
13682 ; VLX: # %bb.0: # %entry
13683 ; VLX-NEXT: kmovd %edi, %k1
13684 ; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
13685 ; VLX-NEXT: kmovd %k0, %eax
13686 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13687 ; VLX-NEXT: vzeroupper
13690 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
13691 ; NoVLX: # %bb.0: # %entry
13692 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13693 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
13694 ; NoVLX-NEXT: kmovw %edi, %k1
13695 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13696 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13697 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13698 ; NoVLX-NEXT: kmovw %k0, %eax
13699 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13700 ; NoVLX-NEXT: vzeroupper
13703 %0 = bitcast <4 x i64> %__a to <4 x i64>
13704 %load = load i64, i64* %__b
13705 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13706 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13707 %2 = icmp sge <4 x i64> %0, %1
13708 %3 = bitcast i8 %__u to <8 x i1>
13709 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13710 %4 = and <4 x i1> %extract.i, %2
13711 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13712 %6 = bitcast <16 x i1> %5 to i16
; Signed >= (icmp sge) of two <4 x i64> register operands. The <4 x i1> result
; is widened to 32 lanes (every shuffle index above 3 is in 4-7, i.e. picks
; the zeroinitializer operand) and returned as an i32 bitmask.
; AVX512VL run expects a ymm vpcmpnltq; the AVX512F-only run expects a zmm
; compare plus a kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
13717 define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13718 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
13719 ; VLX: # %bb.0: # %entry
13720 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0
13721 ; VLX-NEXT: kmovd %k0, %eax
13722 ; VLX-NEXT: vzeroupper
13725 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
13726 ; NoVLX: # %bb.0: # %entry
13727 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13728 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13729 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13730 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13731 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13732 ; NoVLX-NEXT: kmovw %k0, %eax
13733 ; NoVLX-NEXT: vzeroupper
13736 %0 = bitcast <4 x i64> %__a to <4 x i64>
13737 %1 = bitcast <4 x i64> %__b to <4 x i64>
13738 %2 = icmp sge <4 x i64> %0, %1
13739 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13740 %4 = bitcast <32 x i1> %3 to i32
; Signed >= (icmp sge) of <4 x i64> %__a against a <4 x i64> loaded from %__b.
; The <4 x i1> result is zero-filled to 32 lanes and returned as an i32
; bitmask.
; AVX512VL run expects the load folded into vpcmpnltq as a (%rdi) operand; the
; AVX512F-only run expects a separate vmovdqa into ymm1, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
13744 define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13745 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
13746 ; VLX: # %bb.0: # %entry
13747 ; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
13748 ; VLX-NEXT: kmovd %k0, %eax
13749 ; VLX-NEXT: vzeroupper
13752 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
13753 ; NoVLX: # %bb.0: # %entry
13754 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13755 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
13756 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13757 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13758 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13759 ; NoVLX-NEXT: kmovw %k0, %eax
13760 ; NoVLX-NEXT: vzeroupper
13763 %0 = bitcast <4 x i64> %__a to <4 x i64>
13764 %load = load <4 x i64>, <4 x i64>* %__b
13765 %1 = bitcast <4 x i64> %load to <4 x i64>
13766 %2 = icmp sge <4 x i64> %0, %1
13767 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13768 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= (icmp sge) of two <4 x i64> register operands. The compare
; result is ANDed with lanes 0-3 of %__u (bitcast to <8 x i1>), zero-filled to
; 32 lanes and returned as an i32 bitmask.
; Both runs expect %edi moved into k1 and used as the write-mask {%k1} of
; vpcmpnltq; the AVX512F-only run additionally expects the zmm form and a
; kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
13772 define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13773 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
13774 ; VLX: # %bb.0: # %entry
13775 ; VLX-NEXT: kmovd %edi, %k1
13776 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
13777 ; VLX-NEXT: kmovd %k0, %eax
13778 ; VLX-NEXT: vzeroupper
13781 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
13782 ; NoVLX: # %bb.0: # %entry
13783 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13784 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13785 ; NoVLX-NEXT: kmovw %edi, %k1
13786 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13787 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13788 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13789 ; NoVLX-NEXT: kmovw %k0, %eax
13790 ; NoVLX-NEXT: vzeroupper
13793 %0 = bitcast <4 x i64> %__a to <4 x i64>
13794 %1 = bitcast <4 x i64> %__b to <4 x i64>
13795 %2 = icmp sge <4 x i64> %0, %1
13796 %3 = bitcast i8 %__u to <8 x i1>
13797 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13798 %4 = and <4 x i1> %2, %extract.i
13799 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13800 %6 = bitcast <32 x i1> %5 to i32
; Masked signed >= (icmp sge) of <4 x i64> %__a against a <4 x i64> loaded
; from %__b. The compare result is ANDed with lanes 0-3 of %__u (bitcast to
; <8 x i1>), zero-filled to 32 lanes and returned as an i32 bitmask.
; AVX512VL run expects a write-masked vpcmpnltq with a folded (%rsi) operand;
; the AVX512F-only run expects vmovdqa into ymm1, a write-masked zmm compare,
; and a kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
13804 define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13805 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
13806 ; VLX: # %bb.0: # %entry
13807 ; VLX-NEXT: kmovd %edi, %k1
13808 ; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
13809 ; VLX-NEXT: kmovd %k0, %eax
13810 ; VLX-NEXT: vzeroupper
13813 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
13814 ; NoVLX: # %bb.0: # %entry
13815 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13816 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
13817 ; NoVLX-NEXT: kmovw %edi, %k1
13818 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13819 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13820 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13821 ; NoVLX-NEXT: kmovw %k0, %eax
13822 ; NoVLX-NEXT: vzeroupper
13825 %0 = bitcast <4 x i64> %__a to <4 x i64>
13826 %load = load <4 x i64>, <4 x i64>* %__b
13827 %1 = bitcast <4 x i64> %load to <4 x i64>
13828 %2 = icmp sge <4 x i64> %0, %1
13829 %3 = bitcast i8 %__u to <8 x i1>
13830 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13831 %4 = and <4 x i1> %2, %extract.i
13832 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13833 %6 = bitcast <32 x i1> %5 to i32
; Signed >= (icmp sge) of <4 x i64> %__a against a scalar i64 loaded from %__b
; and splatted to all four lanes (insertelement + all-zero shuffle mask). The
; <4 x i1> result is zero-filled to 32 lanes and returned as an i32 bitmask.
; AVX512VL run expects an embedded-broadcast operand (%rdi){1to4}; the
; AVX512F-only run expects vpbroadcastq into ymm1, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
13838 define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
13839 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
13840 ; VLX: # %bb.0: # %entry
13841 ; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0
13842 ; VLX-NEXT: kmovd %k0, %eax
13843 ; VLX-NEXT: vzeroupper
13846 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
13847 ; NoVLX: # %bb.0: # %entry
13848 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13849 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
13850 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13851 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13852 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13853 ; NoVLX-NEXT: kmovw %k0, %eax
13854 ; NoVLX-NEXT: vzeroupper
13857 %0 = bitcast <4 x i64> %__a to <4 x i64>
13858 %load = load i64, i64* %__b
13859 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13860 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13861 %2 = icmp sge <4 x i64> %0, %1
13862 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13863 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= (icmp sge) of <4 x i64> %__a against a scalar i64 loaded
; from %__b and splatted to all four lanes. The compare result is ANDed with
; lanes 0-3 of %__u (bitcast to <8 x i1>), zero-filled to 32 lanes and
; returned as an i32 bitmask.
; AVX512VL run expects a write-masked vpcmpnltq with (%rsi){1to4}; the
; AVX512F-only run expects vpbroadcastq into ymm1, a write-masked zmm compare,
; and a kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
13867 define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
13868 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
13869 ; VLX: # %bb.0: # %entry
13870 ; VLX-NEXT: kmovd %edi, %k1
13871 ; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
13872 ; VLX-NEXT: kmovd %k0, %eax
13873 ; VLX-NEXT: vzeroupper
13876 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
13877 ; NoVLX: # %bb.0: # %entry
13878 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13879 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
13880 ; NoVLX-NEXT: kmovw %edi, %k1
13881 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13882 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13883 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13884 ; NoVLX-NEXT: kmovw %k0, %eax
13885 ; NoVLX-NEXT: vzeroupper
13888 %0 = bitcast <4 x i64> %__a to <4 x i64>
13889 %load = load i64, i64* %__b
13890 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13891 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13892 %2 = icmp sge <4 x i64> %0, %1
13893 %3 = bitcast i8 %__u to <8 x i1>
13894 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13895 %4 = and <4 x i1> %extract.i, %2
13896 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13897 %6 = bitcast <32 x i1> %5 to i32
; Signed >= (icmp sge) of two <4 x i64> register operands. The <4 x i1> result
; is widened to 64 lanes (every shuffle index above 3 is in 4-7, i.e. picks
; the zeroinitializer operand) and returned as an i64 bitmask.
; AVX512VL run expects a ymm vpcmpnltq read back with kmovq; the AVX512F-only
; run expects a zmm compare, a kshiftlw/kshiftrw $12 pair that clears mask
; bits 4-15, and a kmovw into eax.
13902 define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13903 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
13904 ; VLX: # %bb.0: # %entry
13905 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0
13906 ; VLX-NEXT: kmovq %k0, %rax
13907 ; VLX-NEXT: vzeroupper
13910 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
13911 ; NoVLX: # %bb.0: # %entry
13912 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13913 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13914 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13915 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13916 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13917 ; NoVLX-NEXT: kmovw %k0, %eax
13918 ; NoVLX-NEXT: vzeroupper
13921 %0 = bitcast <4 x i64> %__a to <4 x i64>
13922 %1 = bitcast <4 x i64> %__b to <4 x i64>
13923 %2 = icmp sge <4 x i64> %0, %1
13924 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13925 %4 = bitcast <64 x i1> %3 to i64
; Signed >= (icmp sge) of <4 x i64> %__a against a <4 x i64> loaded from %__b.
; The <4 x i1> result is zero-filled to 64 lanes and returned as an i64
; bitmask.
; AVX512VL run expects the load folded into vpcmpnltq as a (%rdi) operand and
; the mask read back with kmovq; the AVX512F-only run expects vmovdqa into
; ymm1, a zmm compare, a kshiftlw/kshiftrw $12 pair that clears mask bits
; 4-15, and a kmovw into eax.
13929 define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13930 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
13931 ; VLX: # %bb.0: # %entry
13932 ; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
13933 ; VLX-NEXT: kmovq %k0, %rax
13934 ; VLX-NEXT: vzeroupper
13937 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
13938 ; NoVLX: # %bb.0: # %entry
13939 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13940 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
13941 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13942 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13943 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13944 ; NoVLX-NEXT: kmovw %k0, %eax
13945 ; NoVLX-NEXT: vzeroupper
13948 %0 = bitcast <4 x i64> %__a to <4 x i64>
13949 %load = load <4 x i64>, <4 x i64>* %__b
13950 %1 = bitcast <4 x i64> %load to <4 x i64>
13951 %2 = icmp sge <4 x i64> %0, %1
13952 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13953 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= (icmp sge) of two <4 x i64> register operands. The compare
; result is ANDed with lanes 0-3 of %__u (bitcast to <8 x i1>), zero-filled to
; 64 lanes and returned as an i64 bitmask.
; Both runs expect %edi moved into k1 and used as the write-mask {%k1} of
; vpcmpnltq. AVX512VL run reads the mask back with kmovq; the AVX512F-only run
; expects the zmm form, a kshiftlw/kshiftrw $12 pair that clears mask bits
; 4-15, and a kmovw into eax.
13957 define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13958 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
13959 ; VLX: # %bb.0: # %entry
13960 ; VLX-NEXT: kmovd %edi, %k1
13961 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
13962 ; VLX-NEXT: kmovq %k0, %rax
13963 ; VLX-NEXT: vzeroupper
13966 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
13967 ; NoVLX: # %bb.0: # %entry
13968 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13969 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13970 ; NoVLX-NEXT: kmovw %edi, %k1
13971 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13972 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13973 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13974 ; NoVLX-NEXT: kmovw %k0, %eax
13975 ; NoVLX-NEXT: vzeroupper
13978 %0 = bitcast <4 x i64> %__a to <4 x i64>
13979 %1 = bitcast <4 x i64> %__b to <4 x i64>
13980 %2 = icmp sge <4 x i64> %0, %1
13981 %3 = bitcast i8 %__u to <8 x i1>
13982 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13983 %4 = and <4 x i1> %2, %extract.i
13984 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13985 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= (icmp sge) of <4 x i64> %__a against a <4 x i64> loaded
; from %__b. The compare result is ANDed with lanes 0-3 of %__u (bitcast to
; <8 x i1>), zero-filled to 64 lanes and returned as an i64 bitmask.
; AVX512VL run expects a write-masked vpcmpnltq with a folded (%rsi) operand
; and a kmovq read-back; the AVX512F-only run expects vmovdqa into ymm1, a
; write-masked zmm compare, a kshiftlw/kshiftrw $12 pair that clears mask bits
; 4-15, and a kmovw into eax.
13989 define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13990 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
13991 ; VLX: # %bb.0: # %entry
13992 ; VLX-NEXT: kmovd %edi, %k1
13993 ; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
13994 ; VLX-NEXT: kmovq %k0, %rax
13995 ; VLX-NEXT: vzeroupper
13998 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
13999 ; NoVLX: # %bb.0: # %entry
14000 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
14001 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
14002 ; NoVLX-NEXT: kmovw %edi, %k1
14003 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14004 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
14005 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
14006 ; NoVLX-NEXT: kmovw %k0, %eax
14007 ; NoVLX-NEXT: vzeroupper
14010 %0 = bitcast <4 x i64> %__a to <4 x i64>
14011 %load = load <4 x i64>, <4 x i64>* %__b
14012 %1 = bitcast <4 x i64> %load to <4 x i64>
14013 %2 = icmp sge <4 x i64> %0, %1
14014 %3 = bitcast i8 %__u to <8 x i1>
14015 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
14016 %4 = and <4 x i1> %2, %extract.i
14017 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
14018 %6 = bitcast <64 x i1> %5 to i64
; Signed >= (icmp sge) of <4 x i64> %__a against a scalar i64 loaded from %__b
; and splatted to all four lanes (insertelement + all-zero shuffle mask). The
; <4 x i1> result is zero-filled to 64 lanes and returned as an i64 bitmask.
; AVX512VL run expects an embedded-broadcast operand (%rdi){1to4} and a kmovq
; read-back; the AVX512F-only run expects vpbroadcastq into ymm1, a zmm
; compare, a kshiftlw/kshiftrw $12 pair that clears mask bits 4-15, and a
; kmovw into eax.
14023 define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
14024 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
14025 ; VLX: # %bb.0: # %entry
14026 ; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0
14027 ; VLX-NEXT: kmovq %k0, %rax
14028 ; VLX-NEXT: vzeroupper
14031 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
14032 ; NoVLX: # %bb.0: # %entry
14033 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
14034 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
14035 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14036 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
14037 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
14038 ; NoVLX-NEXT: kmovw %k0, %eax
14039 ; NoVLX-NEXT: vzeroupper
14042 %0 = bitcast <4 x i64> %__a to <4 x i64>
14043 %load = load i64, i64* %__b
14044 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
14045 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
14046 %2 = icmp sge <4 x i64> %0, %1
14047 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
14048 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= (icmp sge) of <4 x i64> %__a against a scalar i64 loaded
; from %__b and splatted to all four lanes. The compare result is ANDed with
; lanes 0-3 of %__u (bitcast to <8 x i1>), zero-filled to 64 lanes and
; returned as an i64 bitmask.
; AVX512VL run expects a write-masked vpcmpnltq with (%rsi){1to4} and a kmovq
; read-back; the AVX512F-only run expects vpbroadcastq into ymm1, a
; write-masked zmm compare, a kshiftlw/kshiftrw $12 pair that clears mask bits
; 4-15, and a kmovw into eax.
14052 define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
14053 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
14054 ; VLX: # %bb.0: # %entry
14055 ; VLX-NEXT: kmovd %edi, %k1
14056 ; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
14057 ; VLX-NEXT: kmovq %k0, %rax
14058 ; VLX-NEXT: vzeroupper
14061 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
14062 ; NoVLX: # %bb.0: # %entry
14063 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
14064 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
14065 ; NoVLX-NEXT: kmovw %edi, %k1
14066 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14067 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
14068 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
14069 ; NoVLX-NEXT: kmovw %k0, %eax
14070 ; NoVLX-NEXT: vzeroupper
14073 %0 = bitcast <4 x i64> %__a to <4 x i64>
14074 %load = load i64, i64* %__b
14075 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
14076 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
14077 %2 = icmp sge <4 x i64> %0, %1
14078 %3 = bitcast i8 %__u to <8 x i1>
14079 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
14080 %4 = and <4 x i1> %extract.i, %2
14081 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
14082 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of two <8 x i64> register operands. The <8 x i1> result
; is widened to <16 x i1> (shuffle indices 8-15 select lanes of the
; zeroinitializer operand, so the upper mask bits are zero) and bitcast to
; i16, the declared return type.
; Expected codegen in both run configurations: a single vpcmpnltq into %k0
; followed by a mask-to-GPR move.
14087 define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14088 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask:
14089 ; VLX: # %bb.0: # %entry
14090 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14091 ; VLX-NEXT: kmovd %k0, %eax
14092 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14093 ; VLX-NEXT: vzeroupper
14096 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask:
14097 ; NoVLX: # %bb.0: # %entry
14098 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14099 ; NoVLX-NEXT: kmovw %k0, %eax
14100 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14101 ; NoVLX-NEXT: vzeroupper
14104 %0 = bitcast <8 x i64> %__a to <8 x i64>
14105 %1 = bitcast <8 x i64> %__b to <8 x i64>
14106 %2 = icmp sge <8 x i64> %0, %1
14107 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14108 %4 = bitcast <16 x i1> %3 to i16
; Signed >= compare of <8 x i64> %__a against a full <8 x i64> vector loaded
; from %__b. The <8 x i1> result is widened to <16 x i1> (shuffle indices
; 8-15 select zeroinitializer lanes) and bitcast to i16, the declared return
; type.
; Expected codegen in both run configurations: the load is folded into
; vpcmpnltq as a (%rdi) memory operand.
14112 define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
14113 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem:
14114 ; VLX: # %bb.0: # %entry
14115 ; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
14116 ; VLX-NEXT: kmovd %k0, %eax
14117 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14118 ; VLX-NEXT: vzeroupper
14121 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem:
14122 ; NoVLX: # %bb.0: # %entry
14123 ; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
14124 ; NoVLX-NEXT: kmovw %k0, %eax
14125 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14126 ; NoVLX-NEXT: vzeroupper
14129 %0 = bitcast <8 x i64> %__a to <8 x i64>
14130 %load = load <8 x i64>, <8 x i64>* %__b
14131 %1 = bitcast <8 x i64> %load to <8 x i64>
14132 %2 = icmp sge <8 x i64> %0, %1
14133 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14134 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of two <8 x i64> register operands. The compare
; result is ANDed with %__u (bitcast to <8 x i1>), widened to <16 x i1>
; (shuffle indices 8-15 select zeroinitializer lanes) and bitcast to i16, the
; declared return type.
; Expected codegen in both run configurations: %__u is moved into %k1 and
; applied as the write-mask of vpcmpnltq rather than as a separate AND.
14138 define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14139 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask:
14140 ; VLX: # %bb.0: # %entry
14141 ; VLX-NEXT: kmovd %edi, %k1
14142 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14143 ; VLX-NEXT: kmovd %k0, %eax
14144 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14145 ; VLX-NEXT: vzeroupper
14148 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask:
14149 ; NoVLX: # %bb.0: # %entry
14150 ; NoVLX-NEXT: kmovw %edi, %k1
14151 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14152 ; NoVLX-NEXT: kmovw %k0, %eax
14153 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14154 ; NoVLX-NEXT: vzeroupper
14157 %0 = bitcast <8 x i64> %__a to <8 x i64>
14158 %1 = bitcast <8 x i64> %__b to <8 x i64>
14159 %2 = icmp sge <8 x i64> %0, %1
14160 %3 = bitcast i8 %__u to <8 x i1>
14161 %4 = and <8 x i1> %2, %3
14162 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14163 %6 = bitcast <16 x i1> %5 to i16
; Masked signed >= compare of <8 x i64> %__a against a full <8 x i64> vector
; loaded from %__b. The compare result is ANDed with %__u (bitcast to
; <8 x i1>), widened to <16 x i1> (shuffle indices 8-15 select
; zeroinitializer lanes) and bitcast to i16, the declared return type.
; Expected codegen in both run configurations: %__u becomes write-mask %k1
; and the load is folded into vpcmpnltq as a (%rsi) memory operand.
14167 define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
14168 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem:
14169 ; VLX: # %bb.0: # %entry
14170 ; VLX-NEXT: kmovd %edi, %k1
14171 ; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14172 ; VLX-NEXT: kmovd %k0, %eax
14173 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14174 ; VLX-NEXT: vzeroupper
14177 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem:
14178 ; NoVLX: # %bb.0: # %entry
14179 ; NoVLX-NEXT: kmovw %edi, %k1
14180 ; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14181 ; NoVLX-NEXT: kmovw %k0, %eax
14182 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14183 ; NoVLX-NEXT: vzeroupper
14186 %0 = bitcast <8 x i64> %__a to <8 x i64>
14187 %load = load <8 x i64>, <8 x i64>* %__b
14188 %1 = bitcast <8 x i64> %load to <8 x i64>
14189 %2 = icmp sge <8 x i64> %0, %1
14190 %3 = bitcast i8 %__u to <8 x i1>
14191 %4 = and <8 x i1> %2, %3
14192 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14193 %6 = bitcast <16 x i1> %5 to i16
; Signed >= compare of <8 x i64> %__a against a scalar i64 that is loaded
; from %__b and splatted to all eight lanes (insertelement + shuffle). The
; <8 x i1> result is widened to <16 x i1> (shuffle indices 8-15 select
; zeroinitializer lanes) and bitcast to i16, the declared return type.
; Expected codegen in both run configurations: the scalar load and splat are
; folded into vpcmpnltq as a {1to8} embedded-broadcast memory operand.
14198 define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
14199 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
14200 ; VLX: # %bb.0: # %entry
14201 ; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14202 ; VLX-NEXT: kmovd %k0, %eax
14203 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14204 ; VLX-NEXT: vzeroupper
14207 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
14208 ; NoVLX: # %bb.0: # %entry
14209 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14210 ; NoVLX-NEXT: kmovw %k0, %eax
14211 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14212 ; NoVLX-NEXT: vzeroupper
14215 %0 = bitcast <8 x i64> %__a to <8 x i64>
14216 %load = load i64, i64* %__b
14217 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14218 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14219 %2 = icmp sge <8 x i64> %0, %1
14220 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14221 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of <8 x i64> %__a against a scalar i64 that is
; loaded from %__b and splatted to all eight lanes. The compare result is
; ANDed with %__u (bitcast to <8 x i1>), widened to <16 x i1> (shuffle
; indices 8-15 select zeroinitializer lanes) and bitcast to i16, the declared
; return type.
; Expected codegen in both run configurations: %__u becomes write-mask %k1
; and the splat is folded as a {1to8} embedded-broadcast operand.
14225 define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
14226 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
14227 ; VLX: # %bb.0: # %entry
14228 ; VLX-NEXT: kmovd %edi, %k1
14229 ; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14230 ; VLX-NEXT: kmovd %k0, %eax
14231 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14232 ; VLX-NEXT: vzeroupper
14235 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
14236 ; NoVLX: # %bb.0: # %entry
14237 ; NoVLX-NEXT: kmovw %edi, %k1
14238 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14239 ; NoVLX-NEXT: kmovw %k0, %eax
14240 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14241 ; NoVLX-NEXT: vzeroupper
14244 %0 = bitcast <8 x i64> %__a to <8 x i64>
14245 %load = load i64, i64* %__b
14246 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14247 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14248 %2 = icmp sge <8 x i64> %0, %1
14249 %3 = bitcast i8 %__u to <8 x i1>
14250 %4 = and <8 x i1> %3, %2
14251 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14252 %6 = bitcast <16 x i1> %5 to i16
; Signed >= compare of two <8 x i64> register operands. The <8 x i1> result
; is widened to <32 x i1> (every shuffle index >= 8 selects a lane of the
; zeroinitializer operand, so mask bits 8-31 are zero) and bitcast to i32,
; the declared return type.
; Expected codegen in both run configurations: a single vpcmpnltq into %k0
; followed by a mask-to-GPR move into %eax.
14257 define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14258 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask:
14259 ; VLX: # %bb.0: # %entry
14260 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14261 ; VLX-NEXT: kmovd %k0, %eax
14262 ; VLX-NEXT: vzeroupper
14265 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask:
14266 ; NoVLX: # %bb.0: # %entry
14267 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14268 ; NoVLX-NEXT: kmovw %k0, %eax
14269 ; NoVLX-NEXT: vzeroupper
14272 %0 = bitcast <8 x i64> %__a to <8 x i64>
14273 %1 = bitcast <8 x i64> %__b to <8 x i64>
14274 %2 = icmp sge <8 x i64> %0, %1
14275 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14276 %4 = bitcast <32 x i1> %3 to i32
; Signed >= compare of <8 x i64> %__a against a full <8 x i64> vector loaded
; from %__b. The <8 x i1> result is widened to <32 x i1> (every shuffle index
; >= 8 selects a zeroinitializer lane) and bitcast to i32, the declared
; return type.
; Expected codegen in both run configurations: the load is folded into
; vpcmpnltq as a (%rdi) memory operand.
14280 define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
14281 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem:
14282 ; VLX: # %bb.0: # %entry
14283 ; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
14284 ; VLX-NEXT: kmovd %k0, %eax
14285 ; VLX-NEXT: vzeroupper
14288 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem:
14289 ; NoVLX: # %bb.0: # %entry
14290 ; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
14291 ; NoVLX-NEXT: kmovw %k0, %eax
14292 ; NoVLX-NEXT: vzeroupper
14295 %0 = bitcast <8 x i64> %__a to <8 x i64>
14296 %load = load <8 x i64>, <8 x i64>* %__b
14297 %1 = bitcast <8 x i64> %load to <8 x i64>
14298 %2 = icmp sge <8 x i64> %0, %1
14299 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14300 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of two <8 x i64> register operands. The compare
; result is ANDed with %__u (bitcast to <8 x i1>), widened to <32 x i1>
; (every shuffle index >= 8 selects a zeroinitializer lane) and bitcast to
; i32, the declared return type.
; Expected codegen in both run configurations: %__u is moved into %k1 and
; applied as the write-mask of vpcmpnltq.
14304 define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14305 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask:
14306 ; VLX: # %bb.0: # %entry
14307 ; VLX-NEXT: kmovd %edi, %k1
14308 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14309 ; VLX-NEXT: kmovd %k0, %eax
14310 ; VLX-NEXT: vzeroupper
14313 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask:
14314 ; NoVLX: # %bb.0: # %entry
14315 ; NoVLX-NEXT: kmovw %edi, %k1
14316 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14317 ; NoVLX-NEXT: kmovw %k0, %eax
14318 ; NoVLX-NEXT: vzeroupper
14321 %0 = bitcast <8 x i64> %__a to <8 x i64>
14322 %1 = bitcast <8 x i64> %__b to <8 x i64>
14323 %2 = icmp sge <8 x i64> %0, %1
14324 %3 = bitcast i8 %__u to <8 x i1>
14325 %4 = and <8 x i1> %2, %3
14326 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14327 %6 = bitcast <32 x i1> %5 to i32
; Masked signed >= compare of <8 x i64> %__a against a full <8 x i64> vector
; loaded from %__b. The compare result is ANDed with %__u (bitcast to
; <8 x i1>), widened to <32 x i1> (every shuffle index >= 8 selects a
; zeroinitializer lane) and bitcast to i32, the declared return type.
; Expected codegen in both run configurations: %__u becomes write-mask %k1
; and the load is folded into vpcmpnltq as a (%rsi) memory operand.
14331 define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
14332 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem:
14333 ; VLX: # %bb.0: # %entry
14334 ; VLX-NEXT: kmovd %edi, %k1
14335 ; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14336 ; VLX-NEXT: kmovd %k0, %eax
14337 ; VLX-NEXT: vzeroupper
14340 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem:
14341 ; NoVLX: # %bb.0: # %entry
14342 ; NoVLX-NEXT: kmovw %edi, %k1
14343 ; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14344 ; NoVLX-NEXT: kmovw %k0, %eax
14345 ; NoVLX-NEXT: vzeroupper
14348 %0 = bitcast <8 x i64> %__a to <8 x i64>
14349 %load = load <8 x i64>, <8 x i64>* %__b
14350 %1 = bitcast <8 x i64> %load to <8 x i64>
14351 %2 = icmp sge <8 x i64> %0, %1
14352 %3 = bitcast i8 %__u to <8 x i1>
14353 %4 = and <8 x i1> %2, %3
14354 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14355 %6 = bitcast <32 x i1> %5 to i32
; Signed >= compare of <8 x i64> %__a against a scalar i64 that is loaded
; from %__b and splatted to all eight lanes (insertelement + shuffle). The
; <8 x i1> result is widened to <32 x i1> (every shuffle index >= 8 selects a
; zeroinitializer lane) and bitcast to i32, the declared return type.
; Expected codegen in both run configurations: the scalar load and splat are
; folded into vpcmpnltq as a {1to8} embedded-broadcast memory operand.
14360 define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
14361 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
14362 ; VLX: # %bb.0: # %entry
14363 ; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14364 ; VLX-NEXT: kmovd %k0, %eax
14365 ; VLX-NEXT: vzeroupper
14368 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
14369 ; NoVLX: # %bb.0: # %entry
14370 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14371 ; NoVLX-NEXT: kmovw %k0, %eax
14372 ; NoVLX-NEXT: vzeroupper
14375 %0 = bitcast <8 x i64> %__a to <8 x i64>
14376 %load = load i64, i64* %__b
14377 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14378 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14379 %2 = icmp sge <8 x i64> %0, %1
14380 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14381 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of <8 x i64> %__a against a scalar i64 that is
; loaded from %__b and splatted to all eight lanes. The compare result is
; ANDed with %__u (bitcast to <8 x i1>), widened to <32 x i1> (every shuffle
; index >= 8 selects a zeroinitializer lane) and bitcast to i32, the declared
; return type.
; Expected codegen in both run configurations: %__u becomes write-mask %k1
; and the splat is folded as a {1to8} embedded-broadcast operand.
14385 define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
14386 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
14387 ; VLX: # %bb.0: # %entry
14388 ; VLX-NEXT: kmovd %edi, %k1
14389 ; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14390 ; VLX-NEXT: kmovd %k0, %eax
14391 ; VLX-NEXT: vzeroupper
14394 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
14395 ; NoVLX: # %bb.0: # %entry
14396 ; NoVLX-NEXT: kmovw %edi, %k1
14397 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14398 ; NoVLX-NEXT: kmovw %k0, %eax
14399 ; NoVLX-NEXT: vzeroupper
14402 %0 = bitcast <8 x i64> %__a to <8 x i64>
14403 %load = load i64, i64* %__b
14404 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14405 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14406 %2 = icmp sge <8 x i64> %0, %1
14407 %3 = bitcast i8 %__u to <8 x i1>
14408 %4 = and <8 x i1> %3, %2
14409 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14410 %6 = bitcast <32 x i1> %5 to i32
; Signed >= compare of two <8 x i64> register operands. The <8 x i1> result
; is widened to <64 x i1> (every shuffle index >= 8 selects a lane of the
; zeroinitializer operand, so mask bits 8-63 are zero) and bitcast to i64,
; the declared return type.
; Expected codegen: a single vpcmpnltq into %k0 in both run configurations;
; the AVX512BW-enabled run reads the mask with kmovq into %rax, while the
; AVX512F-only run uses kmovw into %eax.
14415 define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14416 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask:
14417 ; VLX: # %bb.0: # %entry
14418 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14419 ; VLX-NEXT: kmovq %k0, %rax
14420 ; VLX-NEXT: vzeroupper
14423 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask:
14424 ; NoVLX: # %bb.0: # %entry
14425 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14426 ; NoVLX-NEXT: kmovw %k0, %eax
14427 ; NoVLX-NEXT: vzeroupper
14430 %0 = bitcast <8 x i64> %__a to <8 x i64>
14431 %1 = bitcast <8 x i64> %__b to <8 x i64>
14432 %2 = icmp sge <8 x i64> %0, %1
14433 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14434 %4 = bitcast <64 x i1> %3 to i64
; Signed >= compare of <8 x i64> %__a against a full <8 x i64> vector loaded
; from %__b. The <8 x i1> result is widened to <64 x i1> (every shuffle index
; >= 8 selects a zeroinitializer lane) and bitcast to i64, the declared
; return type.
; Expected codegen: the load is folded into vpcmpnltq as a (%rdi) memory
; operand in both run configurations; the mask is read with kmovq into %rax
; (AVX512BW-enabled run) or kmovw into %eax (AVX512F-only run).
14438 define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
14439 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem:
14440 ; VLX: # %bb.0: # %entry
14441 ; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
14442 ; VLX-NEXT: kmovq %k0, %rax
14443 ; VLX-NEXT: vzeroupper
14446 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem:
14447 ; NoVLX: # %bb.0: # %entry
14448 ; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
14449 ; NoVLX-NEXT: kmovw %k0, %eax
14450 ; NoVLX-NEXT: vzeroupper
14453 %0 = bitcast <8 x i64> %__a to <8 x i64>
14454 %load = load <8 x i64>, <8 x i64>* %__b
14455 %1 = bitcast <8 x i64> %load to <8 x i64>
14456 %2 = icmp sge <8 x i64> %0, %1
14457 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14458 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of two <8 x i64> register operands. The compare
; result is ANDed with %__u (bitcast to <8 x i1>), widened to <64 x i1>
; (every shuffle index >= 8 selects a zeroinitializer lane) and bitcast to
; i64, the declared return type.
; Expected codegen: %__u is moved into %k1 and applied as the write-mask of
; vpcmpnltq in both run configurations; the mask is read with kmovq into
; %rax (AVX512BW-enabled run) or kmovw into %eax (AVX512F-only run).
14462 define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14463 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask:
14464 ; VLX: # %bb.0: # %entry
14465 ; VLX-NEXT: kmovd %edi, %k1
14466 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14467 ; VLX-NEXT: kmovq %k0, %rax
14468 ; VLX-NEXT: vzeroupper
14471 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask:
14472 ; NoVLX: # %bb.0: # %entry
14473 ; NoVLX-NEXT: kmovw %edi, %k1
14474 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14475 ; NoVLX-NEXT: kmovw %k0, %eax
14476 ; NoVLX-NEXT: vzeroupper
14479 %0 = bitcast <8 x i64> %__a to <8 x i64>
14480 %1 = bitcast <8 x i64> %__b to <8 x i64>
14481 %2 = icmp sge <8 x i64> %0, %1
14482 %3 = bitcast i8 %__u to <8 x i1>
14483 %4 = and <8 x i1> %2, %3
14484 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14485 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare of <8 x i64> %__a against a full <8 x i64> vector
; loaded from %__b. The compare result is ANDed with %__u (bitcast to
; <8 x i1>), widened to <64 x i1> (every shuffle index >= 8 selects a
; zeroinitializer lane) and bitcast to i64, the declared return type.
; Expected codegen: %__u becomes write-mask %k1 and the load is folded into
; vpcmpnltq as a (%rsi) memory operand in both run configurations; the mask
; is read with kmovq into %rax (AVX512BW-enabled run) or kmovw into %eax
; (AVX512F-only run).
14489 define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
14490 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem:
14491 ; VLX: # %bb.0: # %entry
14492 ; VLX-NEXT: kmovd %edi, %k1
14493 ; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14494 ; VLX-NEXT: kmovq %k0, %rax
14495 ; VLX-NEXT: vzeroupper
14498 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem:
14499 ; NoVLX: # %bb.0: # %entry
14500 ; NoVLX-NEXT: kmovw %edi, %k1
14501 ; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14502 ; NoVLX-NEXT: kmovw %k0, %eax
14503 ; NoVLX-NEXT: vzeroupper
14506 %0 = bitcast <8 x i64> %__a to <8 x i64>
14507 %load = load <8 x i64>, <8 x i64>* %__b
14508 %1 = bitcast <8 x i64> %load to <8 x i64>
14509 %2 = icmp sge <8 x i64> %0, %1
14510 %3 = bitcast i8 %__u to <8 x i1>
14511 %4 = and <8 x i1> %2, %3
14512 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14513 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of <8 x i64> %__a against a scalar i64 that is loaded
; from %__b and splatted to all eight lanes (insertelement + shuffle). The
; <8 x i1> result is widened to <64 x i1> (every shuffle index >= 8 selects a
; zeroinitializer lane) and bitcast to i64, the declared return type.
; Expected codegen: the scalar load and splat are folded into vpcmpnltq as a
; {1to8} embedded-broadcast operand in both run configurations; the mask is
; read with kmovq into %rax (AVX512BW-enabled run) or kmovw into %eax
; (AVX512F-only run).
14518 define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
14519 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
14520 ; VLX: # %bb.0: # %entry
14521 ; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14522 ; VLX-NEXT: kmovq %k0, %rax
14523 ; VLX-NEXT: vzeroupper
14526 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
14527 ; NoVLX: # %bb.0: # %entry
14528 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14529 ; NoVLX-NEXT: kmovw %k0, %eax
14530 ; NoVLX-NEXT: vzeroupper
14533 %0 = bitcast <8 x i64> %__a to <8 x i64>
14534 %load = load i64, i64* %__b
14535 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14536 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14537 %2 = icmp sge <8 x i64> %0, %1
14538 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14539 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of <8 x i64> %__a against a scalar i64 that is
; loaded from %__b and splatted to all eight lanes. The compare result is
; ANDed with %__u (bitcast to <8 x i1>), widened to <64 x i1> (every shuffle
; index >= 8 selects a zeroinitializer lane) and bitcast to i64, the declared
; return type.
; Expected codegen: %__u becomes write-mask %k1 and the splat is folded as a
; {1to8} embedded-broadcast operand in both run configurations; the mask is
; read with kmovq into %rax (AVX512BW-enabled run) or kmovw into %eax
; (AVX512F-only run).
14543 define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
14544 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
14545 ; VLX: # %bb.0: # %entry
14546 ; VLX-NEXT: kmovd %edi, %k1
14547 ; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14548 ; VLX-NEXT: kmovq %k0, %rax
14549 ; VLX-NEXT: vzeroupper
14552 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
14553 ; NoVLX: # %bb.0: # %entry
14554 ; NoVLX-NEXT: kmovw %edi, %k1
14555 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14556 ; NoVLX-NEXT: kmovw %k0, %eax
14557 ; NoVLX-NEXT: vzeroupper
14560 %0 = bitcast <8 x i64> %__a to <8 x i64>
14561 %load = load i64, i64* %__b
14562 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14563 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14564 %2 = icmp sge <8 x i64> %0, %1
14565 %3 = bitcast i8 %__u to <8 x i1>
14566 %4 = and <8 x i1> %3, %2
14567 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14568 %6 = bitcast <64 x i1> %5 to i64
; Unsigned < compare of two <16 x i8> vectors (both bitcast from <2 x i64>
; register arguments). The <16 x i1> result is widened to <32 x i1> (shuffle
; indices 16-31 select lanes of the zeroinitializer operand, so the upper
; mask bits are zero) and bitcast to i32, the declared return type.
; Expected codegen: the AVX512VL/BW run selects a single vpcmpltub into a
; mask register. The AVX512F-only run has no byte mask compare, so it forms
; vpmaxub + vpcmpeqb, inverts the result with vpternlogq $15 (all three
; operands the same register), sign-extends bytes to dwords with vpmovsxbd,
; and produces the mask with vptestmd.
14573 define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14574 ; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask:
14575 ; VLX: # %bb.0: # %entry
14576 ; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0
14577 ; VLX-NEXT: kmovd %k0, %eax
14580 ; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask:
14581 ; NoVLX: # %bb.0: # %entry
14582 ; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
14583 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14584 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14585 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14586 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14587 ; NoVLX-NEXT: kmovw %k0, %eax
14588 ; NoVLX-NEXT: vzeroupper
14591 %0 = bitcast <2 x i64> %__a to <16 x i8>
14592 %1 = bitcast <2 x i64> %__b to <16 x i8>
14593 %2 = icmp ult <16 x i8> %0, %1
14594 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14595 %4 = bitcast <32 x i1> %3 to i32
; Unsigned < compare of <16 x i8> (bitcast from %__a) against a <2 x i64>
; vector loaded from %__b and bitcast to <16 x i8>. The <16 x i1> result is
; widened to <32 x i1> (shuffle indices 16-31 select zeroinitializer lanes)
; and bitcast to i32, the declared return type.
; Expected codegen: the AVX512VL/BW run folds the load into vpcmpltub. The
; AVX512F-only run folds it into vpmaxub instead, then uses vpcmpeqb,
; vpternlogq $15 (inversion), vpmovsxbd and vptestmd to build the mask.
14599 define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
14600 ; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem:
14601 ; VLX: # %bb.0: # %entry
14602 ; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0
14603 ; VLX-NEXT: kmovd %k0, %eax
14606 ; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem:
14607 ; NoVLX: # %bb.0: # %entry
14608 ; NoVLX-NEXT: vpmaxub (%rdi), %xmm0, %xmm1
14609 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14610 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14611 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14612 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14613 ; NoVLX-NEXT: kmovw %k0, %eax
14614 ; NoVLX-NEXT: vzeroupper
14617 %0 = bitcast <2 x i64> %__a to <16 x i8>
14618 %load = load <2 x i64>, <2 x i64>* %__b
14619 %1 = bitcast <2 x i64> %load to <16 x i8>
14620 %2 = icmp ult <16 x i8> %0, %1
14621 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14622 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned < compare of two <16 x i8> vectors (both bitcast from
; <2 x i64> register arguments). The compare result is ANDed with %__u
; (bitcast to <16 x i1>), widened to <32 x i1> (shuffle indices 16-31 select
; zeroinitializer lanes) and bitcast to i32, the declared return type.
; Expected codegen: the AVX512VL/BW run moves %__u into %k1 and uses it as
; the write-mask of vpcmpltub. The AVX512F-only run builds the mask with
; vpmaxub, vpcmpeqb, vpternlogq $15, vpmovsxbd and vptestmd, then applies
; %__u with a scalar andl of %edi into %eax.
14626 define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14627 ; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
14628 ; VLX: # %bb.0: # %entry
14629 ; VLX-NEXT: kmovd %edi, %k1
14630 ; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1}
14631 ; VLX-NEXT: kmovd %k0, %eax
14634 ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
14635 ; NoVLX: # %bb.0: # %entry
14636 ; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
14637 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14638 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14639 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14640 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14641 ; NoVLX-NEXT: kmovw %k0, %eax
14642 ; NoVLX-NEXT: andl %edi, %eax
14643 ; NoVLX-NEXT: vzeroupper
14646 %0 = bitcast <2 x i64> %__a to <16 x i8>
14647 %1 = bitcast <2 x i64> %__b to <16 x i8>
14648 %2 = icmp ult <16 x i8> %0, %1
14649 %3 = bitcast i16 %__u to <16 x i1>
14650 %4 = and <16 x i1> %2, %3
14651 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14652 %6 = bitcast <32 x i1> %5 to i32
; Masked unsigned < compare of <16 x i8> (bitcast from %__a) against a
; <2 x i64> vector loaded from %__b and bitcast to <16 x i8>. The compare
; result is ANDed with %__u (bitcast to <16 x i1>), widened to <32 x i1>
; (shuffle indices 16-31 select zeroinitializer lanes) and bitcast to i32,
; the declared return type.
; Expected codegen: the AVX512VL/BW run uses %__u as write-mask %k1 and folds
; the load into vpcmpltub. The AVX512F-only run folds the load into vpmaxub,
; builds the mask with vpcmpeqb, vpternlogq $15, vpmovsxbd and vptestmd, then
; applies %__u with a scalar andl of %edi into %eax.
14656 define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
14657 ; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
14658 ; VLX: # %bb.0: # %entry
14659 ; VLX-NEXT: kmovd %edi, %k1
14660 ; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1}
14661 ; VLX-NEXT: kmovd %k0, %eax
14664 ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
14665 ; NoVLX: # %bb.0: # %entry
14666 ; NoVLX-NEXT: vpmaxub (%rsi), %xmm0, %xmm1
14667 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14668 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14669 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14670 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14671 ; NoVLX-NEXT: kmovw %k0, %eax
14672 ; NoVLX-NEXT: andl %edi, %eax
14673 ; NoVLX-NEXT: vzeroupper
14676 %0 = bitcast <2 x i64> %__a to <16 x i8>
14677 %load = load <2 x i64>, <2 x i64>* %__b
14678 %1 = bitcast <2 x i64> %load to <16 x i8>
14679 %2 = icmp ult <16 x i8> %0, %1
14680 %3 = bitcast i16 %__u to <16 x i1>
14681 %4 = and <16 x i1> %2, %3
14682 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14683 %6 = bitcast <32 x i1> %5 to i32
; Unsigned < compare of two <16 x i8> vectors (both bitcast from <2 x i64>
; register arguments). The <16 x i1> result is widened to <64 x i1> (every
; shuffle index >= 16 selects a lane of the zeroinitializer operand, so mask
; bits 16-63 are zero) and bitcast to i64, the declared return type.
; Expected codegen: the AVX512VL/BW run selects vpcmpltub and reads the mask
; with kmovq into %rax. The AVX512F-only run builds the mask with vpmaxub,
; vpcmpeqb, vpternlogq $15 (inversion), vpmovsxbd and vptestmd, then reads it
; with kmovw into %eax.
14688 define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14689 ; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
14690 ; VLX: # %bb.0: # %entry
14691 ; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0
14692 ; VLX-NEXT: kmovq %k0, %rax
14695 ; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
14696 ; NoVLX: # %bb.0: # %entry
14697 ; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
14698 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14699 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14700 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14701 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14702 ; NoVLX-NEXT: kmovw %k0, %eax
14703 ; NoVLX-NEXT: vzeroupper
14706 %0 = bitcast <2 x i64> %__a to <16 x i8>
14707 %1 = bitcast <2 x i64> %__b to <16 x i8>
14708 %2 = icmp ult <16 x i8> %0, %1
14709 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14710 %4 = bitcast <64 x i1> %3 to i64
; Unsigned < compare of <16 x i8> (bitcast from %__a) against a <2 x i64>
; vector loaded from %__b and bitcast to <16 x i8>. The <16 x i1> result is
; widened to <64 x i1> (every shuffle index >= 16 selects a zeroinitializer
; lane) and bitcast to i64, the declared return type.
; Expected codegen: the AVX512VL/BW run folds the load into vpcmpltub and
; reads the mask with kmovq into %rax. The AVX512F-only run folds the load
; into vpmaxub, builds the mask with vpcmpeqb, vpternlogq $15, vpmovsxbd and
; vptestmd, then reads it with kmovw into %eax.
14714 define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
14715 ; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
14716 ; VLX: # %bb.0: # %entry
14717 ; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0
14718 ; VLX-NEXT: kmovq %k0, %rax
14721 ; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
14722 ; NoVLX: # %bb.0: # %entry
14723 ; NoVLX-NEXT: vpmaxub (%rdi), %xmm0, %xmm1
14724 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14725 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14726 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14727 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14728 ; NoVLX-NEXT: kmovw %k0, %eax
14729 ; NoVLX-NEXT: vzeroupper
14732 %0 = bitcast <2 x i64> %__a to <16 x i8>
14733 %load = load <2 x i64>, <2 x i64>* %__b
14734 %1 = bitcast <2 x i64> %load to <16 x i8>
14735 %2 = icmp ult <16 x i8> %0, %1
14736 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14737 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned < compare of two <16 x i8> vectors (both bitcast from
; <2 x i64> register arguments). The compare result is ANDed with %__u
; (bitcast to <16 x i1>), widened to <64 x i1> (every shuffle index >= 16
; selects a zeroinitializer lane) and bitcast to i64, the declared return
; type.
; Expected codegen: the AVX512VL/BW run uses %__u as write-mask %k1 on
; vpcmpltub and reads the mask with kmovq into %rax. The AVX512F-only run
; builds the mask with vpmaxub, vpcmpeqb, vpternlogq $15, vpmovsxbd and
; vptestmd, then applies %__u with a scalar andl of %edi into %eax.
14741 define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14742 ; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask:
14743 ; VLX: # %bb.0: # %entry
14744 ; VLX-NEXT: kmovd %edi, %k1
14745 ; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1}
14746 ; VLX-NEXT: kmovq %k0, %rax
14749 ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask:
14750 ; NoVLX: # %bb.0: # %entry
14751 ; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
14752 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14753 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14754 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14755 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14756 ; NoVLX-NEXT: kmovw %k0, %eax
14757 ; NoVLX-NEXT: andl %edi, %eax
14758 ; NoVLX-NEXT: vzeroupper
14761 %0 = bitcast <2 x i64> %__a to <16 x i8>
14762 %1 = bitcast <2 x i64> %__b to <16 x i8>
14763 %2 = icmp ult <16 x i8> %0, %1
14764 %3 = bitcast i16 %__u to <16 x i1>
14765 %4 = and <16 x i1> %2, %3
14766 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14767 %6 = bitcast <64 x i1> %5 to i64
; Memory-operand variant of the masked <16 x i8> unsigned-less-than test: the
; second operand is loaded from %__b before the compare. The result is ANDed
; with the i16 mask %__u, widened to <64 x i1> with zeroinitializer lanes, and
; bitcast to i64.
; Both runs are expected to fold the load into the first vector instruction
; (address in %rsi, since %edi carries the mask).
14771 define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
14772 ; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem:
14773 ; VLX: # %bb.0: # %entry
14774 ; VLX-NEXT: kmovd %edi, %k1
14775 ; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1}
14776 ; VLX-NEXT: kmovq %k0, %rax
14779 ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem:
14780 ; NoVLX: # %bb.0: # %entry
14781 ; NoVLX-NEXT: vpmaxub (%rsi), %xmm0, %xmm1
14782 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14783 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14784 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14785 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14786 ; NoVLX-NEXT: kmovw %k0, %eax
14787 ; NoVLX-NEXT: andl %edi, %eax
14788 ; NoVLX-NEXT: vzeroupper
14791 %0 = bitcast <2 x i64> %__a to <16 x i8>
14792 %load = load <2 x i64>, <2 x i64>* %__b
14793 %1 = bitcast <2 x i64> %load to <16 x i8>
14794 %2 = icmp ult <16 x i8> %0, %1
14795 %3 = bitcast i16 %__u to <16 x i1>
14796 %4 = and <16 x i1> %2, %3
14797 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14798 %6 = bitcast <64 x i1> %5 to i64
; Unmasked unsigned-less-than compare of two <32 x i8> vectors (both in
; registers). The <32 x i1> result is widened to <64 x i1> by a shufflevector
; whose lanes 32..63 index into the zeroinitializer operand, then bitcast to i64.
; The VLX run expects one vpcmpltub on ymm registers; the NoVLX run expects the
; 32 result bits to be produced as two 16-bit halves combined with shll/orl.
14803 define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
14804 ; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask:
14805 ; VLX: # %bb.0: # %entry
14806 ; VLX-NEXT: vpcmpltub %ymm1, %ymm0, %k0
14807 ; VLX-NEXT: kmovq %k0, %rax
14808 ; VLX-NEXT: vzeroupper
14811 ; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask:
14812 ; NoVLX: # %bb.0: # %entry
14813 ; NoVLX-NEXT: vpmaxub %ymm1, %ymm0, %ymm1
14814 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
14815 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14816 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
14817 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
14818 ; NoVLX-NEXT: kmovw %k0, %ecx
14819 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
14820 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14821 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14822 ; NoVLX-NEXT: kmovw %k0, %eax
14823 ; NoVLX-NEXT: shll $16, %eax
14824 ; NoVLX-NEXT: orl %ecx, %eax
14825 ; NoVLX-NEXT: vzeroupper
14828 %0 = bitcast <4 x i64> %__a to <32 x i8>
14829 %1 = bitcast <4 x i64> %__b to <32 x i8>
14830 %2 = icmp ult <32 x i8> %0, %1
14831 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
14832 %4 = bitcast <64 x i1> %3 to i64
; Memory-operand variant of the unmasked <32 x i8> unsigned-less-than test: the
; second operand is loaded from %__b. The <32 x i1> result is widened to
; <64 x i1> with zeroinitializer lanes and bitcast to i64.
; Both runs are expected to fold the load from (%rdi) into the first vector
; instruction.
14836 define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
14837 ; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem:
14838 ; VLX: # %bb.0: # %entry
14839 ; VLX-NEXT: vpcmpltub (%rdi), %ymm0, %k0
14840 ; VLX-NEXT: kmovq %k0, %rax
14841 ; VLX-NEXT: vzeroupper
14844 ; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem:
14845 ; NoVLX: # %bb.0: # %entry
14846 ; NoVLX-NEXT: vpmaxub (%rdi), %ymm0, %ymm1
14847 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
14848 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14849 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
14850 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
14851 ; NoVLX-NEXT: kmovw %k0, %ecx
14852 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
14853 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14854 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14855 ; NoVLX-NEXT: kmovw %k0, %eax
14856 ; NoVLX-NEXT: shll $16, %eax
14857 ; NoVLX-NEXT: orl %ecx, %eax
14858 ; NoVLX-NEXT: vzeroupper
14861 %0 = bitcast <4 x i64> %__a to <32 x i8>
14862 %load = load <4 x i64>, <4 x i64>* %__b
14863 %1 = bitcast <4 x i64> %load to <32 x i8>
14864 %2 = icmp ult <32 x i8> %0, %1
14865 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
14866 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned-less-than compare of two <32 x i8> vectors (both in
; registers). The <32 x i1> result is ANDed with the i32 mask %__u, widened to
; <64 x i1> with zeroinitializer lanes, and bitcast to i64.
; The VLX run expects one masked vpcmpltub; the NoVLX run expects each 16-bit
; half to be ANDed with the matching half of %edi (low half directly, high half
; after shrl $16) before the halves are merged with shll/movzwl/orl.
14870 define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
14871 ; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask:
14872 ; VLX: # %bb.0: # %entry
14873 ; VLX-NEXT: kmovd %edi, %k1
14874 ; VLX-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1}
14875 ; VLX-NEXT: kmovq %k0, %rax
14876 ; VLX-NEXT: vzeroupper
14879 ; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask:
14880 ; NoVLX: # %bb.0: # %entry
14881 ; NoVLX-NEXT: vpmaxub %ymm1, %ymm0, %ymm1
14882 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
14883 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14884 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
14885 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
14886 ; NoVLX-NEXT: kmovw %k0, %eax
14887 ; NoVLX-NEXT: andl %edi, %eax
14888 ; NoVLX-NEXT: shrl $16, %edi
14889 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
14890 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14891 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14892 ; NoVLX-NEXT: kmovw %k0, %ecx
14893 ; NoVLX-NEXT: andl %edi, %ecx
14894 ; NoVLX-NEXT: shll $16, %ecx
14895 ; NoVLX-NEXT: movzwl %ax, %eax
14896 ; NoVLX-NEXT: orl %ecx, %eax
14897 ; NoVLX-NEXT: vzeroupper
14900 %0 = bitcast <4 x i64> %__a to <32 x i8>
14901 %1 = bitcast <4 x i64> %__b to <32 x i8>
14902 %2 = icmp ult <32 x i8> %0, %1
14903 %3 = bitcast i32 %__u to <32 x i1>
14904 %4 = and <32 x i1> %2, %3
14905 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
14906 %6 = bitcast <64 x i1> %5 to i64
; Memory-operand variant of the masked <32 x i8> unsigned-less-than test: the
; second operand is loaded from %__b. The <32 x i1> result is ANDed with the
; i32 mask %__u, widened to <64 x i1> with zeroinitializer lanes, and bitcast
; to i64.
; Both runs are expected to fold the load from (%rsi) into the first vector
; instruction; the NoVLX run masks the two 16-bit halves separately.
14910 define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
14911 ; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem:
14912 ; VLX: # %bb.0: # %entry
14913 ; VLX-NEXT: kmovd %edi, %k1
14914 ; VLX-NEXT: vpcmpltub (%rsi), %ymm0, %k0 {%k1}
14915 ; VLX-NEXT: kmovq %k0, %rax
14916 ; VLX-NEXT: vzeroupper
14919 ; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem:
14920 ; NoVLX: # %bb.0: # %entry
14921 ; NoVLX-NEXT: vpmaxub (%rsi), %ymm0, %ymm1
14922 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
14923 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14924 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
14925 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
14926 ; NoVLX-NEXT: kmovw %k0, %eax
14927 ; NoVLX-NEXT: andl %edi, %eax
14928 ; NoVLX-NEXT: shrl $16, %edi
14929 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
14930 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14931 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14932 ; NoVLX-NEXT: kmovw %k0, %ecx
14933 ; NoVLX-NEXT: andl %edi, %ecx
14934 ; NoVLX-NEXT: shll $16, %ecx
14935 ; NoVLX-NEXT: movzwl %ax, %eax
14936 ; NoVLX-NEXT: orl %ecx, %eax
14937 ; NoVLX-NEXT: vzeroupper
14940 %0 = bitcast <4 x i64> %__a to <32 x i8>
14941 %load = load <4 x i64>, <4 x i64>* %__b
14942 %1 = bitcast <4 x i64> %load to <32 x i8>
14943 %2 = icmp ult <32 x i8> %0, %1
14944 %3 = bitcast i32 %__u to <32 x i1>
14945 %4 = and <32 x i1> %2, %3
14946 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
14947 %6 = bitcast <64 x i1> %5 to i64
; Unmasked unsigned-less-than compare of two <8 x i16> vectors (both in
; registers). The <8 x i1> result is widened to <16 x i1> by a shufflevector
; whose lanes 8..15 index into the zeroinitializer operand, then bitcast to i16.
; The VLX run expects one vpcmpltuw; the NoVLX run expects the
; vpmaxuw/vpcmpeqw/vpternlogq sequence followed by vpmovsxwq + vptestmq.
14952 define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14953 ; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask:
14954 ; VLX: # %bb.0: # %entry
14955 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
14956 ; VLX-NEXT: kmovd %k0, %eax
14957 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14960 ; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask:
14961 ; NoVLX: # %bb.0: # %entry
14962 ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
14963 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
14964 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14965 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
14966 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
14967 ; NoVLX-NEXT: kmovw %k0, %eax
14968 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14969 ; NoVLX-NEXT: vzeroupper
14972 %0 = bitcast <2 x i64> %__a to <8 x i16>
14973 %1 = bitcast <2 x i64> %__b to <8 x i16>
14974 %2 = icmp ult <8 x i16> %0, %1
14975 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14976 %4 = bitcast <16 x i1> %3 to i16
; Memory-operand variant of the unmasked <8 x i16> unsigned-less-than test: the
; second operand is loaded from %__b. The <8 x i1> result is widened to
; <16 x i1> with zeroinitializer lanes and bitcast to i16.
; Both runs are expected to fold the load from (%rdi) into the first vector
; instruction.
14980 define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
14981 ; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem:
14982 ; VLX: # %bb.0: # %entry
14983 ; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
14984 ; VLX-NEXT: kmovd %k0, %eax
14985 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14988 ; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem:
14989 ; NoVLX: # %bb.0: # %entry
14990 ; NoVLX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm1
14991 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
14992 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14993 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
14994 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
14995 ; NoVLX-NEXT: kmovw %k0, %eax
14996 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14997 ; NoVLX-NEXT: vzeroupper
15000 %0 = bitcast <2 x i64> %__a to <8 x i16>
15001 %load = load <2 x i64>, <2 x i64>* %__b
15002 %1 = bitcast <2 x i64> %load to <8 x i16>
15003 %2 = icmp ult <8 x i16> %0, %1
15004 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15005 %4 = bitcast <16 x i1> %3 to i16
; Masked unsigned-less-than compare of two <8 x i16> vectors (both in
; registers). The <8 x i1> result is ANDed with the i8 mask %__u, widened to
; <16 x i1> with zeroinitializer lanes, and bitcast to i16.
; The VLX run expects one masked vpcmpltuw; the NoVLX run expects the mask to
; be moved into %k1 and applied as the write-mask of vptestmq.
15009 define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15010 ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask:
15011 ; VLX: # %bb.0: # %entry
15012 ; VLX-NEXT: kmovd %edi, %k1
15013 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
15014 ; VLX-NEXT: kmovd %k0, %eax
15015 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
15018 ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask:
15019 ; NoVLX: # %bb.0: # %entry
15020 ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
15021 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15022 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15023 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15024 ; NoVLX-NEXT: kmovw %edi, %k1
15025 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
15026 ; NoVLX-NEXT: kmovw %k0, %eax
15027 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
15028 ; NoVLX-NEXT: vzeroupper
15031 %0 = bitcast <2 x i64> %__a to <8 x i16>
15032 %1 = bitcast <2 x i64> %__b to <8 x i16>
15033 %2 = icmp ult <8 x i16> %0, %1
15034 %3 = bitcast i8 %__u to <8 x i1>
15035 %4 = and <8 x i1> %2, %3
15036 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15037 %6 = bitcast <16 x i1> %5 to i16
; Memory-operand variant of the masked <8 x i16> unsigned-less-than test: the
; second operand is loaded from %__b. The <8 x i1> result is ANDed with the i8
; mask %__u, widened to <16 x i1> with zeroinitializer lanes, and bitcast to i16.
; Both runs are expected to fold the load from (%rsi) into the first vector
; instruction.
15041 define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15042 ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem:
15043 ; VLX: # %bb.0: # %entry
15044 ; VLX-NEXT: kmovd %edi, %k1
15045 ; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
15046 ; VLX-NEXT: kmovd %k0, %eax
15047 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
15050 ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem:
15051 ; NoVLX: # %bb.0: # %entry
15052 ; NoVLX-NEXT: vpmaxuw (%rsi), %xmm0, %xmm1
15053 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15054 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15055 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15056 ; NoVLX-NEXT: kmovw %edi, %k1
15057 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
15058 ; NoVLX-NEXT: kmovw %k0, %eax
15059 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
15060 ; NoVLX-NEXT: vzeroupper
15063 %0 = bitcast <2 x i64> %__a to <8 x i16>
15064 %load = load <2 x i64>, <2 x i64>* %__b
15065 %1 = bitcast <2 x i64> %load to <8 x i16>
15066 %2 = icmp ult <8 x i16> %0, %1
15067 %3 = bitcast i8 %__u to <8 x i1>
15068 %4 = and <8 x i1> %2, %3
15069 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15070 %6 = bitcast <16 x i1> %5 to i16
; Unmasked unsigned-less-than compare of two <8 x i16> vectors (both in
; registers). The <8 x i1> result is widened to <32 x i1> by a shufflevector
; whose lanes 8..31 all index into the zeroinitializer operand, then bitcast
; to i32.
; The VLX run expects one vpcmpltuw plus kmovd; the NoVLX run expects the
; vpmaxuw/vpcmpeqw/vpternlogq sequence followed by vpmovsxwq + vptestmq.
15075 define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15076 ; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask:
15077 ; VLX: # %bb.0: # %entry
15078 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
15079 ; VLX-NEXT: kmovd %k0, %eax
15082 ; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask:
15083 ; NoVLX: # %bb.0: # %entry
15084 ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
15085 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15086 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15087 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15088 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
15089 ; NoVLX-NEXT: kmovw %k0, %eax
15090 ; NoVLX-NEXT: vzeroupper
15093 %0 = bitcast <2 x i64> %__a to <8 x i16>
15094 %1 = bitcast <2 x i64> %__b to <8 x i16>
15095 %2 = icmp ult <8 x i16> %0, %1
15096 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15097 %4 = bitcast <32 x i1> %3 to i32
; Memory-operand variant of the unmasked <8 x i16> unsigned-less-than test with
; an i32 result: the second operand is loaded from %__b. The <8 x i1> result is
; widened to <32 x i1> with zeroinitializer lanes and bitcast to i32.
; Both runs are expected to fold the load from (%rdi) into the first vector
; instruction.
15101 define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15102 ; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem:
15103 ; VLX: # %bb.0: # %entry
15104 ; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
15105 ; VLX-NEXT: kmovd %k0, %eax
15108 ; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem:
15109 ; NoVLX: # %bb.0: # %entry
15110 ; NoVLX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm1
15111 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15112 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15113 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15114 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
15115 ; NoVLX-NEXT: kmovw %k0, %eax
15116 ; NoVLX-NEXT: vzeroupper
15119 %0 = bitcast <2 x i64> %__a to <8 x i16>
15120 %load = load <2 x i64>, <2 x i64>* %__b
15121 %1 = bitcast <2 x i64> %load to <8 x i16>
15122 %2 = icmp ult <8 x i16> %0, %1
15123 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15124 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned-less-than compare of two <8 x i16> vectors (both in
; registers) with an i32 result. The <8 x i1> result is ANDed with the i8 mask
; %__u, widened to <32 x i1> with zeroinitializer lanes, and bitcast to i32.
; The VLX run expects one masked vpcmpltuw; the NoVLX run expects the mask to
; be moved into %k1 and applied as the write-mask of vptestmq.
15128 define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15129 ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask:
15130 ; VLX: # %bb.0: # %entry
15131 ; VLX-NEXT: kmovd %edi, %k1
15132 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
15133 ; VLX-NEXT: kmovd %k0, %eax
15136 ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask:
15137 ; NoVLX: # %bb.0: # %entry
15138 ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
15139 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15140 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15141 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15142 ; NoVLX-NEXT: kmovw %edi, %k1
15143 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
15144 ; NoVLX-NEXT: kmovw %k0, %eax
15145 ; NoVLX-NEXT: vzeroupper
15148 %0 = bitcast <2 x i64> %__a to <8 x i16>
15149 %1 = bitcast <2 x i64> %__b to <8 x i16>
15150 %2 = icmp ult <8 x i16> %0, %1
15151 %3 = bitcast i8 %__u to <8 x i1>
15152 %4 = and <8 x i1> %2, %3
15153 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15154 %6 = bitcast <32 x i1> %5 to i32
; Memory-operand variant of the masked <8 x i16> unsigned-less-than test with
; an i32 result: the second operand is loaded from %__b. The <8 x i1> result is
; ANDed with the i8 mask %__u, widened to <32 x i1> with zeroinitializer lanes,
; and bitcast to i32.
; Both runs are expected to fold the load from (%rsi) into the first vector
; instruction.
15158 define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15159 ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem:
15160 ; VLX: # %bb.0: # %entry
15161 ; VLX-NEXT: kmovd %edi, %k1
15162 ; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
15163 ; VLX-NEXT: kmovd %k0, %eax
15166 ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem:
15167 ; NoVLX: # %bb.0: # %entry
15168 ; NoVLX-NEXT: vpmaxuw (%rsi), %xmm0, %xmm1
15169 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15170 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15171 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15172 ; NoVLX-NEXT: kmovw %edi, %k1
15173 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
15174 ; NoVLX-NEXT: kmovw %k0, %eax
15175 ; NoVLX-NEXT: vzeroupper
15178 %0 = bitcast <2 x i64> %__a to <8 x i16>
15179 %load = load <2 x i64>, <2 x i64>* %__b
15180 %1 = bitcast <2 x i64> %load to <8 x i16>
15181 %2 = icmp ult <8 x i16> %0, %1
15182 %3 = bitcast i8 %__u to <8 x i1>
15183 %4 = and <8 x i1> %2, %3
15184 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15185 %6 = bitcast <32 x i1> %5 to i32
; Unmasked unsigned-less-than compare of two <8 x i16> vectors (both in
; registers) with an i64 result. The <8 x i1> result is widened to <64 x i1>
; by a shufflevector whose lanes 8..63 all index into the zeroinitializer
; operand, then bitcast to i64.
; The VLX run expects one vpcmpltuw plus kmovq; the NoVLX run expects the
; vpmaxuw/vpcmpeqw/vpternlogq sequence followed by vpmovsxwq + vptestmq.
15190 define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15191 ; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask:
15192 ; VLX: # %bb.0: # %entry
15193 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
15194 ; VLX-NEXT: kmovq %k0, %rax
15197 ; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask:
15198 ; NoVLX: # %bb.0: # %entry
15199 ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
15200 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15201 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15202 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15203 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
15204 ; NoVLX-NEXT: kmovw %k0, %eax
15205 ; NoVLX-NEXT: vzeroupper
15208 %0 = bitcast <2 x i64> %__a to <8 x i16>
15209 %1 = bitcast <2 x i64> %__b to <8 x i16>
15210 %2 = icmp ult <8 x i16> %0, %1
15211 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15212 %4 = bitcast <64 x i1> %3 to i64
; Memory-operand variant of the unmasked <8 x i16> unsigned-less-than test with
; an i64 result: the second operand is loaded from %__b. The <8 x i1> result is
; widened to <64 x i1> with zeroinitializer lanes and bitcast to i64.
; Both runs are expected to fold the load from (%rdi) into the first vector
; instruction.
15216 define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15217 ; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem:
15218 ; VLX: # %bb.0: # %entry
15219 ; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
15220 ; VLX-NEXT: kmovq %k0, %rax
15223 ; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem:
15224 ; NoVLX: # %bb.0: # %entry
15225 ; NoVLX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm1
15226 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15227 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15228 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15229 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
15230 ; NoVLX-NEXT: kmovw %k0, %eax
15231 ; NoVLX-NEXT: vzeroupper
15234 %0 = bitcast <2 x i64> %__a to <8 x i16>
15235 %load = load <2 x i64>, <2 x i64>* %__b
15236 %1 = bitcast <2 x i64> %load to <8 x i16>
15237 %2 = icmp ult <8 x i16> %0, %1
15238 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15239 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned-less-than compare of two <8 x i16> vectors (both in
; registers) with an i64 result. The <8 x i1> result is ANDed with the i8 mask
; %__u, widened to <64 x i1> with zeroinitializer lanes, and bitcast to i64.
; The VLX run expects one masked vpcmpltuw plus kmovq; the NoVLX run expects
; the mask to be moved into %k1 and applied as the write-mask of vptestmq.
15243 define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15244 ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask:
15245 ; VLX: # %bb.0: # %entry
15246 ; VLX-NEXT: kmovd %edi, %k1
15247 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
15248 ; VLX-NEXT: kmovq %k0, %rax
15251 ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask:
15252 ; NoVLX: # %bb.0: # %entry
15253 ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
15254 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15255 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15256 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15257 ; NoVLX-NEXT: kmovw %edi, %k1
15258 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
15259 ; NoVLX-NEXT: kmovw %k0, %eax
15260 ; NoVLX-NEXT: vzeroupper
15263 %0 = bitcast <2 x i64> %__a to <8 x i16>
15264 %1 = bitcast <2 x i64> %__b to <8 x i16>
15265 %2 = icmp ult <8 x i16> %0, %1
15266 %3 = bitcast i8 %__u to <8 x i1>
15267 %4 = and <8 x i1> %2, %3
15268 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15269 %6 = bitcast <64 x i1> %5 to i64
; Memory-operand variant of the masked <8 x i16> unsigned-less-than test with
; an i64 result: the second operand is loaded from %__b. The <8 x i1> result is
; ANDed with the i8 mask %__u, widened to <64 x i1> with zeroinitializer lanes,
; and bitcast to i64.
; Both runs are expected to fold the load from (%rsi) into the first vector
; instruction.
15273 define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15274 ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem:
15275 ; VLX: # %bb.0: # %entry
15276 ; VLX-NEXT: kmovd %edi, %k1
15277 ; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
15278 ; VLX-NEXT: kmovq %k0, %rax
15281 ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem:
15282 ; NoVLX: # %bb.0: # %entry
15283 ; NoVLX-NEXT: vpmaxuw (%rsi), %xmm0, %xmm1
15284 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15285 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15286 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15287 ; NoVLX-NEXT: kmovw %edi, %k1
15288 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
15289 ; NoVLX-NEXT: kmovw %k0, %eax
15290 ; NoVLX-NEXT: vzeroupper
15293 %0 = bitcast <2 x i64> %__a to <8 x i16>
15294 %load = load <2 x i64>, <2 x i64>* %__b
15295 %1 = bitcast <2 x i64> %load to <8 x i16>
15296 %2 = icmp ult <8 x i16> %0, %1
15297 %3 = bitcast i8 %__u to <8 x i1>
15298 %4 = and <8 x i1> %2, %3
15299 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15300 %6 = bitcast <64 x i1> %5 to i64
; Unmasked unsigned-less-than compare of two <16 x i16> vectors (both in
; registers). The <16 x i1> result is widened to <32 x i1> by a shufflevector
; whose lanes 16..31 index into the zeroinitializer operand, then bitcast to
; i32.
; The VLX run expects one vpcmpltuw on ymm registers; the NoVLX run expects the
; vpmaxuw/vpcmpeqw/vpternlogq sequence followed by vpmovsxwd + vptestmd.
15305 define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15306 ; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask:
15307 ; VLX: # %bb.0: # %entry
15308 ; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0
15309 ; VLX-NEXT: kmovd %k0, %eax
15310 ; VLX-NEXT: vzeroupper
15313 ; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask:
15314 ; NoVLX: # %bb.0: # %entry
15315 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
15316 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15317 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15318 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15319 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15320 ; NoVLX-NEXT: kmovw %k0, %eax
15321 ; NoVLX-NEXT: vzeroupper
15324 %0 = bitcast <4 x i64> %__a to <16 x i16>
15325 %1 = bitcast <4 x i64> %__b to <16 x i16>
15326 %2 = icmp ult <16 x i16> %0, %1
15327 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15328 %4 = bitcast <32 x i1> %3 to i32
; Memory-operand variant of the unmasked <16 x i16> unsigned-less-than test:
; the second operand is loaded from %__b. The <16 x i1> result is widened to
; <32 x i1> with zeroinitializer lanes and bitcast to i32.
; Both runs are expected to fold the load from (%rdi) into the first vector
; instruction.
15332 define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
15333 ; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem:
15334 ; VLX: # %bb.0: # %entry
15335 ; VLX-NEXT: vpcmpltuw (%rdi), %ymm0, %k0
15336 ; VLX-NEXT: kmovd %k0, %eax
15337 ; VLX-NEXT: vzeroupper
15340 ; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem:
15341 ; NoVLX: # %bb.0: # %entry
15342 ; NoVLX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm1
15343 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15344 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15345 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15346 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15347 ; NoVLX-NEXT: kmovw %k0, %eax
15348 ; NoVLX-NEXT: vzeroupper
15351 %0 = bitcast <4 x i64> %__a to <16 x i16>
15352 %load = load <4 x i64>, <4 x i64>* %__b
15353 %1 = bitcast <4 x i64> %load to <16 x i16>
15354 %2 = icmp ult <16 x i16> %0, %1
15355 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15356 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned-less-than compare of two <16 x i16> vectors (both in
; registers). The <16 x i1> result is ANDed with the i16 mask %__u, widened to
; <32 x i1> with zeroinitializer lanes, and bitcast to i32.
; The VLX run expects one masked vpcmpltuw; the NoVLX run expects the mask to
; be applied in a general-purpose register with andl %edi, %eax.
15360 define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15361 ; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask:
15362 ; VLX: # %bb.0: # %entry
15363 ; VLX-NEXT: kmovd %edi, %k1
15364 ; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1}
15365 ; VLX-NEXT: kmovd %k0, %eax
15366 ; VLX-NEXT: vzeroupper
15369 ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask:
15370 ; NoVLX: # %bb.0: # %entry
15371 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
15372 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15373 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15374 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15375 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15376 ; NoVLX-NEXT: kmovw %k0, %eax
15377 ; NoVLX-NEXT: andl %edi, %eax
15378 ; NoVLX-NEXT: vzeroupper
15381 %0 = bitcast <4 x i64> %__a to <16 x i16>
15382 %1 = bitcast <4 x i64> %__b to <16 x i16>
15383 %2 = icmp ult <16 x i16> %0, %1
15384 %3 = bitcast i16 %__u to <16 x i1>
15385 %4 = and <16 x i1> %2, %3
15386 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15387 %6 = bitcast <32 x i1> %5 to i32
; Memory-operand variant of the masked <16 x i16> unsigned-less-than test: the
; second operand is loaded from %__b. The <16 x i1> result is ANDed with the
; i16 mask %__u, widened to <32 x i1> with zeroinitializer lanes, and bitcast
; to i32.
; Both runs are expected to fold the load from (%rsi) into the first vector
; instruction.
15391 define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
15392 ; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem:
15393 ; VLX: # %bb.0: # %entry
15394 ; VLX-NEXT: kmovd %edi, %k1
15395 ; VLX-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1}
15396 ; VLX-NEXT: kmovd %k0, %eax
15397 ; VLX-NEXT: vzeroupper
15400 ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem:
15401 ; NoVLX: # %bb.0: # %entry
15402 ; NoVLX-NEXT: vpmaxuw (%rsi), %ymm0, %ymm1
15403 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15404 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15405 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15406 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15407 ; NoVLX-NEXT: kmovw %k0, %eax
15408 ; NoVLX-NEXT: andl %edi, %eax
15409 ; NoVLX-NEXT: vzeroupper
15412 %0 = bitcast <4 x i64> %__a to <16 x i16>
15413 %load = load <4 x i64>, <4 x i64>* %__b
15414 %1 = bitcast <4 x i64> %load to <16 x i16>
15415 %2 = icmp ult <16 x i16> %0, %1
15416 %3 = bitcast i16 %__u to <16 x i1>
15417 %4 = and <16 x i1> %2, %3
15418 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15419 %6 = bitcast <32 x i1> %5 to i32
; Unmasked unsigned-less-than compare of two <16 x i16> vectors (both in
; registers) with an i64 result. The <16 x i1> result is widened to <64 x i1>
; by a shufflevector whose lanes 16..63 all index into the zeroinitializer
; operand, then bitcast to i64.
; The VLX run expects one vpcmpltuw plus kmovq; the NoVLX run expects the
; vpmaxuw/vpcmpeqw/vpternlogq sequence followed by vpmovsxwd + vptestmd.
15424 define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15425 ; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask:
15426 ; VLX: # %bb.0: # %entry
15427 ; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0
15428 ; VLX-NEXT: kmovq %k0, %rax
15429 ; VLX-NEXT: vzeroupper
15432 ; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask:
15433 ; NoVLX: # %bb.0: # %entry
15434 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
15435 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15436 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15437 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15438 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15439 ; NoVLX-NEXT: kmovw %k0, %eax
15440 ; NoVLX-NEXT: vzeroupper
15443 %0 = bitcast <4 x i64> %__a to <16 x i16>
15444 %1 = bitcast <4 x i64> %__b to <16 x i16>
15445 %2 = icmp ult <16 x i16> %0, %1
15446 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15447 %4 = bitcast <64 x i1> %3 to i64
; Unsigned less-than on 16 x i16 lanes; the right-hand operand is loaded from
; %__b. The <16 x i1> result is widened to 64 lanes (every shuffle index >= 16
; picks a lane of the zeroinitializer operand, so the upper 48 bits are zero)
; and bitcast to i64.
; Assertion lines come from utils/update_llc_test_checks.py; regenerate rather
; than hand-edit.
15451 define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
15452 ; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem:
15453 ; VLX: # %bb.0: # %entry
15454 ; VLX-NEXT: vpcmpltuw (%rdi), %ymm0, %k0
15455 ; VLX-NEXT: kmovq %k0, %rax
15456 ; VLX-NEXT: vzeroupper
15459 ; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem:
15460 ; NoVLX: # %bb.0: # %entry
15461 ; NoVLX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm1
15462 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15463 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15464 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15465 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15466 ; NoVLX-NEXT: kmovw %k0, %eax
15467 ; NoVLX-NEXT: vzeroupper
15470 %0 = bitcast <4 x i64> %__a to <16 x i16>
15471 %load = load <4 x i64>, <4 x i64>* %__b
15472 %1 = bitcast <4 x i64> %load to <16 x i16>
15473 %2 = icmp ult <16 x i16> %0, %1
15474 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15475 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned less-than on 16 x i16 lanes of two register operands. The
; <16 x i1> result is ANDed with the bits of %__u, widened to 64 lanes (every
; shuffle index >= 16 picks a lane of the zeroinitializer operand, so the upper
; 48 bits are zero) and bitcast to i64.
; Assertion lines come from utils/update_llc_test_checks.py; regenerate rather
; than hand-edit.
15479 define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15480 ; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask:
15481 ; VLX: # %bb.0: # %entry
15482 ; VLX-NEXT: kmovd %edi, %k1
15483 ; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1}
15484 ; VLX-NEXT: kmovq %k0, %rax
15485 ; VLX-NEXT: vzeroupper
15488 ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask:
15489 ; NoVLX: # %bb.0: # %entry
15490 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
15491 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15492 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15493 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15494 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15495 ; NoVLX-NEXT: kmovw %k0, %eax
15496 ; NoVLX-NEXT: andl %edi, %eax
15497 ; NoVLX-NEXT: vzeroupper
15500 %0 = bitcast <4 x i64> %__a to <16 x i16>
15501 %1 = bitcast <4 x i64> %__b to <16 x i16>
15502 %2 = icmp ult <16 x i16> %0, %1
15503 %3 = bitcast i16 %__u to <16 x i1>
15504 %4 = and <16 x i1> %2, %3
15505 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15506 %6 = bitcast <64 x i1> %5 to i64
; Masked unsigned less-than on 16 x i16 lanes; the right-hand operand is loaded
; from %__b. The <16 x i1> result is ANDed with the bits of %__u, widened to 64
; lanes (every shuffle index >= 16 picks a lane of the zeroinitializer operand,
; so the upper 48 bits are zero) and bitcast to i64.
; Assertion lines come from utils/update_llc_test_checks.py; regenerate rather
; than hand-edit.
15510 define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
15511 ; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem:
15512 ; VLX: # %bb.0: # %entry
15513 ; VLX-NEXT: kmovd %edi, %k1
15514 ; VLX-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1}
15515 ; VLX-NEXT: kmovq %k0, %rax
15516 ; VLX-NEXT: vzeroupper
15519 ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem:
15520 ; NoVLX: # %bb.0: # %entry
15521 ; NoVLX-NEXT: vpmaxuw (%rsi), %ymm0, %ymm1
15522 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15523 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15524 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15525 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15526 ; NoVLX-NEXT: kmovw %k0, %eax
15527 ; NoVLX-NEXT: andl %edi, %eax
15528 ; NoVLX-NEXT: vzeroupper
15531 %0 = bitcast <4 x i64> %__a to <16 x i16>
15532 %load = load <4 x i64>, <4 x i64>* %__b
15533 %1 = bitcast <4 x i64> %load to <16 x i16>
15534 %2 = icmp ult <16 x i16> %0, %1
15535 %3 = bitcast i16 %__u to <16 x i1>
15536 %4 = and <16 x i1> %2, %3
15537 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15538 %6 = bitcast <64 x i1> %5 to i64
; Unsigned less-than on 32 x i16 lanes of two register operands. The <32 x i1>
; result is widened to 64 lanes (shuffle indices 32-63 pick lanes of the
; zeroinitializer operand, so the upper 32 bits are zero) and bitcast to i64.
; In the NoVLX assertions the two 256-bit halves are compared separately and
; the two 16-bit partial masks are merged with shll $16 / orl.
; Assertion lines come from utils/update_llc_test_checks.py; regenerate rather
; than hand-edit.
15543 define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
15544 ; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
15545 ; VLX: # %bb.0: # %entry
15546 ; VLX-NEXT: vpcmpltuw %zmm1, %zmm0, %k0
15547 ; VLX-NEXT: kmovq %k0, %rax
15548 ; VLX-NEXT: vzeroupper
15551 ; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
15552 ; NoVLX: # %bb.0: # %entry
15553 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
15554 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3
15555 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
15556 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15557 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15558 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15559 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15560 ; NoVLX-NEXT: kmovw %k0, %ecx
15561 ; NoVLX-NEXT: vpmaxuw %ymm3, %ymm2, %ymm0
15562 ; NoVLX-NEXT: vpcmpeqw %ymm0, %ymm2, %ymm0
15563 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15564 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15565 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15566 ; NoVLX-NEXT: kmovw %k0, %eax
15567 ; NoVLX-NEXT: shll $16, %eax
15568 ; NoVLX-NEXT: orl %ecx, %eax
15569 ; NoVLX-NEXT: vzeroupper
15572 %0 = bitcast <8 x i64> %__a to <32 x i16>
15573 %1 = bitcast <8 x i64> %__b to <32 x i16>
15574 %2 = icmp ult <32 x i16> %0, %1
15575 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15576 %4 = bitcast <64 x i1> %3 to i64
; Unsigned less-than on 32 x i16 lanes; the right-hand operand is loaded from
; %__b. The <32 x i1> result is widened to 64 lanes (shuffle indices 32-63 pick
; lanes of the zeroinitializer operand, so the upper 32 bits are zero) and
; bitcast to i64.
; In the NoVLX assertions the two 256-bit halves are compared separately
; (memory offsets 0 and 32) and the partial masks are merged with shll $16 / orl.
; Assertion lines come from utils/update_llc_test_checks.py; regenerate rather
; than hand-edit.
15580 define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
15581 ; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
15582 ; VLX: # %bb.0: # %entry
15583 ; VLX-NEXT: vpcmpltuw (%rdi), %zmm0, %k0
15584 ; VLX-NEXT: kmovq %k0, %rax
15585 ; VLX-NEXT: vzeroupper
15588 ; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
15589 ; NoVLX: # %bb.0: # %entry
15590 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1
15591 ; NoVLX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm2
15592 ; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
15593 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15594 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15595 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15596 ; NoVLX-NEXT: kmovw %k0, %ecx
15597 ; NoVLX-NEXT: vpmaxuw 32(%rdi), %ymm1, %ymm0
15598 ; NoVLX-NEXT: vpcmpeqw %ymm0, %ymm1, %ymm0
15599 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15600 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15601 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15602 ; NoVLX-NEXT: kmovw %k0, %eax
15603 ; NoVLX-NEXT: shll $16, %eax
15604 ; NoVLX-NEXT: orl %ecx, %eax
15605 ; NoVLX-NEXT: vzeroupper
15608 %0 = bitcast <8 x i64> %__a to <32 x i16>
15609 %load = load <8 x i64>, <8 x i64>* %__b
15610 %1 = bitcast <8 x i64> %load to <32 x i16>
15611 %2 = icmp ult <32 x i16> %0, %1
15612 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15613 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned less-than on 32 x i16 lanes of two register operands. The
; <32 x i1> result is ANDed with the bits of %__u (an i32), widened to 64 lanes
; (shuffle indices 32-63 pick lanes of the zeroinitializer operand, so the
; upper 32 bits are zero) and bitcast to i64.
; In the NoVLX assertions each 256-bit half is compared separately, each
; 16-bit partial mask is ANDed with the matching half of %edi, and the halves
; are merged with shll $16 / orl.
; Assertion lines come from utils/update_llc_test_checks.py; regenerate rather
; than hand-edit.
15617 define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
15618 ; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
15619 ; VLX: # %bb.0: # %entry
15620 ; VLX-NEXT: kmovd %edi, %k1
15621 ; VLX-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
15622 ; VLX-NEXT: kmovq %k0, %rax
15623 ; VLX-NEXT: vzeroupper
15626 ; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
15627 ; NoVLX: # %bb.0: # %entry
15628 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2
15629 ; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
15630 ; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
15631 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
15632 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
15633 ; NoVLX-NEXT: kmovw %k0, %eax
15634 ; NoVLX-NEXT: andl %edi, %eax
15635 ; NoVLX-NEXT: shrl $16, %edi
15636 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
15637 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
15638 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
15639 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15640 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15641 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15642 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15643 ; NoVLX-NEXT: kmovw %k0, %ecx
15644 ; NoVLX-NEXT: andl %edi, %ecx
15645 ; NoVLX-NEXT: shll $16, %ecx
15646 ; NoVLX-NEXT: movzwl %ax, %eax
15647 ; NoVLX-NEXT: orl %ecx, %eax
15648 ; NoVLX-NEXT: vzeroupper
15651 %0 = bitcast <8 x i64> %__a to <32 x i16>
15652 %1 = bitcast <8 x i64> %__b to <32 x i16>
15653 %2 = icmp ult <32 x i16> %0, %1
15654 %3 = bitcast i32 %__u to <32 x i1>
15655 %4 = and <32 x i1> %2, %3
15656 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15657 %6 = bitcast <64 x i1> %5 to i64
; Masked unsigned less-than on 32 x i16 lanes; the right-hand operand is loaded
; from %__b. The <32 x i1> result is ANDed with the bits of %__u (an i32),
; widened to 64 lanes (shuffle indices 32-63 pick lanes of the zeroinitializer
; operand, so the upper 32 bits are zero) and bitcast to i64.
; In the NoVLX assertions each 256-bit half is compared separately (memory
; offsets 0 and 32), each 16-bit partial mask is ANDed with the matching half
; of %edi, and the halves are merged with shll $16 / orl.
; Assertion lines come from utils/update_llc_test_checks.py; regenerate rather
; than hand-edit.
15661 define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
15662 ; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
15663 ; VLX: # %bb.0: # %entry
15664 ; VLX-NEXT: kmovd %edi, %k1
15665 ; VLX-NEXT: vpcmpltuw (%rsi), %zmm0, %k0 {%k1}
15666 ; VLX-NEXT: kmovq %k0, %rax
15667 ; VLX-NEXT: vzeroupper
15670 ; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
15671 ; NoVLX: # %bb.0: # %entry
15672 ; NoVLX-NEXT: vpmaxuw (%rsi), %ymm0, %ymm1
15673 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm1
15674 ; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
15675 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
15676 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
15677 ; NoVLX-NEXT: kmovw %k0, %eax
15678 ; NoVLX-NEXT: andl %edi, %eax
15679 ; NoVLX-NEXT: shrl $16, %edi
15680 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
15681 ; NoVLX-NEXT: vpmaxuw 32(%rsi), %ymm0, %ymm1
15682 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15683 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15684 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15685 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15686 ; NoVLX-NEXT: kmovw %k0, %ecx
15687 ; NoVLX-NEXT: andl %edi, %ecx
15688 ; NoVLX-NEXT: shll $16, %ecx
15689 ; NoVLX-NEXT: movzwl %ax, %eax
15690 ; NoVLX-NEXT: orl %ecx, %eax
15691 ; NoVLX-NEXT: vzeroupper
15694 %0 = bitcast <8 x i64> %__a to <32 x i16>
15695 %load = load <8 x i64>, <8 x i64>* %__b
15696 %1 = bitcast <8 x i64> %load to <32 x i16>
15697 %2 = icmp ult <32 x i16> %0, %1
15698 %3 = bitcast i32 %__u to <32 x i1>
15699 %4 = and <32 x i1> %2, %3
15700 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15701 %6 = bitcast <64 x i1> %5 to i64
; Unsigned less-than on 4 x i32 lanes of two register operands. The <4 x i1>
; result is widened to 8 lanes (shuffle indices 4-7 pick lanes of the
; zeroinitializer operand, so the upper 4 bits are zero) and bitcast to i8.
; In the NoVLX assertions the compare runs on zmm registers and the
; kshiftlw/kshiftrw $12 pair keeps only the low 4 mask bits.
; Assertion lines come from utils/update_llc_test_checks.py; regenerate rather
; than hand-edit.
15706 define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15707 ; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask:
15708 ; VLX: # %bb.0: # %entry
15709 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
15710 ; VLX-NEXT: kmovd %k0, %eax
15711 ; VLX-NEXT: # kill: def $al killed $al killed $eax
15714 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask:
15715 ; NoVLX: # %bb.0: # %entry
15716 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
15717 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15718 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
15719 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15720 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15721 ; NoVLX-NEXT: kmovw %k0, %eax
15722 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
15723 ; NoVLX-NEXT: vzeroupper
15726 %0 = bitcast <2 x i64> %__a to <4 x i32>
15727 %1 = bitcast <2 x i64> %__b to <4 x i32>
15728 %2 = icmp ult <4 x i32> %0, %1
15729 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15730 %4 = bitcast <8 x i1> %3 to i8
; Unsigned less-than on 4 x i32 lanes; the right-hand operand is loaded from
; %__b. The <4 x i1> result is widened to 8 lanes (shuffle indices 4-7 pick
; lanes of the zeroinitializer operand, so the upper 4 bits are zero) and
; bitcast to i8.
; In the NoVLX assertions the compare runs on zmm registers and the
; kshiftlw/kshiftrw $12 pair keeps only the low 4 mask bits.
; Assertion lines come from utils/update_llc_test_checks.py; regenerate rather
; than hand-edit.
15734 define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15735 ; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem:
15736 ; VLX: # %bb.0: # %entry
15737 ; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
15738 ; VLX-NEXT: kmovd %k0, %eax
15739 ; VLX-NEXT: # kill: def $al killed $al killed $eax
15742 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem:
15743 ; NoVLX: # %bb.0: # %entry
15744 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15745 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
15746 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
15747 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15748 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15749 ; NoVLX-NEXT: kmovw %k0, %eax
15750 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
15751 ; NoVLX-NEXT: vzeroupper
15754 %0 = bitcast <2 x i64> %__a to <4 x i32>
15755 %load = load <2 x i64>, <2 x i64>* %__b
15756 %1 = bitcast <2 x i64> %load to <4 x i32>
15757 %2 = icmp ult <4 x i32> %0, %1
15758 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15759 %4 = bitcast <8 x i1> %3 to i8
; Masked unsigned less-than on 4 x i32 lanes of two register operands. %__u is
; bitcast to <8 x i1> and its low 4 lanes (%extract.i) are ANDed with the
; <4 x i1> compare result. That value is widened to 8 lanes (shuffle indices
; 4-7 pick lanes of the zeroinitializer operand, so the upper 4 bits are zero)
; and bitcast to i8.
; Assertion lines come from utils/update_llc_test_checks.py; regenerate rather
; than hand-edit.
15763 define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15764 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask:
15765 ; VLX: # %bb.0: # %entry
15766 ; VLX-NEXT: kmovd %edi, %k1
15767 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
15768 ; VLX-NEXT: kmovd %k0, %eax
15769 ; VLX-NEXT: # kill: def $al killed $al killed $eax
15772 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask:
15773 ; NoVLX: # %bb.0: # %entry
15774 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
15775 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15776 ; NoVLX-NEXT: kmovw %edi, %k1
15777 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
15778 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15779 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15780 ; NoVLX-NEXT: kmovw %k0, %eax
15781 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
15782 ; NoVLX-NEXT: vzeroupper
15785 %0 = bitcast <2 x i64> %__a to <4 x i32>
15786 %1 = bitcast <2 x i64> %__b to <4 x i32>
15787 %2 = icmp ult <4 x i32> %0, %1
15788 %3 = bitcast i8 %__u to <8 x i1>
15789 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15790 %4 = and <4 x i1> %2, %extract.i
15791 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15792 %6 = bitcast <8 x i1> %5 to i8
; Masked unsigned less-than on 4 x i32 lanes; the right-hand operand is loaded
; from %__b. %__u is bitcast to <8 x i1> and its low 4 lanes (%extract.i) are
; ANDed with the <4 x i1> compare result. That value is widened to 8 lanes
; (shuffle indices 4-7 pick lanes of the zeroinitializer operand, so the upper
; 4 bits are zero) and bitcast to i8.
; Assertion lines come from utils/update_llc_test_checks.py; regenerate rather
; than hand-edit.
15796 define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15797 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem:
15798 ; VLX: # %bb.0: # %entry
15799 ; VLX-NEXT: kmovd %edi, %k1
15800 ; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
15801 ; VLX-NEXT: kmovd %k0, %eax
15802 ; VLX-NEXT: # kill: def $al killed $al killed $eax
15805 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem:
15806 ; NoVLX: # %bb.0: # %entry
15807 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15808 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
15809 ; NoVLX-NEXT: kmovw %edi, %k1
15810 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
15811 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15812 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15813 ; NoVLX-NEXT: kmovw %k0, %eax
15814 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
15815 ; NoVLX-NEXT: vzeroupper
15818 %0 = bitcast <2 x i64> %__a to <4 x i32>
15819 %load = load <2 x i64>, <2 x i64>* %__b
15820 %1 = bitcast <2 x i64> %load to <4 x i32>
15821 %2 = icmp ult <4 x i32> %0, %1
15822 %3 = bitcast i8 %__u to <8 x i1>
15823 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15824 %4 = and <4 x i1> %2, %extract.i
15825 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15826 %6 = bitcast <8 x i1> %5 to i8
; Unsigned less-than on 4 x i32 lanes against a broadcast scalar: one i32 is
; loaded from %__b, inserted into lane 0 and splatted to all 4 lanes by a
; shufflevector with all-zero indices. The <4 x i1> result is widened to 8
; lanes (shuffle indices 4-7 pick lanes of the zeroinitializer operand, so the
; upper 4 bits are zero) and bitcast to i8.
; The VLX assertions expect the {1to4} embedded-broadcast memory operand; the
; NoVLX assertions expect a separate vpbroadcastd.
; Assertion lines come from utils/update_llc_test_checks.py; regenerate rather
; than hand-edit.
15831 define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
15832 ; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b:
15833 ; VLX: # %bb.0: # %entry
15834 ; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
15835 ; VLX-NEXT: kmovd %k0, %eax
15836 ; VLX-NEXT: # kill: def $al killed $al killed $eax
15839 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b:
15840 ; NoVLX: # %bb.0: # %entry
15841 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15842 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
15843 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
15844 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15845 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15846 ; NoVLX-NEXT: kmovw %k0, %eax
15847 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
15848 ; NoVLX-NEXT: vzeroupper
15851 %0 = bitcast <2 x i64> %__a to <4 x i32>
15852 %load = load i32, i32* %__b
15853 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
15854 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
15855 %2 = icmp ult <4 x i32> %0, %1
15856 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15857 %4 = bitcast <8 x i1> %3 to i8
; Masked unsigned less-than on 4 x i32 lanes against a broadcast scalar: one
; i32 is loaded from %__b, inserted into lane 0 and splatted to all 4 lanes.
; %__u is bitcast to <8 x i1> and its low 4 lanes (%extract.i) are ANDed with
; the <4 x i1> compare result. That value is widened to 8 lanes (shuffle
; indices 4-7 pick lanes of the zeroinitializer operand, so the upper 4 bits
; are zero) and bitcast to i8.
; Assertion lines come from utils/update_llc_test_checks.py; regenerate rather
; than hand-edit.
15861 define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
15862 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b:
15863 ; VLX: # %bb.0: # %entry
15864 ; VLX-NEXT: kmovd %edi, %k1
15865 ; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
15866 ; VLX-NEXT: kmovd %k0, %eax
15867 ; VLX-NEXT: # kill: def $al killed $al killed $eax
15870 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b:
15871 ; NoVLX: # %bb.0: # %entry
15872 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15873 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
15874 ; NoVLX-NEXT: kmovw %edi, %k1
15875 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
15876 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15877 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15878 ; NoVLX-NEXT: kmovw %k0, %eax
15879 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
15880 ; NoVLX-NEXT: vzeroupper
15883 %0 = bitcast <2 x i64> %__a to <4 x i32>
15884 %load = load i32, i32* %__b
15885 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
15886 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
15887 %2 = icmp ult <4 x i32> %0, %1
15888 %3 = bitcast i8 %__u to <8 x i1>
15889 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15890 %4 = and <4 x i1> %extract.i, %2
15891 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15892 %6 = bitcast <8 x i1> %5 to i8
; Unsigned less-than on 4 x i32 lanes of two register operands. The <4 x i1>
; result is widened to 16 lanes (every shuffle index >= 4 picks a lane of the
; zeroinitializer operand, so the upper 12 bits are zero) and bitcast to i16.
; In the NoVLX assertions the compare runs on zmm registers and the
; kshiftlw/kshiftrw $12 pair keeps only the low 4 mask bits.
; Assertion lines come from utils/update_llc_test_checks.py; regenerate rather
; than hand-edit.
15897 define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15898 ; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask:
15899 ; VLX: # %bb.0: # %entry
15900 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
15901 ; VLX-NEXT: kmovd %k0, %eax
15902 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
15905 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask:
15906 ; NoVLX: # %bb.0: # %entry
15907 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
15908 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15909 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
15910 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15911 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15912 ; NoVLX-NEXT: kmovw %k0, %eax
15913 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
15914 ; NoVLX-NEXT: vzeroupper
15917 %0 = bitcast <2 x i64> %__a to <4 x i32>
15918 %1 = bitcast <2 x i64> %__b to <4 x i32>
15919 %2 = icmp ult <4 x i32> %0, %1
15920 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
15921 %4 = bitcast <16 x i1> %3 to i16
; Unsigned less-than on 4 x i32 lanes; the right-hand operand is loaded from
; %__b. The <4 x i1> result is widened to 16 lanes (every shuffle index >= 4
; picks a lane of the zeroinitializer operand, so the upper 12 bits are zero)
; and bitcast to i16.
; Assertion lines come from utils/update_llc_test_checks.py; regenerate rather
; than hand-edit.
15925 define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15926 ; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem:
15927 ; VLX: # %bb.0: # %entry
15928 ; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
15929 ; VLX-NEXT: kmovd %k0, %eax
15930 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
15933 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem:
15934 ; NoVLX: # %bb.0: # %entry
15935 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15936 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
15937 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
15938 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15939 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15940 ; NoVLX-NEXT: kmovw %k0, %eax
15941 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
15942 ; NoVLX-NEXT: vzeroupper
15945 %0 = bitcast <2 x i64> %__a to <4 x i32>
15946 %load = load <2 x i64>, <2 x i64>* %__b
15947 %1 = bitcast <2 x i64> %load to <4 x i32>
15948 %2 = icmp ult <4 x i32> %0, %1
15949 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
15950 %4 = bitcast <16 x i1> %3 to i16
; Masked unsigned less-than on 4 x i32 lanes of two register operands. %__u is
; bitcast to <8 x i1> and its low 4 lanes (%extract.i) are ANDed with the
; <4 x i1> compare result. That value is widened to 16 lanes (every shuffle
; index >= 4 picks a lane of the zeroinitializer operand, so the upper 12 bits
; are zero) and bitcast to i16.
; Assertion lines come from utils/update_llc_test_checks.py; regenerate rather
; than hand-edit.
15954 define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15955 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask:
15956 ; VLX: # %bb.0: # %entry
15957 ; VLX-NEXT: kmovd %edi, %k1
15958 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
15959 ; VLX-NEXT: kmovd %k0, %eax
15960 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
15963 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask:
15964 ; NoVLX: # %bb.0: # %entry
15965 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
15966 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15967 ; NoVLX-NEXT: kmovw %edi, %k1
15968 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
15969 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15970 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15971 ; NoVLX-NEXT: kmovw %k0, %eax
15972 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
15973 ; NoVLX-NEXT: vzeroupper
15976 %0 = bitcast <2 x i64> %__a to <4 x i32>
15977 %1 = bitcast <2 x i64> %__b to <4 x i32>
15978 %2 = icmp ult <4 x i32> %0, %1
15979 %3 = bitcast i8 %__u to <8 x i1>
15980 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15981 %4 = and <4 x i1> %2, %extract.i
15982 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
15983 %6 = bitcast <16 x i1> %5 to i16
; Masked unsigned less-than on 4 x i32 lanes; the right-hand operand is loaded
; from %__b. %__u is bitcast to <8 x i1> and its low 4 lanes (%extract.i) are
; ANDed with the <4 x i1> compare result. That value is widened to 16 lanes
; (every shuffle index >= 4 picks a lane of the zeroinitializer operand, so the
; upper 12 bits are zero) and bitcast to i16.
; Assertion lines come from utils/update_llc_test_checks.py; regenerate rather
; than hand-edit.
15987 define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15988 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem:
15989 ; VLX: # %bb.0: # %entry
15990 ; VLX-NEXT: kmovd %edi, %k1
15991 ; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
15992 ; VLX-NEXT: kmovd %k0, %eax
15993 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
15996 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem:
15997 ; NoVLX: # %bb.0: # %entry
15998 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15999 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
16000 ; NoVLX-NEXT: kmovw %edi, %k1
16001 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16002 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16003 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16004 ; NoVLX-NEXT: kmovw %k0, %eax
16005 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16006 ; NoVLX-NEXT: vzeroupper
16009 %0 = bitcast <2 x i64> %__a to <4 x i32>
16010 %load = load <2 x i64>, <2 x i64>* %__b
16011 %1 = bitcast <2 x i64> %load to <4 x i32>
16012 %2 = icmp ult <4 x i32> %0, %1
16013 %3 = bitcast i8 %__u to <8 x i1>
16014 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16015 %4 = and <4 x i1> %2, %extract.i
16016 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16017 %6 = bitcast <16 x i1> %5 to i16
; Unsigned less-than on 4 x i32 lanes against a broadcast scalar: one i32 is
; loaded from %__b, inserted into lane 0 and splatted to all 4 lanes by a
; shufflevector with all-zero indices. The <4 x i1> result is widened to 16
; lanes (every shuffle index >= 4 picks a lane of the zeroinitializer operand,
; so the upper 12 bits are zero) and bitcast to i16.
; Assertion lines come from utils/update_llc_test_checks.py; regenerate rather
; than hand-edit.
16022 define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
16023 ; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b:
16024 ; VLX: # %bb.0: # %entry
16025 ; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
16026 ; VLX-NEXT: kmovd %k0, %eax
16027 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16030 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b:
16031 ; NoVLX: # %bb.0: # %entry
16032 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16033 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
16034 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16035 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16036 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16037 ; NoVLX-NEXT: kmovw %k0, %eax
16038 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16039 ; NoVLX-NEXT: vzeroupper
16042 %0 = bitcast <2 x i64> %__a to <4 x i32>
16043 %load = load i32, i32* %__b
16044 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16045 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16046 %2 = icmp ult <4 x i32> %0, %1
16047 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16048 %4 = bitcast <16 x i1> %3 to i16
; Masked unsigned less-than on 4 x i32 lanes against a broadcast scalar: one
; i32 is loaded from %__b, inserted into lane 0 and splatted to all 4 lanes.
; %__u is bitcast to <8 x i1> and its low 4 lanes (%extract.i) are ANDed with
; the <4 x i1> compare result. That value is widened to 16 lanes (every shuffle
; index >= 4 picks a lane of the zeroinitializer operand, so the upper 12 bits
; are zero) and bitcast to i16.
; Assertion lines come from utils/update_llc_test_checks.py; regenerate rather
; than hand-edit.
16052 define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
16053 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b:
16054 ; VLX: # %bb.0: # %entry
16055 ; VLX-NEXT: kmovd %edi, %k1
16056 ; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
16057 ; VLX-NEXT: kmovd %k0, %eax
16058 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16061 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b:
16062 ; NoVLX: # %bb.0: # %entry
16063 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16064 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
16065 ; NoVLX-NEXT: kmovw %edi, %k1
16066 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16067 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16068 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16069 ; NoVLX-NEXT: kmovw %k0, %eax
16070 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16071 ; NoVLX-NEXT: vzeroupper
16074 %0 = bitcast <2 x i64> %__a to <4 x i32>
16075 %load = load i32, i32* %__b
16076 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16077 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16078 %2 = icmp ult <4 x i32> %0, %1
16079 %3 = bitcast i8 %__u to <8 x i1>
16080 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16081 %4 = and <4 x i1> %extract.i, %2
16082 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16083 %6 = bitcast <16 x i1> %5 to i16
; Unsigned less-than on 4 x i32 lanes of two register operands. The <4 x i1>
; result is widened to 32 lanes (every shuffle index >= 4 picks a lane of the
; zeroinitializer operand, so the upper 28 bits are zero) and bitcast to i32.
; In the NoVLX assertions the compare runs on zmm registers and the
; kshiftlw/kshiftrw $12 pair keeps only the low 4 mask bits.
; Assertion lines come from utils/update_llc_test_checks.py; regenerate rather
; than hand-edit.
16088 define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16089 ; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask:
16090 ; VLX: # %bb.0: # %entry
16091 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
16092 ; VLX-NEXT: kmovd %k0, %eax
16095 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask:
16096 ; NoVLX: # %bb.0: # %entry
16097 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
16098 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16099 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16100 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16101 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16102 ; NoVLX-NEXT: kmovw %k0, %eax
16103 ; NoVLX-NEXT: vzeroupper
16106 %0 = bitcast <2 x i64> %__a to <4 x i32>
16107 %1 = bitcast <2 x i64> %__b to <4 x i32>
16108 %2 = icmp ult <4 x i32> %0, %1
16109 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16110 %4 = bitcast <32 x i1> %3 to i32
; Unsigned less-than on 4 x i32 lanes; the right-hand operand is loaded from
; %__b. The <4 x i1> result is widened to 32 lanes (every shuffle index >= 4
; picks a lane of the zeroinitializer operand, so the upper 28 bits are zero)
; and bitcast to i32.
; Assertion lines come from utils/update_llc_test_checks.py; regenerate rather
; than hand-edit.
16114 define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
16115 ; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem:
16116 ; VLX: # %bb.0: # %entry
16117 ; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
16118 ; VLX-NEXT: kmovd %k0, %eax
16121 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem:
16122 ; NoVLX: # %bb.0: # %entry
16123 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16124 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
16125 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16126 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16127 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16128 ; NoVLX-NEXT: kmovw %k0, %eax
16129 ; NoVLX-NEXT: vzeroupper
16132 %0 = bitcast <2 x i64> %__a to <4 x i32>
16133 %load = load <2 x i64>, <2 x i64>* %__b
16134 %1 = bitcast <2 x i64> %load to <4 x i32>
16135 %2 = icmp ult <4 x i32> %0, %1
16136 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16137 %4 = bitcast <32 x i1> %3 to i32
; Masked form of the <4 x i32> unsigned less-than compare with register
; operands: the low four lanes of the i8 mask %__u are ANDed with the compare
; result before it is widened to <32 x i1> and returned as i32. Both run
; configurations are expected to move %edi into %k1 and use it as the write
; mask of vpcmpltud; NoVLX additionally widens to zmm and clears the upper
; mask bits with the kshiftlw/kshiftrw pair.
16141 define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16142 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask:
16143 ; VLX: # %bb.0: # %entry
16144 ; VLX-NEXT: kmovd %edi, %k1
16145 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
16146 ; VLX-NEXT: kmovd %k0, %eax
16149 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask:
16150 ; NoVLX: # %bb.0: # %entry
16151 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
16152 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16153 ; NoVLX-NEXT: kmovw %edi, %k1
16154 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16155 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16156 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16157 ; NoVLX-NEXT: kmovw %k0, %eax
16158 ; NoVLX-NEXT: vzeroupper
16161 %0 = bitcast <2 x i64> %__a to <4 x i32>
16162 %1 = bitcast <2 x i64> %__b to <4 x i32>
16163 %2 = icmp ult <4 x i32> %0, %1
16164 %3 = bitcast i8 %__u to <8 x i1>
; Only lanes 0-3 of the 8-lane mask are kept, matching the compare width.
16165 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16166 %4 = and <4 x i1> %2, %extract.i
; Indices 4-7 select zero lanes, so the widened result is zero above lane 3.
16167 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16168 %6 = bitcast <32 x i1> %5 to i32
; Masked <4 x i32> unsigned less-than compare whose right-hand operand is
; loaded from %__b; the result is ANDed with the low four lanes of %__u and
; widened to a 32-bit mask. The VLX checks expect the load folded into a
; write-masked vpcmpltud via (%rsi); the NoVLX checks expect an explicit
; vmovdqa load, a 512-bit masked compare and the kshiftlw/kshiftrw cleanup.
16172 define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
16173 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem:
16174 ; VLX: # %bb.0: # %entry
16175 ; VLX-NEXT: kmovd %edi, %k1
16176 ; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
16177 ; VLX-NEXT: kmovd %k0, %eax
16180 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem:
16181 ; NoVLX: # %bb.0: # %entry
16182 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16183 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
16184 ; NoVLX-NEXT: kmovw %edi, %k1
16185 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16186 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16187 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16188 ; NoVLX-NEXT: kmovw %k0, %eax
16189 ; NoVLX-NEXT: vzeroupper
16192 %0 = bitcast <2 x i64> %__a to <4 x i32>
16193 %load = load <2 x i64>, <2 x i64>* %__b
16194 %1 = bitcast <2 x i64> %load to <4 x i32>
16195 %2 = icmp ult <4 x i32> %0, %1
16196 %3 = bitcast i8 %__u to <8 x i1>
; Only lanes 0-3 of the 8-lane mask are kept, matching the compare width.
16197 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16198 %4 = and <4 x i1> %2, %extract.i
; Indices 4-7 select zero lanes, so the widened result is zero above lane 3.
16199 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16200 %6 = bitcast <32 x i1> %5 to i32
; Broadcast-operand variant: a scalar i32 is loaded from %__b, splatted to
; <4 x i32>, and used as the right-hand side of the unsigned less-than compare;
; the result is widened to a 32-bit mask. The VLX checks expect the splat to
; fold into vpcmpltud as a {1to4} embedded-broadcast memory operand; the NoVLX
; checks expect a separate vpbroadcastd before the 512-bit compare.
16205 define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
16206 ; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b:
16207 ; VLX: # %bb.0: # %entry
16208 ; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
16209 ; VLX-NEXT: kmovd %k0, %eax
16212 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b:
16213 ; NoVLX: # %bb.0: # %entry
16214 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16215 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
16216 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16217 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16218 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16219 ; NoVLX-NEXT: kmovw %k0, %eax
16220 ; NoVLX-NEXT: vzeroupper
16223 %0 = bitcast <2 x i64> %__a to <4 x i32>
16224 %load = load i32, i32* %__b
; insertelement + all-zero shuffle mask splats the loaded scalar to all lanes.
16225 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16226 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16227 %2 = icmp ult <4 x i32> %0, %1
; Indices 4-7 select zero lanes, so the widened result is zero above lane 3.
16228 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16229 %4 = bitcast <32 x i1> %3 to i32
; Masked broadcast-operand variant: the splatted i32 loaded from %__b is the
; right-hand side of the <4 x i32> unsigned less-than compare, the result is
; ANDed with the low four lanes of %__u (mask as the first AND operand here)
; and widened to a 32-bit mask. The VLX checks expect a write-masked vpcmpltud
; with a {1to4} memory operand; the NoVLX checks expect vpbroadcastd, a 512-bit
; masked compare and the kshiftlw/kshiftrw cleanup.
16233 define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
16234 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b:
16235 ; VLX: # %bb.0: # %entry
16236 ; VLX-NEXT: kmovd %edi, %k1
16237 ; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
16238 ; VLX-NEXT: kmovd %k0, %eax
16241 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b:
16242 ; NoVLX: # %bb.0: # %entry
16243 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16244 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
16245 ; NoVLX-NEXT: kmovw %edi, %k1
16246 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16247 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16248 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16249 ; NoVLX-NEXT: kmovw %k0, %eax
16250 ; NoVLX-NEXT: vzeroupper
16253 %0 = bitcast <2 x i64> %__a to <4 x i32>
16254 %load = load i32, i32* %__b
; insertelement + all-zero shuffle mask splats the loaded scalar to all lanes.
16255 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16256 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16257 %2 = icmp ult <4 x i32> %0, %1
16258 %3 = bitcast i8 %__u to <8 x i1>
; Only lanes 0-3 of the 8-lane mask are kept, matching the compare width.
16259 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16260 %4 = and <4 x i1> %extract.i, %2
; Indices 4-7 select zero lanes, so the widened result is zero above lane 3.
16261 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16262 %6 = bitcast <32 x i1> %5 to i32
; Unsigned less-than compare of two <4 x i32> register operands, with the
; <4 x i1> result padded with zero lanes to <64 x i1> and returned as i64.
; The VLX checks expect a 128-bit vpcmpltud and a kmovq of the mask into %rax;
; the NoVLX checks expect a 512-bit compare, the kshiftlw/kshiftrw pair that
; clears mask bits above the low four, and a kmovw into %eax.
16267 define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16268 ; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask:
16269 ; VLX: # %bb.0: # %entry
16270 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
16271 ; VLX-NEXT: kmovq %k0, %rax
16274 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask:
16275 ; NoVLX: # %bb.0: # %entry
16276 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
16277 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16278 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16279 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16280 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16281 ; NoVLX-NEXT: kmovw %k0, %eax
16282 ; NoVLX-NEXT: vzeroupper
16285 %0 = bitcast <2 x i64> %__a to <4 x i32>
16286 %1 = bitcast <2 x i64> %__b to <4 x i32>
16287 %2 = icmp ult <4 x i32> %0, %1
; Shuffle indices 0-3 select the compare result; indices 4-7 select lanes of
; the zeroinitializer operand, so every lane above the fourth is zero.
16288 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16289 %4 = bitcast <64 x i1> %3 to i64
; <4 x i32> unsigned less-than compare widened to a 64-bit mask, with the
; right-hand operand loaded from %__b. The VLX checks expect the load folded
; into vpcmpltud as (%rdi) followed by kmovq; the NoVLX checks expect a
; separate vmovdqa, a 512-bit compare and the kshiftlw/kshiftrw cleanup.
16293 define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
16294 ; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem:
16295 ; VLX: # %bb.0: # %entry
16296 ; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
16297 ; VLX-NEXT: kmovq %k0, %rax
16300 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem:
16301 ; NoVLX: # %bb.0: # %entry
16302 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16303 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
16304 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16305 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16306 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16307 ; NoVLX-NEXT: kmovw %k0, %eax
16308 ; NoVLX-NEXT: vzeroupper
16311 %0 = bitcast <2 x i64> %__a to <4 x i32>
16312 %load = load <2 x i64>, <2 x i64>* %__b
16313 %1 = bitcast <2 x i64> %load to <4 x i32>
16314 %2 = icmp ult <4 x i32> %0, %1
; Lanes 4 and above come from the zeroinitializer operand (indices 4-7).
16315 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16316 %4 = bitcast <64 x i1> %3 to i64
; Masked <4 x i32> unsigned less-than compare on register operands: the result
; is ANDed with the low four lanes of the i8 mask %__u, widened with zero lanes
; to <64 x i1>, and returned as i64. Both configurations are expected to load
; %edi into %k1 and use it as the vpcmpltud write mask; VLX reads the mask out
; with kmovq, NoVLX with kmovw after the kshiftlw/kshiftrw cleanup.
16320 define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16321 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask:
16322 ; VLX: # %bb.0: # %entry
16323 ; VLX-NEXT: kmovd %edi, %k1
16324 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
16325 ; VLX-NEXT: kmovq %k0, %rax
16328 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask:
16329 ; NoVLX: # %bb.0: # %entry
16330 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
16331 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16332 ; NoVLX-NEXT: kmovw %edi, %k1
16333 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16334 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16335 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16336 ; NoVLX-NEXT: kmovw %k0, %eax
16337 ; NoVLX-NEXT: vzeroupper
16340 %0 = bitcast <2 x i64> %__a to <4 x i32>
16341 %1 = bitcast <2 x i64> %__b to <4 x i32>
16342 %2 = icmp ult <4 x i32> %0, %1
16343 %3 = bitcast i8 %__u to <8 x i1>
; Only lanes 0-3 of the 8-lane mask are kept, matching the compare width.
16344 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16345 %4 = and <4 x i1> %2, %extract.i
; Indices 4-7 select zero lanes, so the widened result is zero above lane 3.
16346 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16347 %6 = bitcast <64 x i1> %5 to i64
; Masked <4 x i32> unsigned less-than compare with the right-hand operand
; loaded from %__b; the result is ANDed with the low four lanes of %__u and
; widened to a 64-bit mask. The VLX checks expect a write-masked vpcmpltud
; with a folded (%rsi) operand and kmovq; the NoVLX checks expect vmovdqa, a
; 512-bit masked compare, and the kshiftlw/kshiftrw cleanup.
16351 define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
16352 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem:
16353 ; VLX: # %bb.0: # %entry
16354 ; VLX-NEXT: kmovd %edi, %k1
16355 ; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
16356 ; VLX-NEXT: kmovq %k0, %rax
16359 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem:
16360 ; NoVLX: # %bb.0: # %entry
16361 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16362 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
16363 ; NoVLX-NEXT: kmovw %edi, %k1
16364 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16365 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16366 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16367 ; NoVLX-NEXT: kmovw %k0, %eax
16368 ; NoVLX-NEXT: vzeroupper
16371 %0 = bitcast <2 x i64> %__a to <4 x i32>
16372 %load = load <2 x i64>, <2 x i64>* %__b
16373 %1 = bitcast <2 x i64> %load to <4 x i32>
16374 %2 = icmp ult <4 x i32> %0, %1
16375 %3 = bitcast i8 %__u to <8 x i1>
; Only lanes 0-3 of the 8-lane mask are kept, matching the compare width.
16376 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16377 %4 = and <4 x i1> %2, %extract.i
; Indices 4-7 select zero lanes, so the widened result is zero above lane 3.
16378 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16379 %6 = bitcast <64 x i1> %5 to i64
; Broadcast-operand variant returning a 64-bit mask: a scalar i32 loaded from
; %__b is splatted to <4 x i32> and compared (unsigned less-than) against %__a.
; The VLX checks expect vpcmpltud with a {1to4} embedded-broadcast operand and
; kmovq; the NoVLX checks expect vpbroadcastd, a 512-bit compare and the
; kshiftlw/kshiftrw cleanup.
16384 define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
16385 ; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b:
16386 ; VLX: # %bb.0: # %entry
16387 ; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
16388 ; VLX-NEXT: kmovq %k0, %rax
16391 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b:
16392 ; NoVLX: # %bb.0: # %entry
16393 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16394 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
16395 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16396 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16397 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16398 ; NoVLX-NEXT: kmovw %k0, %eax
16399 ; NoVLX-NEXT: vzeroupper
16402 %0 = bitcast <2 x i64> %__a to <4 x i32>
16403 %load = load i32, i32* %__b
; insertelement + all-zero shuffle mask splats the loaded scalar to all lanes.
16404 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16405 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16406 %2 = icmp ult <4 x i32> %0, %1
; Indices 4-7 select zero lanes, so the widened result is zero above lane 3.
16407 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16408 %4 = bitcast <64 x i1> %3 to i64
; Masked broadcast-operand variant returning a 64-bit mask: the splatted i32
; from %__b is the right-hand side of the <4 x i32> unsigned less-than compare,
; and the result is ANDed with the low four lanes of %__u (mask as the first
; AND operand here). The VLX checks expect a write-masked vpcmpltud with a
; {1to4} operand and kmovq; the NoVLX checks expect vpbroadcastd, a 512-bit
; masked compare and the kshiftlw/kshiftrw cleanup.
16412 define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
16413 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b:
16414 ; VLX: # %bb.0: # %entry
16415 ; VLX-NEXT: kmovd %edi, %k1
16416 ; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
16417 ; VLX-NEXT: kmovq %k0, %rax
16420 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b:
16421 ; NoVLX: # %bb.0: # %entry
16422 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16423 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
16424 ; NoVLX-NEXT: kmovw %edi, %k1
16425 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16426 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16427 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16428 ; NoVLX-NEXT: kmovw %k0, %eax
16429 ; NoVLX-NEXT: vzeroupper
16432 %0 = bitcast <2 x i64> %__a to <4 x i32>
16433 %load = load i32, i32* %__b
; insertelement + all-zero shuffle mask splats the loaded scalar to all lanes.
16434 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16435 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16436 %2 = icmp ult <4 x i32> %0, %1
16437 %3 = bitcast i8 %__u to <8 x i1>
; Only lanes 0-3 of the 8-lane mask are kept, matching the compare width.
16438 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16439 %4 = and <4 x i1> %extract.i, %2
; Indices 4-7 select zero lanes, so the widened result is zero above lane 3.
16440 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16441 %6 = bitcast <64 x i1> %5 to i64
; Unsigned less-than compare of two <8 x i32> register operands. The <8 x i1>
; result is padded with zero lanes to <16 x i1> and returned as an i16 bitmask.
; The VLX checks expect a single 256-bit vpcmpltud into a mask register; the
; NoVLX checks expect a 512-bit compare followed by a kshiftlw/kshiftrw pair
; (shift count 8) that clears the upper eight mask bits.
16446 define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16447 ; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask:
16448 ; VLX: # %bb.0: # %entry
16449 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0
16450 ; VLX-NEXT: kmovd %k0, %eax
16451 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16452 ; VLX-NEXT: vzeroupper
16455 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask:
16456 ; NoVLX: # %bb.0: # %entry
16457 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
16458 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16459 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16460 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16461 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16462 ; NoVLX-NEXT: kmovw %k0, %eax
16463 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16464 ; NoVLX-NEXT: vzeroupper
16467 %0 = bitcast <4 x i64> %__a to <8 x i32>
16468 %1 = bitcast <4 x i64> %__b to <8 x i32>
16469 %2 = icmp ult <8 x i32> %0, %1
; Shuffle indices 0-7 select the compare result; indices 8-15 select lanes of
; the zeroinitializer operand, so the upper eight lanes are zero.
16470 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16471 %4 = bitcast <16 x i1> %3 to i16
; <8 x i32> unsigned less-than compare widened to a 16-bit mask, with the
; right-hand operand loaded from %__b. The VLX checks expect the load folded
; into vpcmpltud as (%rdi); the NoVLX checks expect a separate ymm vmovdqa
; before the 512-bit compare and the kshiftlw/kshiftrw cleanup.
16475 define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
16476 ; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem:
16477 ; VLX: # %bb.0: # %entry
16478 ; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0
16479 ; VLX-NEXT: kmovd %k0, %eax
16480 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16481 ; VLX-NEXT: vzeroupper
16484 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem:
16485 ; NoVLX: # %bb.0: # %entry
16486 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16487 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
16488 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16489 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16490 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16491 ; NoVLX-NEXT: kmovw %k0, %eax
16492 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16493 ; NoVLX-NEXT: vzeroupper
16496 %0 = bitcast <4 x i64> %__a to <8 x i32>
16497 %load = load <4 x i64>, <4 x i64>* %__b
16498 %1 = bitcast <4 x i64> %load to <8 x i32>
16499 %2 = icmp ult <8 x i32> %0, %1
; Indices 8-15 select zero lanes, so the upper eight result lanes are zero.
16500 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16501 %4 = bitcast <16 x i1> %3 to i16
; Masked <8 x i32> unsigned less-than compare on register operands: the result
; is ANDed with all eight lanes of the i8 mask %__u, padded with zero lanes to
; <16 x i1>, and returned as i16. Both configurations are expected to load
; %edi into %k1 and use it as the vpcmpltud write mask; NoVLX additionally
; widens to zmm and clears the upper eight mask bits.
16505 define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16506 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask:
16507 ; VLX: # %bb.0: # %entry
16508 ; VLX-NEXT: kmovd %edi, %k1
16509 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
16510 ; VLX-NEXT: kmovd %k0, %eax
16511 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16512 ; VLX-NEXT: vzeroupper
16515 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask:
16516 ; NoVLX: # %bb.0: # %entry
16517 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
16518 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16519 ; NoVLX-NEXT: kmovw %edi, %k1
16520 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16521 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16522 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16523 ; NoVLX-NEXT: kmovw %k0, %eax
16524 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16525 ; NoVLX-NEXT: vzeroupper
16528 %0 = bitcast <4 x i64> %__a to <8 x i32>
16529 %1 = bitcast <4 x i64> %__b to <8 x i32>
16530 %2 = icmp ult <8 x i32> %0, %1
; The i8 mask already has one bit per compare lane, so no extract is needed.
16531 %3 = bitcast i8 %__u to <8 x i1>
16532 %4 = and <8 x i1> %2, %3
; Indices 8-15 select zero lanes, so the upper eight result lanes are zero.
16533 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16534 %6 = bitcast <16 x i1> %5 to i16
; Masked <8 x i32> unsigned less-than compare with the right-hand operand
; loaded from %__b; the result is ANDed with the i8 mask %__u and widened to a
; 16-bit mask. The VLX checks expect a write-masked vpcmpltud with a folded
; (%rsi) operand; the NoVLX checks expect a ymm vmovdqa, a 512-bit masked
; compare and the kshiftlw/kshiftrw cleanup.
16538 define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
16539 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem:
16540 ; VLX: # %bb.0: # %entry
16541 ; VLX-NEXT: kmovd %edi, %k1
16542 ; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
16543 ; VLX-NEXT: kmovd %k0, %eax
16544 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16545 ; VLX-NEXT: vzeroupper
16548 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem:
16549 ; NoVLX: # %bb.0: # %entry
16550 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16551 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
16552 ; NoVLX-NEXT: kmovw %edi, %k1
16553 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16554 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16555 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16556 ; NoVLX-NEXT: kmovw %k0, %eax
16557 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16558 ; NoVLX-NEXT: vzeroupper
16561 %0 = bitcast <4 x i64> %__a to <8 x i32>
16562 %load = load <4 x i64>, <4 x i64>* %__b
16563 %1 = bitcast <4 x i64> %load to <8 x i32>
16564 %2 = icmp ult <8 x i32> %0, %1
; The i8 mask already has one bit per compare lane, so no extract is needed.
16565 %3 = bitcast i8 %__u to <8 x i1>
16566 %4 = and <8 x i1> %2, %3
; Indices 8-15 select zero lanes, so the upper eight result lanes are zero.
16567 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16568 %6 = bitcast <16 x i1> %5 to i16
; Broadcast-operand variant for <8 x i32>: a scalar i32 loaded from %__b is
; splatted to eight lanes and used as the right-hand side of the unsigned
; less-than compare; the result is widened to a 16-bit mask. The VLX checks
; expect vpcmpltud with a {1to8} embedded-broadcast operand; the NoVLX checks
; expect a ymm vpbroadcastd, a 512-bit compare and the kshiftlw/kshiftrw
; cleanup.
16573 define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
16574 ; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b:
16575 ; VLX: # %bb.0: # %entry
16576 ; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
16577 ; VLX-NEXT: kmovd %k0, %eax
16578 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16579 ; VLX-NEXT: vzeroupper
16582 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b:
16583 ; NoVLX: # %bb.0: # %entry
16584 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16585 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
16586 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16587 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16588 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16589 ; NoVLX-NEXT: kmovw %k0, %eax
16590 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16591 ; NoVLX-NEXT: vzeroupper
16594 %0 = bitcast <4 x i64> %__a to <8 x i32>
16595 %load = load i32, i32* %__b
; insertelement + all-zero shuffle mask splats the loaded scalar to all lanes.
16596 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16597 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16598 %2 = icmp ult <8 x i32> %0, %1
; Indices 8-15 select zero lanes, so the upper eight result lanes are zero.
16599 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16600 %4 = bitcast <16 x i1> %3 to i16
; Masked broadcast-operand variant for <8 x i32>: the splatted i32 from %__b
; is the right-hand side of the unsigned less-than compare, and the result is
; ANDed with the i8 mask %__u (mask as the first AND operand here) before being
; widened to a 16-bit mask. The VLX checks expect a write-masked vpcmpltud
; with a {1to8} operand; the NoVLX checks expect vpbroadcastd, a 512-bit masked
; compare and the kshiftlw/kshiftrw cleanup.
16604 define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
16605 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b:
16606 ; VLX: # %bb.0: # %entry
16607 ; VLX-NEXT: kmovd %edi, %k1
16608 ; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
16609 ; VLX-NEXT: kmovd %k0, %eax
16610 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16611 ; VLX-NEXT: vzeroupper
16614 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b:
16615 ; NoVLX: # %bb.0: # %entry
16616 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16617 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
16618 ; NoVLX-NEXT: kmovw %edi, %k1
16619 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16620 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16621 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16622 ; NoVLX-NEXT: kmovw %k0, %eax
16623 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16624 ; NoVLX-NEXT: vzeroupper
16627 %0 = bitcast <4 x i64> %__a to <8 x i32>
16628 %load = load i32, i32* %__b
; insertelement + all-zero shuffle mask splats the loaded scalar to all lanes.
16629 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16630 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16631 %2 = icmp ult <8 x i32> %0, %1
; The i8 mask already has one bit per compare lane, so no extract is needed.
16632 %3 = bitcast i8 %__u to <8 x i1>
16633 %4 = and <8 x i1> %3, %2
; Indices 8-15 select zero lanes, so the upper eight result lanes are zero.
16634 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16635 %6 = bitcast <16 x i1> %5 to i16
; Unsigned less-than compare of two <8 x i32> register operands, with the
; <8 x i1> result padded with zero lanes to <32 x i1> and returned as i32.
; The VLX checks expect a 256-bit vpcmpltud and kmovd; the NoVLX checks expect
; a 512-bit compare and the kshiftlw/kshiftrw pair (shift count 8) that clears
; the upper eight mask bits.
16640 define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16641 ; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask:
16642 ; VLX: # %bb.0: # %entry
16643 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0
16644 ; VLX-NEXT: kmovd %k0, %eax
16645 ; VLX-NEXT: vzeroupper
16648 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask:
16649 ; NoVLX: # %bb.0: # %entry
16650 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
16651 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16652 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16653 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16654 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16655 ; NoVLX-NEXT: kmovw %k0, %eax
16656 ; NoVLX-NEXT: vzeroupper
16659 %0 = bitcast <4 x i64> %__a to <8 x i32>
16660 %1 = bitcast <4 x i64> %__b to <8 x i32>
16661 %2 = icmp ult <8 x i32> %0, %1
; Shuffle indices 0-7 select the compare result; indices 8-15 select lanes of
; the zeroinitializer operand, so every lane above the eighth is zero.
16662 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16663 %4 = bitcast <32 x i1> %3 to i32
; <8 x i32> unsigned less-than compare widened to a 32-bit mask, with the
; right-hand operand loaded from %__b. The VLX checks expect the load folded
; into vpcmpltud as (%rdi); the NoVLX checks expect a separate ymm vmovdqa,
; a 512-bit compare and the kshiftlw/kshiftrw cleanup.
16667 define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
16668 ; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem:
16669 ; VLX: # %bb.0: # %entry
16670 ; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0
16671 ; VLX-NEXT: kmovd %k0, %eax
16672 ; VLX-NEXT: vzeroupper
16675 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem:
16676 ; NoVLX: # %bb.0: # %entry
16677 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16678 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
16679 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16680 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16681 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16682 ; NoVLX-NEXT: kmovw %k0, %eax
16683 ; NoVLX-NEXT: vzeroupper
16686 %0 = bitcast <4 x i64> %__a to <8 x i32>
16687 %load = load <4 x i64>, <4 x i64>* %__b
16688 %1 = bitcast <4 x i64> %load to <8 x i32>
16689 %2 = icmp ult <8 x i32> %0, %1
; Indices 8-15 select zero lanes, so every result lane above the eighth is zero.
16690 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16691 %4 = bitcast <32 x i1> %3 to i32
; Masked <8 x i32> unsigned less-than compare on register operands: the result
; is ANDed with the i8 mask %__u, padded with zero lanes to <32 x i1>, and
; returned as i32. Both configurations are expected to load %edi into %k1 and
; use it as the vpcmpltud write mask; NoVLX additionally widens to zmm and
; clears the upper eight mask bits with the kshiftlw/kshiftrw pair.
16695 define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16696 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask:
16697 ; VLX: # %bb.0: # %entry
16698 ; VLX-NEXT: kmovd %edi, %k1
16699 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
16700 ; VLX-NEXT: kmovd %k0, %eax
16701 ; VLX-NEXT: vzeroupper
16704 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask:
16705 ; NoVLX: # %bb.0: # %entry
16706 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
16707 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16708 ; NoVLX-NEXT: kmovw %edi, %k1
16709 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16710 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16711 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16712 ; NoVLX-NEXT: kmovw %k0, %eax
16713 ; NoVLX-NEXT: vzeroupper
16716 %0 = bitcast <4 x i64> %__a to <8 x i32>
16717 %1 = bitcast <4 x i64> %__b to <8 x i32>
16718 %2 = icmp ult <8 x i32> %0, %1
; The i8 mask already has one bit per compare lane, so no extract is needed.
16719 %3 = bitcast i8 %__u to <8 x i1>
16720 %4 = and <8 x i1> %2, %3
; Indices 8-15 select zero lanes, so every result lane above the eighth is zero.
16721 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16722 %6 = bitcast <32 x i1> %5 to i32
; Masked <8 x i32> unsigned less-than compare with the right-hand operand
; loaded from %__b; the result is ANDed with the i8 mask %__u and widened to a
; 32-bit mask. The VLX checks expect a write-masked vpcmpltud with a folded
; (%rsi) operand; the NoVLX checks expect a ymm vmovdqa, a 512-bit masked
; compare and the kshiftlw/kshiftrw cleanup.
16726 define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
16727 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem:
16728 ; VLX: # %bb.0: # %entry
16729 ; VLX-NEXT: kmovd %edi, %k1
16730 ; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
16731 ; VLX-NEXT: kmovd %k0, %eax
16732 ; VLX-NEXT: vzeroupper
16735 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem:
16736 ; NoVLX: # %bb.0: # %entry
16737 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16738 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
16739 ; NoVLX-NEXT: kmovw %edi, %k1
16740 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16741 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16742 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16743 ; NoVLX-NEXT: kmovw %k0, %eax
16744 ; NoVLX-NEXT: vzeroupper
16747 %0 = bitcast <4 x i64> %__a to <8 x i32>
16748 %load = load <4 x i64>, <4 x i64>* %__b
16749 %1 = bitcast <4 x i64> %load to <8 x i32>
16750 %2 = icmp ult <8 x i32> %0, %1
; The i8 mask already has one bit per compare lane, so no extract is needed.
16751 %3 = bitcast i8 %__u to <8 x i1>
16752 %4 = and <8 x i1> %2, %3
; Indices 8-15 select zero lanes, so every result lane above the eighth is zero.
16753 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16754 %6 = bitcast <32 x i1> %5 to i32
; Broadcast-operand variant for <8 x i32> returning a 32-bit mask: a scalar
; i32 loaded from %__b is splatted to eight lanes and compared (unsigned
; less-than) against %__a. The VLX checks expect vpcmpltud with a {1to8}
; embedded-broadcast operand; the NoVLX checks expect a ymm vpbroadcastd, a
; 512-bit compare and the kshiftlw/kshiftrw cleanup.
16759 define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
16760 ; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b:
16761 ; VLX: # %bb.0: # %entry
16762 ; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
16763 ; VLX-NEXT: kmovd %k0, %eax
16764 ; VLX-NEXT: vzeroupper
16767 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b:
16768 ; NoVLX: # %bb.0: # %entry
16769 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16770 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
16771 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16772 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16773 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16774 ; NoVLX-NEXT: kmovw %k0, %eax
16775 ; NoVLX-NEXT: vzeroupper
16778 %0 = bitcast <4 x i64> %__a to <8 x i32>
16779 %load = load i32, i32* %__b
; insertelement + all-zero shuffle mask splats the loaded scalar to all lanes.
16780 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16781 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16782 %2 = icmp ult <8 x i32> %0, %1
; Indices 8-15 select zero lanes, so every result lane above the eighth is zero.
16783 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16784 %4 = bitcast <32 x i1> %3 to i32
; Masked broadcast-operand variant for <8 x i32> returning a 32-bit mask: the
; splatted i32 from %__b is the right-hand side of the unsigned less-than
; compare, and the result is ANDed with the i8 mask %__u (mask as the first
; AND operand here). The VLX checks expect a write-masked vpcmpltud with a
; {1to8} operand; the NoVLX checks expect vpbroadcastd, a 512-bit masked
; compare and the kshiftlw/kshiftrw cleanup.
16788 define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
16789 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b:
16790 ; VLX: # %bb.0: # %entry
16791 ; VLX-NEXT: kmovd %edi, %k1
16792 ; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
16793 ; VLX-NEXT: kmovd %k0, %eax
16794 ; VLX-NEXT: vzeroupper
16797 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b:
16798 ; NoVLX: # %bb.0: # %entry
16799 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16800 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
16801 ; NoVLX-NEXT: kmovw %edi, %k1
16802 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16803 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16804 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16805 ; NoVLX-NEXT: kmovw %k0, %eax
16806 ; NoVLX-NEXT: vzeroupper
16809 %0 = bitcast <4 x i64> %__a to <8 x i32>
16810 %load = load i32, i32* %__b
; insertelement + all-zero shuffle mask splats the loaded scalar to all lanes.
16811 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16812 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16813 %2 = icmp ult <8 x i32> %0, %1
; The i8 mask already has one bit per compare lane, so no extract is needed.
16814 %3 = bitcast i8 %__u to <8 x i1>
16815 %4 = and <8 x i1> %3, %2
; Indices 8-15 select zero lanes, so every result lane above the eighth is zero.
16816 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16817 %6 = bitcast <32 x i1> %5 to i32
; Unsigned-less-than compare of the eight i32 lanes of %__a and %__b. The 8-bit
; result is widened to 64 lanes and bitcast to i64.
; The AVX512VL run expects a ymm vpcmpltud followed by kmovq; the AVX512F-only
; run expects the compare done on zmm and a kshiftlw/kshiftrw $8 pair that keeps
; only the low 8 mask bits.
16822 define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16823 ; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask:
16824 ; VLX: # %bb.0: # %entry
16825 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0
16826 ; VLX-NEXT: kmovq %k0, %rax
16827 ; VLX-NEXT: vzeroupper
16830 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask:
16831 ; NoVLX: # %bb.0: # %entry
16832 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
16833 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16834 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16835 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16836 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16837 ; NoVLX-NEXT: kmovw %k0, %eax
16838 ; NoVLX-NEXT: vzeroupper
16841 %0 = bitcast <4 x i64> %__a to <8 x i32>
16842 %1 = bitcast <4 x i64> %__b to <8 x i32>
16843 %2 = icmp ult <8 x i32> %0, %1
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-63 are zero.
16844 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16845 %4 = bitcast <64 x i1> %3 to i64
; Unsigned-less-than compare of the eight i32 lanes of %__a against a full
; vector loaded from %__b. The 8-bit result is widened to 64 lanes and bitcast
; to i64.
; The AVX512VL run expects the load folded into a ymm vpcmpltud; the
; AVX512F-only run expects a separate vmovdqa, the compare done on zmm, and a
; kshiftlw/kshiftrw $8 pair that keeps only the low 8 mask bits.
16849 define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
16850 ; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem:
16851 ; VLX: # %bb.0: # %entry
16852 ; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0
16853 ; VLX-NEXT: kmovq %k0, %rax
16854 ; VLX-NEXT: vzeroupper
16857 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem:
16858 ; NoVLX: # %bb.0: # %entry
16859 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16860 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
16861 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16862 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16863 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16864 ; NoVLX-NEXT: kmovw %k0, %eax
16865 ; NoVLX-NEXT: vzeroupper
16868 %0 = bitcast <4 x i64> %__a to <8 x i32>
16869 %load = load <4 x i64>, <4 x i64>* %__b
16870 %1 = bitcast <4 x i64> %load to <8 x i32>
16871 %2 = icmp ult <8 x i32> %0, %1
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-63 are zero.
16872 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16873 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned-less-than compare of the eight i32 lanes of %__a and %__b.
; The 8-bit result is ANDed with %__u, widened to 64 lanes and bitcast to i64.
; Both runs expect %__u moved into k1 and used as the write mask of vpcmpltud;
; the AVX512F-only run additionally expects the compare done on zmm and a
; kshiftlw/kshiftrw $8 pair that keeps only the low 8 mask bits.
16877 define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16878 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask:
16879 ; VLX: # %bb.0: # %entry
16880 ; VLX-NEXT: kmovd %edi, %k1
16881 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
16882 ; VLX-NEXT: kmovq %k0, %rax
16883 ; VLX-NEXT: vzeroupper
16886 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask:
16887 ; NoVLX: # %bb.0: # %entry
16888 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
16889 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16890 ; NoVLX-NEXT: kmovw %edi, %k1
16891 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16892 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16893 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16894 ; NoVLX-NEXT: kmovw %k0, %eax
16895 ; NoVLX-NEXT: vzeroupper
16898 %0 = bitcast <4 x i64> %__a to <8 x i32>
16899 %1 = bitcast <4 x i64> %__b to <8 x i32>
16900 %2 = icmp ult <8 x i32> %0, %1
16901 %3 = bitcast i8 %__u to <8 x i1>
16902 %4 = and <8 x i1> %2, %3
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-63 are zero.
16903 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16904 %6 = bitcast <64 x i1> %5 to i64
; Masked unsigned-less-than compare of the eight i32 lanes of %__a against a
; full vector loaded from %__b. The 8-bit result is ANDed with %__u, widened to
; 64 lanes and bitcast to i64.
; The AVX512VL run expects the load folded into a masked ymm vpcmpltud; the
; AVX512F-only run expects a separate vmovdqa, a masked compare on zmm, and a
; kshiftlw/kshiftrw $8 pair that keeps only the low 8 mask bits.
16908 define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
16909 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem:
16910 ; VLX: # %bb.0: # %entry
16911 ; VLX-NEXT: kmovd %edi, %k1
16912 ; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
16913 ; VLX-NEXT: kmovq %k0, %rax
16914 ; VLX-NEXT: vzeroupper
16917 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem:
16918 ; NoVLX: # %bb.0: # %entry
16919 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16920 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
16921 ; NoVLX-NEXT: kmovw %edi, %k1
16922 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16923 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16924 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16925 ; NoVLX-NEXT: kmovw %k0, %eax
16926 ; NoVLX-NEXT: vzeroupper
16929 %0 = bitcast <4 x i64> %__a to <8 x i32>
16930 %load = load <4 x i64>, <4 x i64>* %__b
16931 %1 = bitcast <4 x i64> %load to <8 x i32>
16932 %2 = icmp ult <8 x i32> %0, %1
16933 %3 = bitcast i8 %__u to <8 x i1>
16934 %4 = and <8 x i1> %2, %3
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-63 are zero.
16935 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16936 %6 = bitcast <64 x i1> %5 to i64
; Unsigned-less-than compare of the eight i32 lanes of %__a against a scalar
; loaded from %__b and splatted to every lane. The 8-bit result is widened to
; 64 lanes and bitcast to i64.
; The AVX512VL run expects vpcmpltud with a {1to8} broadcast memory operand;
; the AVX512F-only run expects a vpbroadcastd, the compare done on zmm, and a
; kshiftlw/kshiftrw $8 pair that keeps only the low 8 mask bits.
16941 define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
16942 ; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b:
16943 ; VLX: # %bb.0: # %entry
16944 ; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
16945 ; VLX-NEXT: kmovq %k0, %rax
16946 ; VLX-NEXT: vzeroupper
16949 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b:
16950 ; NoVLX: # %bb.0: # %entry
16951 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16952 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
16953 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16954 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16955 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16956 ; NoVLX-NEXT: kmovw %k0, %eax
16957 ; NoVLX-NEXT: vzeroupper
16960 %0 = bitcast <4 x i64> %__a to <8 x i32>
16961 %load = load i32, i32* %__b
; Splat the loaded scalar: place it in lane 0, then shuffle lane 0 into all lanes.
16962 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16963 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16964 %2 = icmp ult <8 x i32> %0, %1
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-63 are zero.
16965 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16966 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned-less-than compare of the eight i32 lanes of %__a against a
; scalar loaded from %__b and splatted to every lane. The 8-bit result is ANDed
; with %__u, widened to 64 lanes and bitcast to i64.
; The AVX512VL run expects one masked vpcmpltud with a {1to8} broadcast memory
; operand; the AVX512F-only run expects a vpbroadcastd, a masked compare on zmm,
; and a kshiftlw/kshiftrw $8 pair that keeps only the low 8 mask bits.
16970 define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
16971 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b:
16972 ; VLX: # %bb.0: # %entry
16973 ; VLX-NEXT: kmovd %edi, %k1
16974 ; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
16975 ; VLX-NEXT: kmovq %k0, %rax
16976 ; VLX-NEXT: vzeroupper
16979 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b:
16980 ; NoVLX: # %bb.0: # %entry
16981 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16982 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
16983 ; NoVLX-NEXT: kmovw %edi, %k1
16984 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16985 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16986 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16987 ; NoVLX-NEXT: kmovw %k0, %eax
16988 ; NoVLX-NEXT: vzeroupper
16991 %0 = bitcast <4 x i64> %__a to <8 x i32>
16992 %load = load i32, i32* %__b
; Splat the loaded scalar: place it in lane 0, then shuffle lane 0 into all lanes.
16993 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16994 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16995 %2 = icmp ult <8 x i32> %0, %1
16996 %3 = bitcast i8 %__u to <8 x i1>
16997 %4 = and <8 x i1> %3, %2
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-63 are zero.
16998 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16999 %6 = bitcast <64 x i1> %5 to i64
; Unsigned-less-than compare of the sixteen i32 lanes of %__a and %__b. The
; 16-bit result is widened to 32 lanes and bitcast to i32.
; Both runs expect a single zmm vpcmpltud; they differ only in the mask move
; (kmovd for the AVX512VL run, kmovw for the AVX512F-only run).
17004 define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
17005 ; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask:
17006 ; VLX: # %bb.0: # %entry
17007 ; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
17008 ; VLX-NEXT: kmovd %k0, %eax
17009 ; VLX-NEXT: vzeroupper
17012 ; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask:
17013 ; NoVLX: # %bb.0: # %entry
17014 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
17015 ; NoVLX-NEXT: kmovw %k0, %eax
17016 ; NoVLX-NEXT: vzeroupper
17019 %0 = bitcast <8 x i64> %__a to <16 x i32>
17020 %1 = bitcast <8 x i64> %__b to <16 x i32>
17021 %2 = icmp ult <16 x i32> %0, %1
; Indices 16-31 select from the zeroinitializer operand, so result lanes 16-31 are zero.
17022 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17023 %4 = bitcast <32 x i1> %3 to i32
; Unsigned-less-than compare of the sixteen i32 lanes of %__a against a full
; vector loaded from %__b. The 16-bit result is widened to 32 lanes and bitcast
; to i32.
; Both runs expect the load folded into a zmm vpcmpltud; they differ only in
; the mask move (kmovd for the AVX512VL run, kmovw for the AVX512F-only run).
17027 define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
17028 ; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem:
17029 ; VLX: # %bb.0: # %entry
17030 ; VLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
17031 ; VLX-NEXT: kmovd %k0, %eax
17032 ; VLX-NEXT: vzeroupper
17035 ; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem:
17036 ; NoVLX: # %bb.0: # %entry
17037 ; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
17038 ; NoVLX-NEXT: kmovw %k0, %eax
17039 ; NoVLX-NEXT: vzeroupper
17042 %0 = bitcast <8 x i64> %__a to <16 x i32>
17043 %load = load <8 x i64>, <8 x i64>* %__b
17044 %1 = bitcast <8 x i64> %load to <16 x i32>
17045 %2 = icmp ult <16 x i32> %0, %1
; Indices 16-31 select from the zeroinitializer operand, so result lanes 16-31 are zero.
17046 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17047 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned-less-than compare of the sixteen i32 lanes of %__a and %__b.
; The 16-bit result is ANDed with %__u, widened to 32 lanes and bitcast to i32.
; The AVX512VL run expects %__u moved into k1 and used as the write mask of
; vpcmpltud; the AVX512F-only run expects an unmasked compare followed by
; "andl %edi, %eax" on the extracted mask.
17051 define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
17052 ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask:
17053 ; VLX: # %bb.0: # %entry
17054 ; VLX-NEXT: kmovd %edi, %k1
17055 ; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
17056 ; VLX-NEXT: kmovd %k0, %eax
17057 ; VLX-NEXT: vzeroupper
17060 ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask:
17061 ; NoVLX: # %bb.0: # %entry
17062 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
17063 ; NoVLX-NEXT: kmovw %k0, %eax
17064 ; NoVLX-NEXT: andl %edi, %eax
17065 ; NoVLX-NEXT: vzeroupper
17068 %0 = bitcast <8 x i64> %__a to <16 x i32>
17069 %1 = bitcast <8 x i64> %__b to <16 x i32>
17070 %2 = icmp ult <16 x i32> %0, %1
17071 %3 = bitcast i16 %__u to <16 x i1>
17072 %4 = and <16 x i1> %2, %3
; Indices 16-31 select from the zeroinitializer operand, so result lanes 16-31 are zero.
17073 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17074 %6 = bitcast <32 x i1> %5 to i32
; Masked unsigned-less-than compare of the sixteen i32 lanes of %__a against a
; full vector loaded from %__b. The 16-bit result is ANDed with %__u, widened to
; 32 lanes and bitcast to i32.
; The AVX512VL run expects the load folded into a vpcmpltud write-masked by k1;
; the AVX512F-only run expects an unmasked folded compare followed by
; "andl %edi, %eax" on the extracted mask.
17078 define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
17079 ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem:
17080 ; VLX: # %bb.0: # %entry
17081 ; VLX-NEXT: kmovd %edi, %k1
17082 ; VLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
17083 ; VLX-NEXT: kmovd %k0, %eax
17084 ; VLX-NEXT: vzeroupper
17087 ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem:
17088 ; NoVLX: # %bb.0: # %entry
17089 ; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0
17090 ; NoVLX-NEXT: kmovw %k0, %eax
17091 ; NoVLX-NEXT: andl %edi, %eax
17092 ; NoVLX-NEXT: vzeroupper
17095 %0 = bitcast <8 x i64> %__a to <16 x i32>
17096 %load = load <8 x i64>, <8 x i64>* %__b
17097 %1 = bitcast <8 x i64> %load to <16 x i32>
17098 %2 = icmp ult <16 x i32> %0, %1
17099 %3 = bitcast i16 %__u to <16 x i1>
17100 %4 = and <16 x i1> %2, %3
; Indices 16-31 select from the zeroinitializer operand, so result lanes 16-31 are zero.
17101 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17102 %6 = bitcast <32 x i1> %5 to i32
; Unsigned-less-than compare of the sixteen i32 lanes of %__a against a scalar
; loaded from %__b and splatted to every lane. The 16-bit result is widened to
; 32 lanes and bitcast to i32.
; Both runs expect vpcmpltud with a {1to16} broadcast memory operand; they
; differ only in the mask move (kmovd versus kmovw).
17107 define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
17108 ; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b:
17109 ; VLX: # %bb.0: # %entry
17110 ; VLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
17111 ; VLX-NEXT: kmovd %k0, %eax
17112 ; VLX-NEXT: vzeroupper
17115 ; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b:
17116 ; NoVLX: # %bb.0: # %entry
17117 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
17118 ; NoVLX-NEXT: kmovw %k0, %eax
17119 ; NoVLX-NEXT: vzeroupper
17122 %0 = bitcast <8 x i64> %__a to <16 x i32>
17123 %load = load i32, i32* %__b
; Splat the loaded scalar: place it in lane 0, then shuffle lane 0 into all lanes.
17124 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
17125 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17126 %2 = icmp ult <16 x i32> %0, %1
; Indices 16-31 select from the zeroinitializer operand, so result lanes 16-31 are zero.
17127 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17128 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned-less-than compare of the sixteen i32 lanes of %__a against a
; scalar loaded from %__b and splatted to every lane. The 16-bit result is ANDed
; with %__u, widened to 32 lanes and bitcast to i32.
; The AVX512VL run expects a {1to16} broadcast vpcmpltud write-masked by k1; the
; AVX512F-only run expects the unmasked broadcast compare followed by
; "andl %edi, %eax" on the extracted mask.
17132 define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
17133 ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b:
17134 ; VLX: # %bb.0: # %entry
17135 ; VLX-NEXT: kmovd %edi, %k1
17136 ; VLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
17137 ; VLX-NEXT: kmovd %k0, %eax
17138 ; VLX-NEXT: vzeroupper
17141 ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b:
17142 ; NoVLX: # %bb.0: # %entry
17143 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0
17144 ; NoVLX-NEXT: kmovw %k0, %eax
17145 ; NoVLX-NEXT: andl %edi, %eax
17146 ; NoVLX-NEXT: vzeroupper
17149 %0 = bitcast <8 x i64> %__a to <16 x i32>
17150 %load = load i32, i32* %__b
; Splat the loaded scalar: place it in lane 0, then shuffle lane 0 into all lanes.
17151 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
17152 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17153 %2 = icmp ult <16 x i32> %0, %1
17154 %3 = bitcast i16 %__u to <16 x i1>
17155 %4 = and <16 x i1> %3, %2
; Indices 16-31 select from the zeroinitializer operand, so result lanes 16-31 are zero.
17156 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17157 %6 = bitcast <32 x i1> %5 to i32
; Unsigned-less-than compare of the sixteen i32 lanes of %__a and %__b. The
; 16-bit result is widened to 64 lanes and bitcast to i64.
; Both runs expect a single zmm vpcmpltud; the AVX512VL run moves the mask with
; kmovq into %rax, the AVX512F-only run with kmovw into %eax.
17162 define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
17163 ; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask:
17164 ; VLX: # %bb.0: # %entry
17165 ; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
17166 ; VLX-NEXT: kmovq %k0, %rax
17167 ; VLX-NEXT: vzeroupper
17170 ; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask:
17171 ; NoVLX: # %bb.0: # %entry
17172 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
17173 ; NoVLX-NEXT: kmovw %k0, %eax
17174 ; NoVLX-NEXT: vzeroupper
17177 %0 = bitcast <8 x i64> %__a to <16 x i32>
17178 %1 = bitcast <8 x i64> %__b to <16 x i32>
17179 %2 = icmp ult <16 x i32> %0, %1
; Indices 16-31 select from the zeroinitializer operand, so result lanes 16-63 are zero.
17180 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17181 %4 = bitcast <64 x i1> %3 to i64
; Unsigned-less-than compare of the sixteen i32 lanes of %__a against a full
; vector loaded from %__b. The 16-bit result is widened to 64 lanes and bitcast
; to i64.
; Both runs expect the load folded into a zmm vpcmpltud; the AVX512VL run moves
; the mask with kmovq into %rax, the AVX512F-only run with kmovw into %eax.
17185 define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
17186 ; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem:
17187 ; VLX: # %bb.0: # %entry
17188 ; VLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
17189 ; VLX-NEXT: kmovq %k0, %rax
17190 ; VLX-NEXT: vzeroupper
17193 ; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem:
17194 ; NoVLX: # %bb.0: # %entry
17195 ; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
17196 ; NoVLX-NEXT: kmovw %k0, %eax
17197 ; NoVLX-NEXT: vzeroupper
17200 %0 = bitcast <8 x i64> %__a to <16 x i32>
17201 %load = load <8 x i64>, <8 x i64>* %__b
17202 %1 = bitcast <8 x i64> %load to <16 x i32>
17203 %2 = icmp ult <16 x i32> %0, %1
; Indices 16-31 select from the zeroinitializer operand, so result lanes 16-63 are zero.
17204 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17205 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned-less-than compare of the sixteen i32 lanes of %__a and %__b.
; The 16-bit result is ANDed with %__u, widened to 64 lanes and bitcast to i64.
; The AVX512VL run expects %__u moved into k1 and used as the write mask of
; vpcmpltud; the AVX512F-only run expects an unmasked compare followed by
; "andl %edi, %eax" on the extracted mask.
17209 define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
17210 ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask:
17211 ; VLX: # %bb.0: # %entry
17212 ; VLX-NEXT: kmovd %edi, %k1
17213 ; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
17214 ; VLX-NEXT: kmovq %k0, %rax
17215 ; VLX-NEXT: vzeroupper
17218 ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask:
17219 ; NoVLX: # %bb.0: # %entry
17220 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
17221 ; NoVLX-NEXT: kmovw %k0, %eax
17222 ; NoVLX-NEXT: andl %edi, %eax
17223 ; NoVLX-NEXT: vzeroupper
17226 %0 = bitcast <8 x i64> %__a to <16 x i32>
17227 %1 = bitcast <8 x i64> %__b to <16 x i32>
17228 %2 = icmp ult <16 x i32> %0, %1
17229 %3 = bitcast i16 %__u to <16 x i1>
17230 %4 = and <16 x i1> %2, %3
; Indices 16-31 select from the zeroinitializer operand, so result lanes 16-63 are zero.
17231 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17232 %6 = bitcast <64 x i1> %5 to i64
; Masked unsigned-less-than compare of the sixteen i32 lanes of %__a against a
; full vector loaded from %__b. The 16-bit result is ANDed with %__u, widened to
; 64 lanes and bitcast to i64.
; The AVX512VL run expects the load folded into a vpcmpltud write-masked by k1;
; the AVX512F-only run expects an unmasked folded compare followed by
; "andl %edi, %eax" on the extracted mask.
17236 define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
17237 ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem:
17238 ; VLX: # %bb.0: # %entry
17239 ; VLX-NEXT: kmovd %edi, %k1
17240 ; VLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
17241 ; VLX-NEXT: kmovq %k0, %rax
17242 ; VLX-NEXT: vzeroupper
17245 ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem:
17246 ; NoVLX: # %bb.0: # %entry
17247 ; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0
17248 ; NoVLX-NEXT: kmovw %k0, %eax
17249 ; NoVLX-NEXT: andl %edi, %eax
17250 ; NoVLX-NEXT: vzeroupper
17253 %0 = bitcast <8 x i64> %__a to <16 x i32>
17254 %load = load <8 x i64>, <8 x i64>* %__b
17255 %1 = bitcast <8 x i64> %load to <16 x i32>
17256 %2 = icmp ult <16 x i32> %0, %1
17257 %3 = bitcast i16 %__u to <16 x i1>
17258 %4 = and <16 x i1> %2, %3
; Indices 16-31 select from the zeroinitializer operand, so result lanes 16-63 are zero.
17259 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17260 %6 = bitcast <64 x i1> %5 to i64
; Unsigned-less-than compare of the sixteen i32 lanes of %__a against a scalar
; loaded from %__b and splatted to every lane. The 16-bit result is widened to
; 64 lanes and bitcast to i64.
; Both runs expect vpcmpltud with a {1to16} broadcast memory operand; the
; AVX512VL run moves the mask with kmovq, the AVX512F-only run with kmovw.
17265 define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
17266 ; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b:
17267 ; VLX: # %bb.0: # %entry
17268 ; VLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
17269 ; VLX-NEXT: kmovq %k0, %rax
17270 ; VLX-NEXT: vzeroupper
17273 ; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b:
17274 ; NoVLX: # %bb.0: # %entry
17275 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
17276 ; NoVLX-NEXT: kmovw %k0, %eax
17277 ; NoVLX-NEXT: vzeroupper
17280 %0 = bitcast <8 x i64> %__a to <16 x i32>
17281 %load = load i32, i32* %__b
; Splat the loaded scalar: place it in lane 0, then shuffle lane 0 into all lanes.
17282 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
17283 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17284 %2 = icmp ult <16 x i32> %0, %1
; Indices 16-31 select from the zeroinitializer operand, so result lanes 16-63 are zero.
17285 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17286 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned-less-than compare of the sixteen i32 lanes of %__a against a
; scalar loaded from %__b and splatted to every lane. The 16-bit result is ANDed
; with %__u, widened to 64 lanes and bitcast to i64.
; The AVX512VL run expects a {1to16} broadcast vpcmpltud write-masked by k1; the
; AVX512F-only run expects the unmasked broadcast compare followed by
; "andl %edi, %eax" on the extracted mask.
17290 define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
17291 ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b:
17292 ; VLX: # %bb.0: # %entry
17293 ; VLX-NEXT: kmovd %edi, %k1
17294 ; VLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
17295 ; VLX-NEXT: kmovq %k0, %rax
17296 ; VLX-NEXT: vzeroupper
17299 ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b:
17300 ; NoVLX: # %bb.0: # %entry
17301 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0
17302 ; NoVLX-NEXT: kmovw %k0, %eax
17303 ; NoVLX-NEXT: andl %edi, %eax
17304 ; NoVLX-NEXT: vzeroupper
17307 %0 = bitcast <8 x i64> %__a to <16 x i32>
17308 %load = load i32, i32* %__b
; Splat the loaded scalar: place it in lane 0, then shuffle lane 0 into all lanes.
17309 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
17310 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17311 %2 = icmp ult <16 x i32> %0, %1
17312 %3 = bitcast i16 %__u to <16 x i1>
17313 %4 = and <16 x i1> %3, %2
; Indices 16-31 select from the zeroinitializer operand, so result lanes 16-63 are zero.
17314 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17315 %6 = bitcast <64 x i1> %5 to i64
; Unsigned-less-than compare of the two i64 lanes of %__a and %__b. The 2-bit
; result is widened to 4 lanes and bitcast to i4.
; The AVX512VL run expects an xmm vpcmpltuq followed by kmovb; the AVX512F-only
; run expects the compare done on zmm and a kshiftlw/kshiftrw $14 pair that
; keeps only the low 2 mask bits.
17320 define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17321 ; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask:
17322 ; VLX: # %bb.0: # %entry
17323 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
17324 ; VLX-NEXT: kmovb %k0, %eax
17327 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask:
17328 ; NoVLX: # %bb.0: # %entry
17329 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17330 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17331 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17332 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17333 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17334 ; NoVLX-NEXT: kmovw %k0, %eax
17335 ; NoVLX-NEXT: vzeroupper
; The two bitcasts below are same-type no-ops.
17338 %0 = bitcast <2 x i64> %__a to <2 x i64>
17339 %1 = bitcast <2 x i64> %__b to <2 x i64>
17340 %2 = icmp ult <2 x i64> %0, %1
; Indices 2-3 select from the zeroinitializer operand, so result lanes 2-3 are zero.
17341 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17342 %4 = bitcast <4 x i1> %3 to i4
; Unsigned-less-than compare of the two i64 lanes of %__a against a full vector
; loaded from %__b. The 2-bit result is widened to 4 lanes and bitcast to i4.
; The AVX512VL run expects the load folded into an xmm vpcmpltuq; the
; AVX512F-only run expects a separate vmovdqa, the compare done on zmm, and a
; kshiftlw/kshiftrw $14 pair that keeps only the low 2 mask bits.
17346 define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17347 ; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem:
17348 ; VLX: # %bb.0: # %entry
17349 ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
17350 ; VLX-NEXT: kmovb %k0, %eax
17353 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem:
17354 ; NoVLX: # %bb.0: # %entry
17355 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17356 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
17357 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17358 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17359 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17360 ; NoVLX-NEXT: kmovw %k0, %eax
17361 ; NoVLX-NEXT: vzeroupper
17364 %0 = bitcast <2 x i64> %__a to <2 x i64>
17365 %load = load <2 x i64>, <2 x i64>* %__b
17366 %1 = bitcast <2 x i64> %load to <2 x i64>
17367 %2 = icmp ult <2 x i64> %0, %1
; Indices 2-3 select from the zeroinitializer operand, so result lanes 2-3 are zero.
17368 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17369 %4 = bitcast <4 x i1> %3 to i4
; Masked unsigned-less-than compare of the two i64 lanes of %__a and %__b. The
; 2-bit result is ANDed with the low two bits of %__u, widened to 4 lanes and
; bitcast to i4.
; Both runs expect %__u moved into k1 and used as the write mask of vpcmpltuq;
; the AVX512F-only run additionally expects the compare done on zmm and a
; kshiftlw/kshiftrw $14 pair that keeps only the low 2 mask bits.
17373 define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17374 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask:
17375 ; VLX: # %bb.0: # %entry
17376 ; VLX-NEXT: kmovd %edi, %k1
17377 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
17378 ; VLX-NEXT: kmovb %k0, %eax
17381 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask:
17382 ; NoVLX: # %bb.0: # %entry
17383 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17384 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17385 ; NoVLX-NEXT: kmovw %edi, %k1
17386 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17387 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17388 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17389 ; NoVLX-NEXT: kmovw %k0, %eax
17390 ; NoVLX-NEXT: vzeroupper
17393 %0 = bitcast <2 x i64> %__a to <2 x i64>
17394 %1 = bitcast <2 x i64> %__b to <2 x i64>
17395 %2 = icmp ult <2 x i64> %0, %1
17396 %3 = bitcast i8 %__u to <8 x i1>
; Only mask lanes 0 and 1 of %__u take part in the AND.
17397 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17398 %4 = and <2 x i1> %2, %extract.i
; Indices 2-3 select from the zeroinitializer operand, so result lanes 2-3 are zero.
17399 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17400 %6 = bitcast <4 x i1> %5 to i4
; Masked unsigned-less-than compare of the two i64 lanes of %__a against a full
; vector loaded from %__b. The 2-bit result is ANDed with the low two bits of
; %__u, widened to 4 lanes and bitcast to i4.
; The AVX512VL run expects the load folded into a masked xmm vpcmpltuq; the
; AVX512F-only run expects a separate vmovdqa, a masked compare on zmm, and a
; kshiftlw/kshiftrw $14 pair that keeps only the low 2 mask bits.
17404 define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17405 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem:
17406 ; VLX: # %bb.0: # %entry
17407 ; VLX-NEXT: kmovd %edi, %k1
17408 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
17409 ; VLX-NEXT: kmovb %k0, %eax
17412 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem:
17413 ; NoVLX: # %bb.0: # %entry
17414 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17415 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
17416 ; NoVLX-NEXT: kmovw %edi, %k1
17417 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17418 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17419 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17420 ; NoVLX-NEXT: kmovw %k0, %eax
17421 ; NoVLX-NEXT: vzeroupper
17424 %0 = bitcast <2 x i64> %__a to <2 x i64>
17425 %load = load <2 x i64>, <2 x i64>* %__b
17426 %1 = bitcast <2 x i64> %load to <2 x i64>
17427 %2 = icmp ult <2 x i64> %0, %1
17428 %3 = bitcast i8 %__u to <8 x i1>
; Only mask lanes 0 and 1 of %__u take part in the AND.
17429 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17430 %4 = and <2 x i1> %2, %extract.i
; Indices 2-3 select from the zeroinitializer operand, so result lanes 2-3 are zero.
17431 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17432 %6 = bitcast <4 x i1> %5 to i4
; Unsigned-less-than compare of the two i64 lanes of %__a against a scalar
; loaded from %__b and splatted to both lanes. The 2-bit result is widened to
; 4 lanes and bitcast to i4.
; The AVX512VL run expects vpcmpltuq with a {1to2} broadcast memory operand;
; the AVX512F-only run expects a vpbroadcastq, the compare done on zmm, and a
; kshiftlw/kshiftrw $14 pair that keeps only the low 2 mask bits.
17437 define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
17438 ; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b:
17439 ; VLX: # %bb.0: # %entry
17440 ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
17441 ; VLX-NEXT: kmovb %k0, %eax
17444 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b:
17445 ; NoVLX: # %bb.0: # %entry
17446 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17447 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
17448 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17449 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17450 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17451 ; NoVLX-NEXT: kmovw %k0, %eax
17452 ; NoVLX-NEXT: vzeroupper
17455 %0 = bitcast <2 x i64> %__a to <2 x i64>
17456 %load = load i64, i64* %__b
; Splat the loaded scalar: place it in lane 0, then shuffle lane 0 into both lanes.
17457 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17458 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17459 %2 = icmp ult <2 x i64> %0, %1
; Indices 2-3 select from the zeroinitializer operand, so result lanes 2-3 are zero.
17460 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17461 %4 = bitcast <4 x i1> %3 to i4
; Masked unsigned-less-than compare of the two i64 lanes of %__a against a
; scalar loaded from %__b and splatted to both lanes. The 2-bit result is ANDed
; with the low two bits of %__u, widened to 4 lanes and bitcast to i4.
; The AVX512VL run expects one masked vpcmpltuq with a {1to2} broadcast memory
; operand; the AVX512F-only run expects a vpbroadcastq, a masked compare on zmm,
; and a kshiftlw/kshiftrw $14 pair that keeps only the low 2 mask bits.
17465 define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
17466 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b:
17467 ; VLX: # %bb.0: # %entry
17468 ; VLX-NEXT: kmovd %edi, %k1
17469 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
17470 ; VLX-NEXT: kmovb %k0, %eax
17473 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b:
17474 ; NoVLX: # %bb.0: # %entry
17475 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17476 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
17477 ; NoVLX-NEXT: kmovw %edi, %k1
17478 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17479 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17480 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17481 ; NoVLX-NEXT: kmovw %k0, %eax
17482 ; NoVLX-NEXT: vzeroupper
17485 %0 = bitcast <2 x i64> %__a to <2 x i64>
17486 %load = load i64, i64* %__b
; Splat the loaded scalar: place it in lane 0, then shuffle lane 0 into both lanes.
17487 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17488 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17489 %2 = icmp ult <2 x i64> %0, %1
17490 %3 = bitcast i8 %__u to <8 x i1>
; Only mask lanes 0 and 1 of %__u take part in the AND.
17491 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17492 %4 = and <2 x i1> %extract.i, %2
; Indices 2-3 select from the zeroinitializer operand, so result lanes 2-3 are zero.
17493 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17494 %6 = bitcast <4 x i1> %5 to i4
; Unmasked `icmp ult` of two <2 x i64> register operands. The <2 x i1> result
; is widened to <8 x i1>: every shuffle index >= 2 selects a lane of
; zeroinitializer, so bits 2..7 are zero. It is then bitcast to i8.
; With AVX512VL the assertions expect a single xmm vpcmpltuq into %k0. With
; only AVX512F they expect a zmm compare followed by a kshiftlw/kshiftrw $14
; pair that keeps only the low two mask bits.
17499 define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17500 ; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask:
17501 ; VLX: # %bb.0: # %entry
17502 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
17503 ; VLX-NEXT: kmovd %k0, %eax
17504 ; VLX-NEXT: # kill: def $al killed $al killed $eax
17507 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask:
17508 ; NoVLX: # %bb.0: # %entry
17509 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17510 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17511 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17512 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17513 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17514 ; NoVLX-NEXT: kmovw %k0, %eax
17515 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
17516 ; NoVLX-NEXT: vzeroupper
17519 %0 = bitcast <2 x i64> %__a to <2 x i64>
17520 %1 = bitcast <2 x i64> %__b to <2 x i64>
17521 %2 = icmp ult <2 x i64> %0, %1
17522 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17523 %4 = bitcast <8 x i1> %3 to i8
; Unmasked `icmp ult` of %__a against a full <2 x i64> loaded from %__b. The
; <2 x i1> result is widened to <8 x i1> (shuffle indices >= 2 select lanes of
; zeroinitializer, so bits 2..7 are zero) and bitcast to i8.
; With AVX512VL the assertions expect the load folded into vpcmpltuq as a
; (%rdi) memory operand. With only AVX512F they expect a separate vmovdqa
; load, a zmm compare, and a kshiftlw/kshiftrw $14 pair that keeps only the
; low two mask bits.
17527 define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17528 ; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem:
17529 ; VLX: # %bb.0: # %entry
17530 ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
17531 ; VLX-NEXT: kmovd %k0, %eax
17532 ; VLX-NEXT: # kill: def $al killed $al killed $eax
17535 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem:
17536 ; NoVLX: # %bb.0: # %entry
17537 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17538 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
17539 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17540 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17541 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17542 ; NoVLX-NEXT: kmovw %k0, %eax
17543 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
17544 ; NoVLX-NEXT: vzeroupper
17547 %0 = bitcast <2 x i64> %__a to <2 x i64>
17548 %load = load <2 x i64>, <2 x i64>* %__b
17549 %1 = bitcast <2 x i64> %load to <2 x i64>
17550 %2 = icmp ult <2 x i64> %0, %1
17551 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17552 %4 = bitcast <8 x i1> %3 to i8
; Masked `icmp ult` of two <2 x i64> register operands. %__u is bitcast to
; <8 x i1>, its low two lanes are extracted and ANDed with the compare result.
; The <2 x i1> value is widened to <8 x i1> (shuffle indices >= 2 select lanes
; of zeroinitializer) and bitcast to i8.
; In both runs the assertions expect %edi moved into %k1 and used as the
; {%k1} write mask of vpcmpltuq. With only AVX512F the compare is done on zmm
; and a kshiftlw/kshiftrw $14 pair keeps only the low two mask bits.
17556 define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17557 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask:
17558 ; VLX: # %bb.0: # %entry
17559 ; VLX-NEXT: kmovd %edi, %k1
17560 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
17561 ; VLX-NEXT: kmovd %k0, %eax
17562 ; VLX-NEXT: # kill: def $al killed $al killed $eax
17565 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask:
17566 ; NoVLX: # %bb.0: # %entry
17567 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17568 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17569 ; NoVLX-NEXT: kmovw %edi, %k1
17570 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17571 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17572 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17573 ; NoVLX-NEXT: kmovw %k0, %eax
17574 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
17575 ; NoVLX-NEXT: vzeroupper
17578 %0 = bitcast <2 x i64> %__a to <2 x i64>
17579 %1 = bitcast <2 x i64> %__b to <2 x i64>
17580 %2 = icmp ult <2 x i64> %0, %1
17581 %3 = bitcast i8 %__u to <8 x i1>
17582 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17583 %4 = and <2 x i1> %2, %extract.i
17584 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17585 %6 = bitcast <8 x i1> %5 to i8
; Masked `icmp ult` of %__a against a full <2 x i64> loaded from %__b. %__u is
; bitcast to <8 x i1>, its low two lanes are extracted and ANDed with the
; compare result. The <2 x i1> value is widened to <8 x i1> (shuffle indices
; >= 2 select lanes of zeroinitializer) and bitcast to i8.
; In both runs the assertions expect %edi moved into %k1 and used as the
; {%k1} write mask. With AVX512VL the load is folded as (%rsi). With only
; AVX512F it is a separate vmovdqa followed by a zmm compare and a
; kshiftlw/kshiftrw $14 pair that keeps only the low two mask bits.
17589 define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17590 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem:
17591 ; VLX: # %bb.0: # %entry
17592 ; VLX-NEXT: kmovd %edi, %k1
17593 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
17594 ; VLX-NEXT: kmovd %k0, %eax
17595 ; VLX-NEXT: # kill: def $al killed $al killed $eax
17598 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem:
17599 ; NoVLX: # %bb.0: # %entry
17600 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17601 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
17602 ; NoVLX-NEXT: kmovw %edi, %k1
17603 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17604 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17605 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17606 ; NoVLX-NEXT: kmovw %k0, %eax
17607 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
17608 ; NoVLX-NEXT: vzeroupper
17611 %0 = bitcast <2 x i64> %__a to <2 x i64>
17612 %load = load <2 x i64>, <2 x i64>* %__b
17613 %1 = bitcast <2 x i64> %load to <2 x i64>
17614 %2 = icmp ult <2 x i64> %0, %1
17615 %3 = bitcast i8 %__u to <8 x i1>
17616 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17617 %4 = and <2 x i1> %2, %extract.i
17618 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17619 %6 = bitcast <8 x i1> %5 to i8
; Unmasked `icmp ult` of %__a against an i64 loaded from %__b and splatted to
; both lanes (insertelement at index 0, then shuffle <0, 0>). The <2 x i1>
; result is widened to <8 x i1> (shuffle indices >= 2 select lanes of
; zeroinitializer, so bits 2..7 are zero) and bitcast to i8.
; With AVX512VL the assertions expect the load folded as a {1to2} broadcast
; operand. With only AVX512F they expect vpbroadcastq into xmm1, a zmm
; compare, and a kshiftlw/kshiftrw $14 pair that keeps only the low two mask
; bits.
17624 define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
17625 ; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b:
17626 ; VLX: # %bb.0: # %entry
17627 ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
17628 ; VLX-NEXT: kmovd %k0, %eax
17629 ; VLX-NEXT: # kill: def $al killed $al killed $eax
17632 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b:
17633 ; NoVLX: # %bb.0: # %entry
17634 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17635 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
17636 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17637 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17638 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17639 ; NoVLX-NEXT: kmovw %k0, %eax
17640 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
17641 ; NoVLX-NEXT: vzeroupper
17644 %0 = bitcast <2 x i64> %__a to <2 x i64>
17645 %load = load i64, i64* %__b
17646 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17647 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17648 %2 = icmp ult <2 x i64> %0, %1
17649 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17650 %4 = bitcast <8 x i1> %3 to i8
; Masked `icmp ult` of %__a against an i64 loaded from %__b and splatted to
; both lanes. %__u is bitcast to <8 x i1>, its low two lanes are extracted and
; ANDed with the compare result. The <2 x i1> value is widened to <8 x i1>
; (shuffle indices >= 2 select lanes of zeroinitializer) and bitcast to i8.
; In both runs the assertions expect %edi moved into %k1 and used as the
; {%k1} write mask. With AVX512VL the load is a folded {1to2} broadcast. With
; only AVX512F it is a vpbroadcastq into xmm1 followed by a zmm compare and a
; kshiftlw/kshiftrw $14 pair that keeps only the low two mask bits.
17654 define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
17655 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b:
17656 ; VLX: # %bb.0: # %entry
17657 ; VLX-NEXT: kmovd %edi, %k1
17658 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
17659 ; VLX-NEXT: kmovd %k0, %eax
17660 ; VLX-NEXT: # kill: def $al killed $al killed $eax
17663 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b:
17664 ; NoVLX: # %bb.0: # %entry
17665 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17666 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
17667 ; NoVLX-NEXT: kmovw %edi, %k1
17668 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17669 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17670 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17671 ; NoVLX-NEXT: kmovw %k0, %eax
17672 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
17673 ; NoVLX-NEXT: vzeroupper
17676 %0 = bitcast <2 x i64> %__a to <2 x i64>
17677 %load = load i64, i64* %__b
17678 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17679 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17680 %2 = icmp ult <2 x i64> %0, %1
17681 %3 = bitcast i8 %__u to <8 x i1>
17682 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17683 %4 = and <2 x i1> %extract.i, %2
17684 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17685 %6 = bitcast <8 x i1> %5 to i8
; Unmasked `icmp ult` of two <2 x i64> register operands. The <2 x i1> result
; is widened to <16 x i1>: every shuffle index >= 2 selects a lane of
; zeroinitializer, so bits 2..15 are zero. It is then bitcast to i16.
; With AVX512VL the assertions expect a single xmm vpcmpltuq into %k0. With
; only AVX512F they expect a zmm compare followed by a kshiftlw/kshiftrw $14
; pair that keeps only the low two mask bits.
17690 define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17691 ; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask:
17692 ; VLX: # %bb.0: # %entry
17693 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
17694 ; VLX-NEXT: kmovd %k0, %eax
17695 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
17698 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask:
17699 ; NoVLX: # %bb.0: # %entry
17700 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17701 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17702 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17703 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17704 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17705 ; NoVLX-NEXT: kmovw %k0, %eax
17706 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
17707 ; NoVLX-NEXT: vzeroupper
17710 %0 = bitcast <2 x i64> %__a to <2 x i64>
17711 %1 = bitcast <2 x i64> %__b to <2 x i64>
17712 %2 = icmp ult <2 x i64> %0, %1
17713 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17714 %4 = bitcast <16 x i1> %3 to i16
; Unmasked `icmp ult` of %__a against a full <2 x i64> loaded from %__b. The
; <2 x i1> result is widened to <16 x i1> (shuffle indices >= 2 select lanes
; of zeroinitializer, so bits 2..15 are zero) and bitcast to i16.
; With AVX512VL the assertions expect the load folded into vpcmpltuq as a
; (%rdi) memory operand. With only AVX512F they expect a separate vmovdqa
; load, a zmm compare, and a kshiftlw/kshiftrw $14 pair that keeps only the
; low two mask bits.
17718 define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17719 ; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem:
17720 ; VLX: # %bb.0: # %entry
17721 ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
17722 ; VLX-NEXT: kmovd %k0, %eax
17723 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
17726 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem:
17727 ; NoVLX: # %bb.0: # %entry
17728 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17729 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
17730 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17731 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17732 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17733 ; NoVLX-NEXT: kmovw %k0, %eax
17734 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
17735 ; NoVLX-NEXT: vzeroupper
17738 %0 = bitcast <2 x i64> %__a to <2 x i64>
17739 %load = load <2 x i64>, <2 x i64>* %__b
17740 %1 = bitcast <2 x i64> %load to <2 x i64>
17741 %2 = icmp ult <2 x i64> %0, %1
17742 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17743 %4 = bitcast <16 x i1> %3 to i16
; Masked `icmp ult` of two <2 x i64> register operands. %__u is bitcast to
; <8 x i1>, its low two lanes are extracted and ANDed with the compare result.
; The <2 x i1> value is widened to <16 x i1> (shuffle indices >= 2 select
; lanes of zeroinitializer) and bitcast to i16.
; In both runs the assertions expect %edi moved into %k1 and used as the
; {%k1} write mask of vpcmpltuq. With only AVX512F the compare is done on zmm
; and a kshiftlw/kshiftrw $14 pair keeps only the low two mask bits.
17747 define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17748 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask:
17749 ; VLX: # %bb.0: # %entry
17750 ; VLX-NEXT: kmovd %edi, %k1
17751 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
17752 ; VLX-NEXT: kmovd %k0, %eax
17753 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
17756 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask:
17757 ; NoVLX: # %bb.0: # %entry
17758 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17759 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17760 ; NoVLX-NEXT: kmovw %edi, %k1
17761 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17762 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17763 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17764 ; NoVLX-NEXT: kmovw %k0, %eax
17765 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
17766 ; NoVLX-NEXT: vzeroupper
17769 %0 = bitcast <2 x i64> %__a to <2 x i64>
17770 %1 = bitcast <2 x i64> %__b to <2 x i64>
17771 %2 = icmp ult <2 x i64> %0, %1
17772 %3 = bitcast i8 %__u to <8 x i1>
17773 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17774 %4 = and <2 x i1> %2, %extract.i
17775 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17776 %6 = bitcast <16 x i1> %5 to i16
; Masked `icmp ult` of %__a against a full <2 x i64> loaded from %__b. %__u is
; bitcast to <8 x i1>, its low two lanes are extracted and ANDed with the
; compare result. The <2 x i1> value is widened to <16 x i1> (shuffle indices
; >= 2 select lanes of zeroinitializer) and bitcast to i16.
; In both runs the assertions expect %edi moved into %k1 and used as the
; {%k1} write mask. With AVX512VL the load is folded as (%rsi). With only
; AVX512F it is a separate vmovdqa followed by a zmm compare and a
; kshiftlw/kshiftrw $14 pair that keeps only the low two mask bits.
17780 define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17781 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem:
17782 ; VLX: # %bb.0: # %entry
17783 ; VLX-NEXT: kmovd %edi, %k1
17784 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
17785 ; VLX-NEXT: kmovd %k0, %eax
17786 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
17789 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem:
17790 ; NoVLX: # %bb.0: # %entry
17791 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17792 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
17793 ; NoVLX-NEXT: kmovw %edi, %k1
17794 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17795 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17796 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17797 ; NoVLX-NEXT: kmovw %k0, %eax
17798 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
17799 ; NoVLX-NEXT: vzeroupper
17802 %0 = bitcast <2 x i64> %__a to <2 x i64>
17803 %load = load <2 x i64>, <2 x i64>* %__b
17804 %1 = bitcast <2 x i64> %load to <2 x i64>
17805 %2 = icmp ult <2 x i64> %0, %1
17806 %3 = bitcast i8 %__u to <8 x i1>
17807 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17808 %4 = and <2 x i1> %2, %extract.i
17809 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17810 %6 = bitcast <16 x i1> %5 to i16
; Unmasked `icmp ult` of %__a against an i64 loaded from %__b and splatted to
; both lanes (insertelement at index 0, then shuffle <0, 0>). The <2 x i1>
; result is widened to <16 x i1> (shuffle indices >= 2 select lanes of
; zeroinitializer, so bits 2..15 are zero) and bitcast to i16.
; With AVX512VL the assertions expect the load folded as a {1to2} broadcast
; operand. With only AVX512F they expect vpbroadcastq into xmm1, a zmm
; compare, and a kshiftlw/kshiftrw $14 pair that keeps only the low two mask
; bits.
17815 define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
17816 ; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b:
17817 ; VLX: # %bb.0: # %entry
17818 ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
17819 ; VLX-NEXT: kmovd %k0, %eax
17820 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
17823 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b:
17824 ; NoVLX: # %bb.0: # %entry
17825 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17826 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
17827 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17828 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17829 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17830 ; NoVLX-NEXT: kmovw %k0, %eax
17831 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
17832 ; NoVLX-NEXT: vzeroupper
17835 %0 = bitcast <2 x i64> %__a to <2 x i64>
17836 %load = load i64, i64* %__b
17837 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17838 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17839 %2 = icmp ult <2 x i64> %0, %1
17840 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17841 %4 = bitcast <16 x i1> %3 to i16
; Masked `icmp ult` of %__a against an i64 loaded from %__b and splatted to
; both lanes. %__u is bitcast to <8 x i1>, its low two lanes are extracted and
; ANDed with the compare result. The <2 x i1> value is widened to <16 x i1>
; (shuffle indices >= 2 select lanes of zeroinitializer) and bitcast to i16.
; In both runs the assertions expect %edi moved into %k1 and used as the
; {%k1} write mask. With AVX512VL the load is a folded {1to2} broadcast. With
; only AVX512F it is a vpbroadcastq into xmm1 followed by a zmm compare and a
; kshiftlw/kshiftrw $14 pair that keeps only the low two mask bits.
17845 define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
17846 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b:
17847 ; VLX: # %bb.0: # %entry
17848 ; VLX-NEXT: kmovd %edi, %k1
17849 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
17850 ; VLX-NEXT: kmovd %k0, %eax
17851 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
17854 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b:
17855 ; NoVLX: # %bb.0: # %entry
17856 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17857 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
17858 ; NoVLX-NEXT: kmovw %edi, %k1
17859 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17860 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17861 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17862 ; NoVLX-NEXT: kmovw %k0, %eax
17863 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
17864 ; NoVLX-NEXT: vzeroupper
17867 %0 = bitcast <2 x i64> %__a to <2 x i64>
17868 %load = load i64, i64* %__b
17869 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17870 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17871 %2 = icmp ult <2 x i64> %0, %1
17872 %3 = bitcast i8 %__u to <8 x i1>
17873 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17874 %4 = and <2 x i1> %extract.i, %2
17875 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17876 %6 = bitcast <16 x i1> %5 to i16
; Unmasked `icmp ult` of two <2 x i64> register operands. The <2 x i1> result
; is widened to <32 x i1>: every shuffle index >= 2 selects a lane of
; zeroinitializer, so bits 2..31 are zero. It is then bitcast to i32.
; With AVX512VL the assertions expect a single xmm vpcmpltuq into %k0. With
; only AVX512F they expect a zmm compare followed by a kshiftlw/kshiftrw $14
; pair that keeps only the low two mask bits.
17881 define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17882 ; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask:
17883 ; VLX: # %bb.0: # %entry
17884 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
17885 ; VLX-NEXT: kmovd %k0, %eax
17888 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask:
17889 ; NoVLX: # %bb.0: # %entry
17890 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17891 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17892 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17893 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17894 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17895 ; NoVLX-NEXT: kmovw %k0, %eax
17896 ; NoVLX-NEXT: vzeroupper
17899 %0 = bitcast <2 x i64> %__a to <2 x i64>
17900 %1 = bitcast <2 x i64> %__b to <2 x i64>
17901 %2 = icmp ult <2 x i64> %0, %1
17902 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17903 %4 = bitcast <32 x i1> %3 to i32
; Unmasked `icmp ult` of %__a against a full <2 x i64> loaded from %__b. The
; <2 x i1> result is widened to <32 x i1> (shuffle indices >= 2 select lanes
; of zeroinitializer, so bits 2..31 are zero) and bitcast to i32.
; With AVX512VL the assertions expect the load folded into vpcmpltuq as a
; (%rdi) memory operand. With only AVX512F they expect a separate vmovdqa
; load, a zmm compare, and a kshiftlw/kshiftrw $14 pair that keeps only the
; low two mask bits.
17907 define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17908 ; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem:
17909 ; VLX: # %bb.0: # %entry
17910 ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
17911 ; VLX-NEXT: kmovd %k0, %eax
17914 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem:
17915 ; NoVLX: # %bb.0: # %entry
17916 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17917 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
17918 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17919 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17920 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17921 ; NoVLX-NEXT: kmovw %k0, %eax
17922 ; NoVLX-NEXT: vzeroupper
17925 %0 = bitcast <2 x i64> %__a to <2 x i64>
17926 %load = load <2 x i64>, <2 x i64>* %__b
17927 %1 = bitcast <2 x i64> %load to <2 x i64>
17928 %2 = icmp ult <2 x i64> %0, %1
17929 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17930 %4 = bitcast <32 x i1> %3 to i32
; Masked `icmp ult` of two <2 x i64> register operands. %__u is bitcast to
; <8 x i1>, its low two lanes are extracted and ANDed with the compare result.
; The <2 x i1> value is widened to <32 x i1> (shuffle indices >= 2 select
; lanes of zeroinitializer) and bitcast to i32.
; In both runs the assertions expect %edi moved into %k1 and used as the
; {%k1} write mask of vpcmpltuq. With only AVX512F the compare is done on zmm
; and a kshiftlw/kshiftrw $14 pair keeps only the low two mask bits.
17934 define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17935 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask:
17936 ; VLX: # %bb.0: # %entry
17937 ; VLX-NEXT: kmovd %edi, %k1
17938 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
17939 ; VLX-NEXT: kmovd %k0, %eax
17942 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask:
17943 ; NoVLX: # %bb.0: # %entry
17944 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17945 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17946 ; NoVLX-NEXT: kmovw %edi, %k1
17947 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17948 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17949 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17950 ; NoVLX-NEXT: kmovw %k0, %eax
17951 ; NoVLX-NEXT: vzeroupper
17954 %0 = bitcast <2 x i64> %__a to <2 x i64>
17955 %1 = bitcast <2 x i64> %__b to <2 x i64>
17956 %2 = icmp ult <2 x i64> %0, %1
17957 %3 = bitcast i8 %__u to <8 x i1>
17958 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17959 %4 = and <2 x i1> %2, %extract.i
17960 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17961 %6 = bitcast <32 x i1> %5 to i32
; Masked `icmp ult` of %__a against a full <2 x i64> loaded from %__b. %__u is
; bitcast to <8 x i1>, its low two lanes are extracted and ANDed with the
; compare result. The <2 x i1> value is widened to <32 x i1> (shuffle indices
; >= 2 select lanes of zeroinitializer) and bitcast to i32.
; In both runs the assertions expect %edi moved into %k1 and used as the
; {%k1} write mask. With AVX512VL the load is folded as (%rsi). With only
; AVX512F it is a separate vmovdqa followed by a zmm compare and a
; kshiftlw/kshiftrw $14 pair that keeps only the low two mask bits.
17965 define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17966 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem:
17967 ; VLX: # %bb.0: # %entry
17968 ; VLX-NEXT: kmovd %edi, %k1
17969 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
17970 ; VLX-NEXT: kmovd %k0, %eax
17973 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem:
17974 ; NoVLX: # %bb.0: # %entry
17975 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17976 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
17977 ; NoVLX-NEXT: kmovw %edi, %k1
17978 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17979 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17980 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17981 ; NoVLX-NEXT: kmovw %k0, %eax
17982 ; NoVLX-NEXT: vzeroupper
17985 %0 = bitcast <2 x i64> %__a to <2 x i64>
17986 %load = load <2 x i64>, <2 x i64>* %__b
17987 %1 = bitcast <2 x i64> %load to <2 x i64>
17988 %2 = icmp ult <2 x i64> %0, %1
17989 %3 = bitcast i8 %__u to <8 x i1>
17990 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17991 %4 = and <2 x i1> %2, %extract.i
17992 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17993 %6 = bitcast <32 x i1> %5 to i32
; Unmasked `icmp ult` of %__a against an i64 loaded from %__b and splatted to
; both lanes (insertelement at index 0, then shuffle <0, 0>). The <2 x i1>
; result is widened to <32 x i1> (shuffle indices >= 2 select lanes of
; zeroinitializer, so bits 2..31 are zero) and bitcast to i32.
; With AVX512VL the assertions expect the load folded as a {1to2} broadcast
; operand. With only AVX512F they expect vpbroadcastq into xmm1, a zmm
; compare, and a kshiftlw/kshiftrw $14 pair that keeps only the low two mask
; bits.
17998 define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
17999 ; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b:
18000 ; VLX: # %bb.0: # %entry
18001 ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
18002 ; VLX-NEXT: kmovd %k0, %eax
18005 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b:
18006 ; NoVLX: # %bb.0: # %entry
18007 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18008 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
18009 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18010 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18011 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18012 ; NoVLX-NEXT: kmovw %k0, %eax
18013 ; NoVLX-NEXT: vzeroupper
18016 %0 = bitcast <2 x i64> %__a to <2 x i64>
18017 %load = load i64, i64* %__b
18018 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
18019 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
18020 %2 = icmp ult <2 x i64> %0, %1
18021 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18022 %4 = bitcast <32 x i1> %3 to i32
; Masked `icmp ult` of %__a against an i64 loaded from %__b and splatted to
; both lanes. %__u is bitcast to <8 x i1>, its low two lanes are extracted and
; ANDed with the compare result. The <2 x i1> value is widened to <32 x i1>
; (shuffle indices >= 2 select lanes of zeroinitializer) and bitcast to i32.
; In both runs the assertions expect %edi moved into %k1 and used as the
; {%k1} write mask. With AVX512VL the load is a folded {1to2} broadcast. With
; only AVX512F it is a vpbroadcastq into xmm1 followed by a zmm compare and a
; kshiftlw/kshiftrw $14 pair that keeps only the low two mask bits.
18026 define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
18027 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b:
18028 ; VLX: # %bb.0: # %entry
18029 ; VLX-NEXT: kmovd %edi, %k1
18030 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
18031 ; VLX-NEXT: kmovd %k0, %eax
18034 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b:
18035 ; NoVLX: # %bb.0: # %entry
18036 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18037 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
18038 ; NoVLX-NEXT: kmovw %edi, %k1
18039 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18040 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18041 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18042 ; NoVLX-NEXT: kmovw %k0, %eax
18043 ; NoVLX-NEXT: vzeroupper
18046 %0 = bitcast <2 x i64> %__a to <2 x i64>
18047 %load = load i64, i64* %__b
18048 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
18049 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
18050 %2 = icmp ult <2 x i64> %0, %1
18051 %3 = bitcast i8 %__u to <8 x i1>
18052 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18053 %4 = and <2 x i1> %extract.i, %2
18054 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18055 %6 = bitcast <32 x i1> %5 to i32
18060 define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
18061 ; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask:
18062 ; VLX: # %bb.0: # %entry
18063 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
18064 ; VLX-NEXT: kmovq %k0, %rax
18067 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask:
18068 ; NoVLX: # %bb.0: # %entry
18069 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
18070 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18071 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18072 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18073 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18074 ; NoVLX-NEXT: kmovw %k0, %eax
18075 ; NoVLX-NEXT: vzeroupper
18078 %0 = bitcast <2 x i64> %__a to <2 x i64>
18079 %1 = bitcast <2 x i64> %__b to <2 x i64>
18080 %2 = icmp ult <2 x i64> %0, %1
18081 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18082 %4 = bitcast <64 x i1> %3 to i64
18086 define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
18087 ; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem:
18088 ; VLX: # %bb.0: # %entry
18089 ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
18090 ; VLX-NEXT: kmovq %k0, %rax
18093 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem:
18094 ; NoVLX: # %bb.0: # %entry
18095 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18096 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
18097 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18098 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18099 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18100 ; NoVLX-NEXT: kmovw %k0, %eax
18101 ; NoVLX-NEXT: vzeroupper
18104 %0 = bitcast <2 x i64> %__a to <2 x i64>
18105 %load = load <2 x i64>, <2 x i64>* %__b
18106 %1 = bitcast <2 x i64> %load to <2 x i64>
18107 %2 = icmp ult <2 x i64> %0, %1
18108 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18109 %4 = bitcast <64 x i1> %3 to i64
18113 define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
18114 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask:
18115 ; VLX: # %bb.0: # %entry
18116 ; VLX-NEXT: kmovd %edi, %k1
18117 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
18118 ; VLX-NEXT: kmovq %k0, %rax
18121 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask:
18122 ; NoVLX: # %bb.0: # %entry
18123 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
18124 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18125 ; NoVLX-NEXT: kmovw %edi, %k1
18126 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18127 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18128 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18129 ; NoVLX-NEXT: kmovw %k0, %eax
18130 ; NoVLX-NEXT: vzeroupper
18133 %0 = bitcast <2 x i64> %__a to <2 x i64>
18134 %1 = bitcast <2 x i64> %__b to <2 x i64>
18135 %2 = icmp ult <2 x i64> %0, %1
18136 %3 = bitcast i8 %__u to <8 x i1>
18137 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18138 %4 = and <2 x i1> %2, %extract.i
18139 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18140 %6 = bitcast <64 x i1> %5 to i64
18144 define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
18145 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem:
18146 ; VLX: # %bb.0: # %entry
18147 ; VLX-NEXT: kmovd %edi, %k1
18148 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
18149 ; VLX-NEXT: kmovq %k0, %rax
18152 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem:
18153 ; NoVLX: # %bb.0: # %entry
18154 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18155 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
18156 ; NoVLX-NEXT: kmovw %edi, %k1
18157 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18158 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18159 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18160 ; NoVLX-NEXT: kmovw %k0, %eax
18161 ; NoVLX-NEXT: vzeroupper
18164 %0 = bitcast <2 x i64> %__a to <2 x i64>
18165 %load = load <2 x i64>, <2 x i64>* %__b
18166 %1 = bitcast <2 x i64> %load to <2 x i64>
18167 %2 = icmp ult <2 x i64> %0, %1
18168 %3 = bitcast i8 %__u to <8 x i1>
18169 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18170 %4 = and <2 x i1> %2, %extract.i
18171 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18172 %6 = bitcast <64 x i1> %5 to i64
18177 define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
18178 ; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b:
18179 ; VLX: # %bb.0: # %entry
18180 ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
18181 ; VLX-NEXT: kmovq %k0, %rax
18184 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b:
18185 ; NoVLX: # %bb.0: # %entry
18186 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18187 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
18188 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18189 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18190 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18191 ; NoVLX-NEXT: kmovw %k0, %eax
18192 ; NoVLX-NEXT: vzeroupper
18195 %0 = bitcast <2 x i64> %__a to <2 x i64>
18196 %load = load i64, i64* %__b
18197 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
18198 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
18199 %2 = icmp ult <2 x i64> %0, %1
18200 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18201 %4 = bitcast <64 x i1> %3 to i64
18205 define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
18206 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b:
18207 ; VLX: # %bb.0: # %entry
18208 ; VLX-NEXT: kmovd %edi, %k1
18209 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
18210 ; VLX-NEXT: kmovq %k0, %rax
18213 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b:
18214 ; NoVLX: # %bb.0: # %entry
18215 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18216 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
18217 ; NoVLX-NEXT: kmovw %edi, %k1
18218 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18219 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18220 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18221 ; NoVLX-NEXT: kmovw %k0, %eax
18222 ; NoVLX-NEXT: vzeroupper
18225 %0 = bitcast <2 x i64> %__a to <2 x i64>
18226 %load = load i64, i64* %__b
18227 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
18228 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
18229 %2 = icmp ult <2 x i64> %0, %1
18230 %3 = bitcast i8 %__u to <8 x i1>
18231 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18232 %4 = and <2 x i1> %extract.i, %2
18233 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18234 %6 = bitcast <64 x i1> %5 to i64
; Unsigned less-than compare (icmp ult) of two <4 x i64> arguments. The
; <4 x i1> result is widened to <8 x i1> by a shufflevector whose upper four
; lanes select from the zeroinitializer operand, then bitcast to i8.
; Expected codegen, VLX run - a ymm vpcmpltuq into %k0 and kmovd. NoVLX run -
; a zmm-width vpcmpltuq followed by a kshiftlw/kshiftrw $12 pair (leaving only
; the low four mask bits) and kmovw.
18239 define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18240 ; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask:
18241 ; VLX: # %bb.0: # %entry
18242 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
18243 ; VLX-NEXT: kmovd %k0, %eax
18244 ; VLX-NEXT: # kill: def $al killed $al killed $eax
18245 ; VLX-NEXT: vzeroupper
18248 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask:
18249 ; NoVLX: # %bb.0: # %entry
18250 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18251 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18252 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18253 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18254 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18255 ; NoVLX-NEXT: kmovw %k0, %eax
18256 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
18257 ; NoVLX-NEXT: vzeroupper
18260 %0 = bitcast <4 x i64> %__a to <4 x i64>
18261 %1 = bitcast <4 x i64> %__b to <4 x i64>
18262 %2 = icmp ult <4 x i64> %0, %1
18263 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18264 %4 = bitcast <8 x i1> %3 to i8
; Unsigned less-than compare (icmp ult) of %__a against a <4 x i64> loaded
; from %__b. The <4 x i1> result is widened to <8 x i1> with the upper four
; lanes selecting from the zeroinitializer operand, then bitcast to i8.
; Expected codegen, VLX run - vpcmpltuq with the load folded as a memory
; operand, then kmovd. NoVLX run - a separate vmovdqa load, a zmm-width
; vpcmpltuq, and a kshiftlw/kshiftrw $12 pair before kmovw.
18268 define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18269 ; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem:
18270 ; VLX: # %bb.0: # %entry
18271 ; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
18272 ; VLX-NEXT: kmovd %k0, %eax
18273 ; VLX-NEXT: # kill: def $al killed $al killed $eax
18274 ; VLX-NEXT: vzeroupper
18277 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem:
18278 ; NoVLX: # %bb.0: # %entry
18279 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18280 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
18281 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18282 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18283 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18284 ; NoVLX-NEXT: kmovw %k0, %eax
18285 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
18286 ; NoVLX-NEXT: vzeroupper
18289 %0 = bitcast <4 x i64> %__a to <4 x i64>
18290 %load = load <4 x i64>, <4 x i64>* %__b
18291 %1 = bitcast <4 x i64> %load to <4 x i64>
18292 %2 = icmp ult <4 x i64> %0, %1
18293 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18294 %4 = bitcast <8 x i1> %3 to i8
; Masked variant: icmp ult of two <4 x i64> arguments, ANDed with the low four
; lanes of %__u viewed as <8 x i1>. The <4 x i1> result is widened to
; <8 x i1> with the upper four lanes selecting from the zeroinitializer
; operand, then bitcast to i8.
; Expected codegen, VLX run - %edi moved into %k1 and used as the write mask
; of a ymm vpcmpltuq, then kmovd. NoVLX run - a masked zmm-width vpcmpltuq and
; a kshiftlw/kshiftrw $12 pair before kmovw.
18298 define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18299 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask:
18300 ; VLX: # %bb.0: # %entry
18301 ; VLX-NEXT: kmovd %edi, %k1
18302 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
18303 ; VLX-NEXT: kmovd %k0, %eax
18304 ; VLX-NEXT: # kill: def $al killed $al killed $eax
18305 ; VLX-NEXT: vzeroupper
18308 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask:
18309 ; NoVLX: # %bb.0: # %entry
18310 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18311 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18312 ; NoVLX-NEXT: kmovw %edi, %k1
18313 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18314 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18315 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18316 ; NoVLX-NEXT: kmovw %k0, %eax
18317 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
18318 ; NoVLX-NEXT: vzeroupper
18321 %0 = bitcast <4 x i64> %__a to <4 x i64>
18322 %1 = bitcast <4 x i64> %__b to <4 x i64>
18323 %2 = icmp ult <4 x i64> %0, %1
18324 %3 = bitcast i8 %__u to <8 x i1>
18325 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18326 %4 = and <4 x i1> %2, %extract.i
18327 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18328 %6 = bitcast <8 x i1> %5 to i8
; Masked variant with a memory operand: icmp ult of %__a against a <4 x i64>
; loaded from %__b, ANDed with the low four lanes of %__u viewed as <8 x i1>.
; The <4 x i1> result is widened to <8 x i1> with the upper four lanes
; selecting from the zeroinitializer operand, then bitcast to i8.
; Expected codegen, VLX run - %edi moved into %k1, a masked vpcmpltuq with the
; load folded from (%rsi), then kmovd. NoVLX run - a separate vmovdqa load, a
; masked zmm-width vpcmpltuq, and a kshiftlw/kshiftrw $12 pair before kmovw.
18332 define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18333 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem:
18334 ; VLX: # %bb.0: # %entry
18335 ; VLX-NEXT: kmovd %edi, %k1
18336 ; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
18337 ; VLX-NEXT: kmovd %k0, %eax
18338 ; VLX-NEXT: # kill: def $al killed $al killed $eax
18339 ; VLX-NEXT: vzeroupper
18342 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem:
18343 ; NoVLX: # %bb.0: # %entry
18344 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18345 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
18346 ; NoVLX-NEXT: kmovw %edi, %k1
18347 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18348 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18349 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18350 ; NoVLX-NEXT: kmovw %k0, %eax
18351 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
18352 ; NoVLX-NEXT: vzeroupper
18355 %0 = bitcast <4 x i64> %__a to <4 x i64>
18356 %load = load <4 x i64>, <4 x i64>* %__b
18357 %1 = bitcast <4 x i64> %load to <4 x i64>
18358 %2 = icmp ult <4 x i64> %0, %1
18359 %3 = bitcast i8 %__u to <8 x i1>
18360 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18361 %4 = and <4 x i1> %2, %extract.i
18362 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18363 %6 = bitcast <8 x i1> %5 to i8
; Broadcast variant: icmp ult of %__a against a <4 x i64> splat of the i64
; loaded from %__b. The <4 x i1> result is widened to <8 x i1> with the upper
; four lanes selecting from the zeroinitializer operand, then bitcast to i8.
; Expected codegen, VLX run - vpcmpltuq with a {1to4} broadcast memory
; operand, then kmovd. NoVLX run - vpbroadcastq, a zmm-width vpcmpltuq, and a
; kshiftlw/kshiftrw $12 pair before kmovw.
18368 define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
18369 ; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b:
18370 ; VLX: # %bb.0: # %entry
18371 ; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
18372 ; VLX-NEXT: kmovd %k0, %eax
18373 ; VLX-NEXT: # kill: def $al killed $al killed $eax
18374 ; VLX-NEXT: vzeroupper
18377 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b:
18378 ; NoVLX: # %bb.0: # %entry
18379 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18380 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
18381 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18382 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18383 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18384 ; NoVLX-NEXT: kmovw %k0, %eax
18385 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
18386 ; NoVLX-NEXT: vzeroupper
18389 %0 = bitcast <4 x i64> %__a to <4 x i64>
18390 %load = load i64, i64* %__b
18391 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18392 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18393 %2 = icmp ult <4 x i64> %0, %1
18394 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18395 %4 = bitcast <8 x i1> %3 to i8
; Masked broadcast variant: icmp ult of %__a against a <4 x i64> splat of the
; i64 loaded from %__b, ANDed with the low four lanes of %__u viewed as
; <8 x i1>. The <4 x i1> result is widened to <8 x i1> with the upper four
; lanes selecting from the zeroinitializer operand, then bitcast to i8.
; Expected codegen, VLX run - %edi moved into %k1, a masked vpcmpltuq with a
; {1to4} broadcast from (%rsi), then kmovd. NoVLX run - vpbroadcastq, a masked
; zmm-width vpcmpltuq, and a kshiftlw/kshiftrw $12 pair before kmovw.
18399 define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
18400 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b:
18401 ; VLX: # %bb.0: # %entry
18402 ; VLX-NEXT: kmovd %edi, %k1
18403 ; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
18404 ; VLX-NEXT: kmovd %k0, %eax
18405 ; VLX-NEXT: # kill: def $al killed $al killed $eax
18406 ; VLX-NEXT: vzeroupper
18409 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b:
18410 ; NoVLX: # %bb.0: # %entry
18411 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18412 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
18413 ; NoVLX-NEXT: kmovw %edi, %k1
18414 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18415 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18416 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18417 ; NoVLX-NEXT: kmovw %k0, %eax
18418 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
18419 ; NoVLX-NEXT: vzeroupper
18422 %0 = bitcast <4 x i64> %__a to <4 x i64>
18423 %load = load i64, i64* %__b
18424 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18425 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18426 %2 = icmp ult <4 x i64> %0, %1
18427 %3 = bitcast i8 %__u to <8 x i1>
18428 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18429 %4 = and <4 x i1> %extract.i, %2
18430 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18431 %6 = bitcast <8 x i1> %5 to i8
; Unsigned less-than compare (icmp ult) of two <4 x i64> arguments. The
; <4 x i1> result is widened to <16 x i1> by a shufflevector whose lanes 4-15
; all select from the zeroinitializer operand, then bitcast to i16.
; Expected codegen, VLX run - a ymm vpcmpltuq into %k0 and kmovd. NoVLX run -
; a zmm-width vpcmpltuq followed by a kshiftlw/kshiftrw $12 pair (leaving only
; the low four mask bits) and kmovw.
18436 define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18437 ; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask:
18438 ; VLX: # %bb.0: # %entry
18439 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
18440 ; VLX-NEXT: kmovd %k0, %eax
18441 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18442 ; VLX-NEXT: vzeroupper
18445 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask:
18446 ; NoVLX: # %bb.0: # %entry
18447 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18448 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18449 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18450 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18451 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18452 ; NoVLX-NEXT: kmovw %k0, %eax
18453 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18454 ; NoVLX-NEXT: vzeroupper
18457 %0 = bitcast <4 x i64> %__a to <4 x i64>
18458 %1 = bitcast <4 x i64> %__b to <4 x i64>
18459 %2 = icmp ult <4 x i64> %0, %1
18460 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18461 %4 = bitcast <16 x i1> %3 to i16
; Unsigned less-than compare (icmp ult) of %__a against a <4 x i64> loaded
; from %__b. The <4 x i1> result is widened to <16 x i1> with lanes 4-15 all
; selecting from the zeroinitializer operand, then bitcast to i16.
; Expected codegen, VLX run - vpcmpltuq with the load folded as a memory
; operand, then kmovd. NoVLX run - a separate vmovdqa load, a zmm-width
; vpcmpltuq, and a kshiftlw/kshiftrw $12 pair before kmovw.
18465 define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18466 ; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem:
18467 ; VLX: # %bb.0: # %entry
18468 ; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
18469 ; VLX-NEXT: kmovd %k0, %eax
18470 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18471 ; VLX-NEXT: vzeroupper
18474 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem:
18475 ; NoVLX: # %bb.0: # %entry
18476 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18477 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
18478 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18479 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18480 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18481 ; NoVLX-NEXT: kmovw %k0, %eax
18482 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18483 ; NoVLX-NEXT: vzeroupper
18486 %0 = bitcast <4 x i64> %__a to <4 x i64>
18487 %load = load <4 x i64>, <4 x i64>* %__b
18488 %1 = bitcast <4 x i64> %load to <4 x i64>
18489 %2 = icmp ult <4 x i64> %0, %1
18490 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18491 %4 = bitcast <16 x i1> %3 to i16
; Masked variant: icmp ult of two <4 x i64> arguments, ANDed with the low four
; lanes of %__u viewed as <8 x i1>. The <4 x i1> result is widened to
; <16 x i1> with lanes 4-15 all selecting from the zeroinitializer operand,
; then bitcast to i16.
; Expected codegen, VLX run - %edi moved into %k1 and used as the write mask
; of a ymm vpcmpltuq, then kmovd. NoVLX run - a masked zmm-width vpcmpltuq and
; a kshiftlw/kshiftrw $12 pair before kmovw.
18495 define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18496 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask:
18497 ; VLX: # %bb.0: # %entry
18498 ; VLX-NEXT: kmovd %edi, %k1
18499 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
18500 ; VLX-NEXT: kmovd %k0, %eax
18501 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18502 ; VLX-NEXT: vzeroupper
18505 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask:
18506 ; NoVLX: # %bb.0: # %entry
18507 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18508 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18509 ; NoVLX-NEXT: kmovw %edi, %k1
18510 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18511 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18512 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18513 ; NoVLX-NEXT: kmovw %k0, %eax
18514 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18515 ; NoVLX-NEXT: vzeroupper
18518 %0 = bitcast <4 x i64> %__a to <4 x i64>
18519 %1 = bitcast <4 x i64> %__b to <4 x i64>
18520 %2 = icmp ult <4 x i64> %0, %1
18521 %3 = bitcast i8 %__u to <8 x i1>
18522 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18523 %4 = and <4 x i1> %2, %extract.i
18524 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18525 %6 = bitcast <16 x i1> %5 to i16
; Masked variant with a memory operand: icmp ult of %__a against a <4 x i64>
; loaded from %__b, ANDed with the low four lanes of %__u viewed as <8 x i1>.
; The <4 x i1> result is widened to <16 x i1> with lanes 4-15 all selecting
; from the zeroinitializer operand, then bitcast to i16.
; Expected codegen, VLX run - %edi moved into %k1, a masked vpcmpltuq with the
; load folded from (%rsi), then kmovd. NoVLX run - a separate vmovdqa load, a
; masked zmm-width vpcmpltuq, and a kshiftlw/kshiftrw $12 pair before kmovw.
18529 define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18530 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem:
18531 ; VLX: # %bb.0: # %entry
18532 ; VLX-NEXT: kmovd %edi, %k1
18533 ; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
18534 ; VLX-NEXT: kmovd %k0, %eax
18535 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18536 ; VLX-NEXT: vzeroupper
18539 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem:
18540 ; NoVLX: # %bb.0: # %entry
18541 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18542 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
18543 ; NoVLX-NEXT: kmovw %edi, %k1
18544 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18545 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18546 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18547 ; NoVLX-NEXT: kmovw %k0, %eax
18548 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18549 ; NoVLX-NEXT: vzeroupper
18552 %0 = bitcast <4 x i64> %__a to <4 x i64>
18553 %load = load <4 x i64>, <4 x i64>* %__b
18554 %1 = bitcast <4 x i64> %load to <4 x i64>
18555 %2 = icmp ult <4 x i64> %0, %1
18556 %3 = bitcast i8 %__u to <8 x i1>
18557 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18558 %4 = and <4 x i1> %2, %extract.i
18559 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18560 %6 = bitcast <16 x i1> %5 to i16
; Broadcast variant: icmp ult of %__a against a <4 x i64> splat of the i64
; loaded from %__b. The <4 x i1> result is widened to <16 x i1> with lanes
; 4-15 all selecting from the zeroinitializer operand, then bitcast to i16.
; Expected codegen, VLX run - vpcmpltuq with a {1to4} broadcast memory
; operand, then kmovd. NoVLX run - vpbroadcastq, a zmm-width vpcmpltuq, and a
; kshiftlw/kshiftrw $12 pair before kmovw.
18565 define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
18566 ; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b:
18567 ; VLX: # %bb.0: # %entry
18568 ; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
18569 ; VLX-NEXT: kmovd %k0, %eax
18570 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18571 ; VLX-NEXT: vzeroupper
18574 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b:
18575 ; NoVLX: # %bb.0: # %entry
18576 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18577 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
18578 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18579 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18580 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18581 ; NoVLX-NEXT: kmovw %k0, %eax
18582 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18583 ; NoVLX-NEXT: vzeroupper
18586 %0 = bitcast <4 x i64> %__a to <4 x i64>
18587 %load = load i64, i64* %__b
18588 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18589 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18590 %2 = icmp ult <4 x i64> %0, %1
18591 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18592 %4 = bitcast <16 x i1> %3 to i16
; Masked broadcast variant: icmp ult of %__a against a <4 x i64> splat of the
; i64 loaded from %__b, ANDed with the low four lanes of %__u viewed as
; <8 x i1>. The <4 x i1> result is widened to <16 x i1> with lanes 4-15 all
; selecting from the zeroinitializer operand, then bitcast to i16.
; Expected codegen, VLX run - %edi moved into %k1, a masked vpcmpltuq with a
; {1to4} broadcast from (%rsi), then kmovd. NoVLX run - vpbroadcastq, a masked
; zmm-width vpcmpltuq, and a kshiftlw/kshiftrw $12 pair before kmovw.
18596 define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
18597 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b:
18598 ; VLX: # %bb.0: # %entry
18599 ; VLX-NEXT: kmovd %edi, %k1
18600 ; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
18601 ; VLX-NEXT: kmovd %k0, %eax
18602 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18603 ; VLX-NEXT: vzeroupper
18606 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b:
18607 ; NoVLX: # %bb.0: # %entry
18608 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18609 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
18610 ; NoVLX-NEXT: kmovw %edi, %k1
18611 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18612 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18613 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18614 ; NoVLX-NEXT: kmovw %k0, %eax
18615 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18616 ; NoVLX-NEXT: vzeroupper
18619 %0 = bitcast <4 x i64> %__a to <4 x i64>
18620 %load = load i64, i64* %__b
18621 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18622 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18623 %2 = icmp ult <4 x i64> %0, %1
18624 %3 = bitcast i8 %__u to <8 x i1>
18625 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18626 %4 = and <4 x i1> %extract.i, %2
18627 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18628 %6 = bitcast <16 x i1> %5 to i16
; Unsigned less-than compare (icmp ult) of two <4 x i64> arguments. The
; <4 x i1> result is widened to <32 x i1> by a shufflevector whose lanes 4-31
; all select from the zeroinitializer operand, then bitcast to i32.
; Expected codegen, VLX run - a ymm vpcmpltuq into %k0 and kmovd. NoVLX run -
; a zmm-width vpcmpltuq followed by a kshiftlw/kshiftrw $12 pair (leaving only
; the low four mask bits) and kmovw.
18633 define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18634 ; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask:
18635 ; VLX: # %bb.0: # %entry
18636 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
18637 ; VLX-NEXT: kmovd %k0, %eax
18638 ; VLX-NEXT: vzeroupper
18641 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask:
18642 ; NoVLX: # %bb.0: # %entry
18643 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18644 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18645 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18646 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18647 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18648 ; NoVLX-NEXT: kmovw %k0, %eax
18649 ; NoVLX-NEXT: vzeroupper
18652 %0 = bitcast <4 x i64> %__a to <4 x i64>
18653 %1 = bitcast <4 x i64> %__b to <4 x i64>
18654 %2 = icmp ult <4 x i64> %0, %1
18655 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18656 %4 = bitcast <32 x i1> %3 to i32
; Unsigned less-than compare (icmp ult) of %__a against a <4 x i64> loaded
; from %__b. The <4 x i1> result is widened to <32 x i1> with lanes 4-31 all
; selecting from the zeroinitializer operand, then bitcast to i32.
; Expected codegen, VLX run - vpcmpltuq with the load folded as a memory
; operand, then kmovd. NoVLX run - a separate vmovdqa load, a zmm-width
; vpcmpltuq, and a kshiftlw/kshiftrw $12 pair before kmovw.
18660 define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18661 ; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem:
18662 ; VLX: # %bb.0: # %entry
18663 ; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
18664 ; VLX-NEXT: kmovd %k0, %eax
18665 ; VLX-NEXT: vzeroupper
18668 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem:
18669 ; NoVLX: # %bb.0: # %entry
18670 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18671 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
18672 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18673 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18674 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18675 ; NoVLX-NEXT: kmovw %k0, %eax
18676 ; NoVLX-NEXT: vzeroupper
18679 %0 = bitcast <4 x i64> %__a to <4 x i64>
18680 %load = load <4 x i64>, <4 x i64>* %__b
18681 %1 = bitcast <4 x i64> %load to <4 x i64>
18682 %2 = icmp ult <4 x i64> %0, %1
18683 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18684 %4 = bitcast <32 x i1> %3 to i32
; Masked variant: icmp ult of two <4 x i64> arguments, ANDed with the low four
; lanes of %__u viewed as <8 x i1>. The <4 x i1> result is widened to
; <32 x i1> with lanes 4-31 all selecting from the zeroinitializer operand,
; then bitcast to i32.
; Expected codegen, VLX run - %edi moved into %k1 and used as the write mask
; of a ymm vpcmpltuq, then kmovd. NoVLX run - a masked zmm-width vpcmpltuq and
; a kshiftlw/kshiftrw $12 pair before kmovw.
18688 define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18689 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask:
18690 ; VLX: # %bb.0: # %entry
18691 ; VLX-NEXT: kmovd %edi, %k1
18692 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
18693 ; VLX-NEXT: kmovd %k0, %eax
18694 ; VLX-NEXT: vzeroupper
18697 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask:
18698 ; NoVLX: # %bb.0: # %entry
18699 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18700 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18701 ; NoVLX-NEXT: kmovw %edi, %k1
18702 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18703 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18704 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18705 ; NoVLX-NEXT: kmovw %k0, %eax
18706 ; NoVLX-NEXT: vzeroupper
18709 %0 = bitcast <4 x i64> %__a to <4 x i64>
18710 %1 = bitcast <4 x i64> %__b to <4 x i64>
18711 %2 = icmp ult <4 x i64> %0, %1
18712 %3 = bitcast i8 %__u to <8 x i1>
18713 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18714 %4 = and <4 x i1> %2, %extract.i
18715 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18716 %6 = bitcast <32 x i1> %5 to i32
; Masked variant with a memory operand: icmp ult of %__a against a <4 x i64>
; loaded from %__b, ANDed with the low four lanes of %__u viewed as <8 x i1>.
; The <4 x i1> result is widened to <32 x i1> with lanes 4-31 all selecting
; from the zeroinitializer operand, then bitcast to i32.
; Expected codegen, VLX run - %edi moved into %k1, a masked vpcmpltuq with the
; load folded from (%rsi), then kmovd. NoVLX run - a separate vmovdqa load, a
; masked zmm-width vpcmpltuq, and a kshiftlw/kshiftrw $12 pair before kmovw.
18720 define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18721 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem:
18722 ; VLX: # %bb.0: # %entry
18723 ; VLX-NEXT: kmovd %edi, %k1
18724 ; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
18725 ; VLX-NEXT: kmovd %k0, %eax
18726 ; VLX-NEXT: vzeroupper
18729 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem:
18730 ; NoVLX: # %bb.0: # %entry
18731 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18732 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
18733 ; NoVLX-NEXT: kmovw %edi, %k1
18734 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18735 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18736 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18737 ; NoVLX-NEXT: kmovw %k0, %eax
18738 ; NoVLX-NEXT: vzeroupper
18741 %0 = bitcast <4 x i64> %__a to <4 x i64>
18742 %load = load <4 x i64>, <4 x i64>* %__b
18743 %1 = bitcast <4 x i64> %load to <4 x i64>
18744 %2 = icmp ult <4 x i64> %0, %1
18745 %3 = bitcast i8 %__u to <8 x i1>
18746 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18747 %4 = and <4 x i1> %2, %extract.i
18748 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18749 %6 = bitcast <32 x i1> %5 to i32
; Broadcast variant: icmp ult of %__a against a <4 x i64> splat of the i64
; loaded from %__b. The <4 x i1> result is widened to <32 x i1> with lanes
; 4-31 all selecting from the zeroinitializer operand, then bitcast to i32.
; Expected codegen, VLX run - vpcmpltuq with a {1to4} broadcast memory
; operand, then kmovd. NoVLX run - vpbroadcastq, a zmm-width vpcmpltuq, and a
; kshiftlw/kshiftrw $12 pair before kmovw.
18754 define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
18755 ; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b:
18756 ; VLX: # %bb.0: # %entry
18757 ; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
18758 ; VLX-NEXT: kmovd %k0, %eax
18759 ; VLX-NEXT: vzeroupper
18762 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b:
18763 ; NoVLX: # %bb.0: # %entry
18764 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18765 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
18766 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18767 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18768 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18769 ; NoVLX-NEXT: kmovw %k0, %eax
18770 ; NoVLX-NEXT: vzeroupper
18773 %0 = bitcast <4 x i64> %__a to <4 x i64>
18774 %load = load i64, i64* %__b
18775 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18776 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18777 %2 = icmp ult <4 x i64> %0, %1
18778 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18779 %4 = bitcast <32 x i1> %3 to i32
; Masked broadcast variant: icmp ult of %__a against a <4 x i64> splat of the
; i64 loaded from %__b, ANDed with the low four lanes of %__u viewed as
; <8 x i1>. The <4 x i1> result is widened to <32 x i1> with lanes 4-31 all
; selecting from the zeroinitializer operand, then bitcast to i32.
; Expected codegen, VLX run - %edi moved into %k1, a masked vpcmpltuq with a
; {1to4} broadcast from (%rsi), then kmovd. NoVLX run - vpbroadcastq, a masked
; zmm-width vpcmpltuq, and a kshiftlw/kshiftrw $12 pair before kmovw.
18783 define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
18784 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b:
18785 ; VLX: # %bb.0: # %entry
18786 ; VLX-NEXT: kmovd %edi, %k1
18787 ; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
18788 ; VLX-NEXT: kmovd %k0, %eax
18789 ; VLX-NEXT: vzeroupper
18792 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b:
18793 ; NoVLX: # %bb.0: # %entry
18794 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18795 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
18796 ; NoVLX-NEXT: kmovw %edi, %k1
18797 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18798 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18799 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18800 ; NoVLX-NEXT: kmovw %k0, %eax
18801 ; NoVLX-NEXT: vzeroupper
18804 %0 = bitcast <4 x i64> %__a to <4 x i64>
18805 %load = load i64, i64* %__b
18806 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18807 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18808 %2 = icmp ult <4 x i64> %0, %1
18809 %3 = bitcast i8 %__u to <8 x i1>
18810 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18811 %4 = and <4 x i1> %extract.i, %2
18812 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18813 %6 = bitcast <32 x i1> %5 to i32
; Unsigned less-than compare (icmp ult) of two <4 x i64> arguments. The
; <4 x i1> result is widened to <64 x i1> by a shufflevector whose lanes 4-63
; all select from the zeroinitializer operand, then bitcast to i64.
; Expected codegen, VLX run - a ymm vpcmpltuq into %k0 and kmovq. NoVLX run -
; a zmm-width vpcmpltuq followed by a kshiftlw/kshiftrw $12 pair (leaving only
; the low four mask bits) and kmovw.
18818 define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18819 ; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask:
18820 ; VLX: # %bb.0: # %entry
18821 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
18822 ; VLX-NEXT: kmovq %k0, %rax
18823 ; VLX-NEXT: vzeroupper
18826 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask:
18827 ; NoVLX: # %bb.0: # %entry
18828 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18829 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18830 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18831 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18832 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18833 ; NoVLX-NEXT: kmovw %k0, %eax
18834 ; NoVLX-NEXT: vzeroupper
18837 %0 = bitcast <4 x i64> %__a to <4 x i64>
18838 %1 = bitcast <4 x i64> %__b to <4 x i64>
18839 %2 = icmp ult <4 x i64> %0, %1
18840 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18841 %4 = bitcast <64 x i1> %3 to i64
; Unsigned less-than compare (icmp ult) of %__a against a <4 x i64> loaded
; from %__b. The <4 x i1> result is widened to <64 x i1> with lanes 4-63 all
; selecting from the zeroinitializer operand, then bitcast to i64.
; Expected codegen, VLX run - vpcmpltuq with the load folded as a memory
; operand, then kmovq. NoVLX run - a separate vmovdqa load, a zmm-width
; vpcmpltuq, and a kshiftlw/kshiftrw $12 pair before kmovw.
18845 define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18846 ; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem:
18847 ; VLX: # %bb.0: # %entry
18848 ; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
18849 ; VLX-NEXT: kmovq %k0, %rax
18850 ; VLX-NEXT: vzeroupper
18853 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem:
18854 ; NoVLX: # %bb.0: # %entry
18855 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18856 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
18857 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18858 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18859 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18860 ; NoVLX-NEXT: kmovw %k0, %eax
18861 ; NoVLX-NEXT: vzeroupper
18864 %0 = bitcast <4 x i64> %__a to <4 x i64>
18865 %load = load <4 x i64>, <4 x i64>* %__b
18866 %1 = bitcast <4 x i64> %load to <4 x i64>
18867 %2 = icmp ult <4 x i64> %0, %1
18868 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18869 %4 = bitcast <64 x i1> %3 to i64
; Masked variant: icmp ult of two <4 x i64> arguments, ANDed with the low four
; lanes of %__u viewed as <8 x i1>. The <4 x i1> result is widened to
; <64 x i1> with lanes 4-63 all selecting from the zeroinitializer operand,
; then bitcast to i64.
; Expected codegen, VLX run - %edi moved into %k1 and used as the write mask
; of a ymm vpcmpltuq, then kmovq. NoVLX run - a masked zmm-width vpcmpltuq and
; a kshiftlw/kshiftrw $12 pair before kmovw.
18873 define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18874 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask:
18875 ; VLX: # %bb.0: # %entry
18876 ; VLX-NEXT: kmovd %edi, %k1
18877 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
18878 ; VLX-NEXT: kmovq %k0, %rax
18879 ; VLX-NEXT: vzeroupper
18882 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask:
18883 ; NoVLX: # %bb.0: # %entry
18884 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18885 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18886 ; NoVLX-NEXT: kmovw %edi, %k1
18887 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18888 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18889 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18890 ; NoVLX-NEXT: kmovw %k0, %eax
18891 ; NoVLX-NEXT: vzeroupper
18894 %0 = bitcast <4 x i64> %__a to <4 x i64>
18895 %1 = bitcast <4 x i64> %__b to <4 x i64>
18896 %2 = icmp ult <4 x i64> %0, %1
18897 %3 = bitcast i8 %__u to <8 x i1>
18898 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18899 %4 = and <4 x i1> %2, %extract.i
18900 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18901 %6 = bitcast <64 x i1> %5 to i64
; Masked variant with a memory operand: icmp ult of %__a against a <4 x i64>
; loaded from %__b, ANDed with the low four lanes of %__u viewed as <8 x i1>.
; The <4 x i1> result is widened to <64 x i1> with lanes 4-63 all selecting
; from the zeroinitializer operand, then bitcast to i64.
; Expected codegen, VLX run - %edi moved into %k1, a masked vpcmpltuq with the
; load folded from (%rsi), then kmovq. NoVLX run - a separate vmovdqa load, a
; masked zmm-width vpcmpltuq, and a kshiftlw/kshiftrw $12 pair before kmovw.
18905 define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18906 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem:
18907 ; VLX: # %bb.0: # %entry
18908 ; VLX-NEXT: kmovd %edi, %k1
18909 ; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
18910 ; VLX-NEXT: kmovq %k0, %rax
18911 ; VLX-NEXT: vzeroupper
18914 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem:
18915 ; NoVLX: # %bb.0: # %entry
18916 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18917 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
18918 ; NoVLX-NEXT: kmovw %edi, %k1
18919 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18920 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18921 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18922 ; NoVLX-NEXT: kmovw %k0, %eax
18923 ; NoVLX-NEXT: vzeroupper
18926 %0 = bitcast <4 x i64> %__a to <4 x i64>
18927 %load = load <4 x i64>, <4 x i64>* %__b
18928 %1 = bitcast <4 x i64> %load to <4 x i64>
18929 %2 = icmp ult <4 x i64> %0, %1
18930 %3 = bitcast i8 %__u to <8 x i1>
18931 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18932 %4 = and <4 x i1> %2, %extract.i
18933 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18934 %6 = bitcast <64 x i1> %5 to i64
; Unmasked unsigned less-than compare of <4 x i64> %__a against a scalar i64
; loaded from %__b and splatted to all four lanes (insertelement into lane 0
; followed by an all-zero-index shufflevector). The <4 x i1> result is widened
; to <64 x i1> with zeroinitializer lanes (shuffle indices 4-7) and bitcast to
; i64.
; The VLX checks expect the broadcast to be folded as a {1to4} memory operand
; of a ymm vpcmpltuq. The NoVLX checks expect a separate vpbroadcastq, a zmm
; compare, and a kshiftlw/kshiftrw pair by 12 that keeps only the low four
; mask bits.
18939 define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
18940 ; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
18941 ; VLX: # %bb.0: # %entry
18942 ; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
18943 ; VLX-NEXT: kmovq %k0, %rax
18944 ; VLX-NEXT: vzeroupper
18947 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
18948 ; NoVLX: # %bb.0: # %entry
18949 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18950 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
18951 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18952 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18953 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18954 ; NoVLX-NEXT: kmovw %k0, %eax
18955 ; NoVLX-NEXT: vzeroupper
18958 %0 = bitcast <4 x i64> %__a to <4 x i64>
18959 %load = load i64, i64* %__b
18960 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18961 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18962 %2 = icmp ult <4 x i64> %0, %1
18963 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18964 %4 = bitcast <64 x i1> %3 to i64
; Masked, broadcast variant: unsigned less-than compare of <4 x i64> %__a
; against a scalar i64 loaded from %__b and splatted to four lanes. The low
; four lanes of the i8 mask %__u are ANDed with the compare result; note the
; mask is the first `and` operand here. The result is widened to <64 x i1>
; with zeroinitializer lanes (shuffle indices 4-7) and bitcast to i64.
; The VLX checks expect a masked ymm vpcmpltuq with a {1to4} memory operand.
; The NoVLX checks expect a separate vpbroadcastq, a masked zmm compare, and a
; kshiftlw/kshiftrw pair by 12 that keeps only the low four mask bits.
18968 define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
18969 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
18970 ; VLX: # %bb.0: # %entry
18971 ; VLX-NEXT: kmovd %edi, %k1
18972 ; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
18973 ; VLX-NEXT: kmovq %k0, %rax
18974 ; VLX-NEXT: vzeroupper
18977 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
18978 ; NoVLX: # %bb.0: # %entry
18979 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18980 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
18981 ; NoVLX-NEXT: kmovw %edi, %k1
18982 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18983 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18984 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18985 ; NoVLX-NEXT: kmovw %k0, %eax
18986 ; NoVLX-NEXT: vzeroupper
18989 %0 = bitcast <4 x i64> %__a to <4 x i64>
18990 %load = load i64, i64* %__b
18991 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18992 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18993 %2 = icmp ult <4 x i64> %0, %1
18994 %3 = bitcast i8 %__u to <8 x i1>
18995 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18996 %4 = and <4 x i1> %extract.i, %2
18997 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18998 %6 = bitcast <64 x i1> %5 to i64
; Unmasked unsigned less-than compare of two <8 x i64> register operands. The
; <8 x i1> result is widened to <16 x i1> by a shufflevector whose indices
; 8-15 select lanes of the zeroinitializer operand, then bitcast to i16.
; Both check prefixes expect a single zmm vpcmpltuq; they differ only in the
; mask-to-GPR move (kmovd for VLX, kmovw for NoVLX).
19003 define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19004 ; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
19005 ; VLX: # %bb.0: # %entry
19006 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
19007 ; VLX-NEXT: kmovd %k0, %eax
19008 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19009 ; VLX-NEXT: vzeroupper
19012 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
19013 ; NoVLX: # %bb.0: # %entry
19014 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
19015 ; NoVLX-NEXT: kmovw %k0, %eax
19016 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19017 ; NoVLX-NEXT: vzeroupper
19020 %0 = bitcast <8 x i64> %__a to <8 x i64>
19021 %1 = bitcast <8 x i64> %__b to <8 x i64>
19022 %2 = icmp ult <8 x i64> %0, %1
19023 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19024 %4 = bitcast <16 x i1> %3 to i16
; Unmasked unsigned less-than compare of <8 x i64> %__a against a full
; <8 x i64> vector loaded from %__b. The <8 x i1> result is widened to
; <16 x i1> with zeroinitializer lanes (shuffle indices 8-15) and bitcast to
; i16.
; Both check prefixes expect the load to be folded into the zmm vpcmpltuq.
19028 define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
19029 ; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
19030 ; VLX: # %bb.0: # %entry
19031 ; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
19032 ; VLX-NEXT: kmovd %k0, %eax
19033 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19034 ; VLX-NEXT: vzeroupper
19037 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
19038 ; NoVLX: # %bb.0: # %entry
19039 ; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
19040 ; NoVLX-NEXT: kmovw %k0, %eax
19041 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19042 ; NoVLX-NEXT: vzeroupper
19045 %0 = bitcast <8 x i64> %__a to <8 x i64>
19046 %load = load <8 x i64>, <8 x i64>* %__b
19047 %1 = bitcast <8 x i64> %load to <8 x i64>
19048 %2 = icmp ult <8 x i64> %0, %1
19049 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19050 %4 = bitcast <16 x i1> %3 to i16
; Masked unsigned less-than compare of two <8 x i64> register operands. The
; i8 mask %__u is bitcast to <8 x i1> and ANDed with the compare result; the
; result is widened to <16 x i1> with zeroinitializer lanes (shuffle indices
; 8-15) and bitcast to i16.
; Both check prefixes expect the `and` to be folded into a {%k1}-masked zmm
; vpcmpltuq, with the mask moved in from %edi.
19054 define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19055 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask:
19056 ; VLX: # %bb.0: # %entry
19057 ; VLX-NEXT: kmovd %edi, %k1
19058 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19059 ; VLX-NEXT: kmovd %k0, %eax
19060 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19061 ; VLX-NEXT: vzeroupper
19064 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask:
19065 ; NoVLX: # %bb.0: # %entry
19066 ; NoVLX-NEXT: kmovw %edi, %k1
19067 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19068 ; NoVLX-NEXT: kmovw %k0, %eax
19069 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19070 ; NoVLX-NEXT: vzeroupper
19073 %0 = bitcast <8 x i64> %__a to <8 x i64>
19074 %1 = bitcast <8 x i64> %__b to <8 x i64>
19075 %2 = icmp ult <8 x i64> %0, %1
19076 %3 = bitcast i8 %__u to <8 x i1>
19077 %4 = and <8 x i1> %2, %3
19078 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19079 %6 = bitcast <16 x i1> %5 to i16
; Masked unsigned less-than compare of <8 x i64> %__a against a full
; <8 x i64> vector loaded from %__b. The i8 mask %__u (as <8 x i1>) is ANDed
; with the compare result, which is then widened to <16 x i1> with
; zeroinitializer lanes (shuffle indices 8-15) and bitcast to i16.
; Both check prefixes expect a {%k1}-masked zmm vpcmpltuq with the load
; folded as its memory operand.
19083 define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
19084 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
19085 ; VLX: # %bb.0: # %entry
19086 ; VLX-NEXT: kmovd %edi, %k1
19087 ; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19088 ; VLX-NEXT: kmovd %k0, %eax
19089 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19090 ; VLX-NEXT: vzeroupper
19093 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
19094 ; NoVLX: # %bb.0: # %entry
19095 ; NoVLX-NEXT: kmovw %edi, %k1
19096 ; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19097 ; NoVLX-NEXT: kmovw %k0, %eax
19098 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19099 ; NoVLX-NEXT: vzeroupper
19102 %0 = bitcast <8 x i64> %__a to <8 x i64>
19103 %load = load <8 x i64>, <8 x i64>* %__b
19104 %1 = bitcast <8 x i64> %load to <8 x i64>
19105 %2 = icmp ult <8 x i64> %0, %1
19106 %3 = bitcast i8 %__u to <8 x i1>
19107 %4 = and <8 x i1> %2, %3
19108 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19109 %6 = bitcast <16 x i1> %5 to i16
; Unmasked unsigned less-than compare of <8 x i64> %__a against a scalar i64
; loaded from %__b and splatted to all eight lanes (insertelement into lane 0
; followed by an all-zero-index shufflevector). The <8 x i1> result is widened
; to <16 x i1> with zeroinitializer lanes (shuffle indices 8-15) and bitcast
; to i16.
; Both check prefixes expect the splat to be folded as a {1to8} broadcast
; memory operand of the zmm vpcmpltuq.
19114 define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
19115 ; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b:
19116 ; VLX: # %bb.0: # %entry
19117 ; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19118 ; VLX-NEXT: kmovd %k0, %eax
19119 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19120 ; VLX-NEXT: vzeroupper
19123 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b:
19124 ; NoVLX: # %bb.0: # %entry
19125 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19126 ; NoVLX-NEXT: kmovw %k0, %eax
19127 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19128 ; NoVLX-NEXT: vzeroupper
19131 %0 = bitcast <8 x i64> %__a to <8 x i64>
19132 %load = load i64, i64* %__b
19133 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19134 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19135 %2 = icmp ult <8 x i64> %0, %1
19136 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19137 %4 = bitcast <16 x i1> %3 to i16
; Masked, broadcast variant: unsigned less-than compare of <8 x i64> %__a
; against a scalar i64 loaded from %__b and splatted to eight lanes. The i8
; mask %__u (as <8 x i1>) is the first `and` operand and the compare result
; the second. The result is widened to <16 x i1> with zeroinitializer lanes
; (shuffle indices 8-15) and bitcast to i16.
; Both check prefixes expect a {%k1}-masked zmm vpcmpltuq with a {1to8}
; broadcast memory operand.
19141 define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
19142 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
19143 ; VLX: # %bb.0: # %entry
19144 ; VLX-NEXT: kmovd %edi, %k1
19145 ; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19146 ; VLX-NEXT: kmovd %k0, %eax
19147 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19148 ; VLX-NEXT: vzeroupper
19151 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
19152 ; NoVLX: # %bb.0: # %entry
19153 ; NoVLX-NEXT: kmovw %edi, %k1
19154 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19155 ; NoVLX-NEXT: kmovw %k0, %eax
19156 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19157 ; NoVLX-NEXT: vzeroupper
19160 %0 = bitcast <8 x i64> %__a to <8 x i64>
19161 %load = load i64, i64* %__b
19162 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19163 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19164 %2 = icmp ult <8 x i64> %0, %1
19165 %3 = bitcast i8 %__u to <8 x i1>
19166 %4 = and <8 x i1> %3, %2
19167 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19168 %6 = bitcast <16 x i1> %5 to i16
; Unmasked unsigned less-than compare of two <8 x i64> register operands. The
; <8 x i1> result is widened to <32 x i1> by a shufflevector in which every
; index from 8 to 15 selects a lane of the zeroinitializer operand, then
; bitcast to i32.
; Both check prefixes expect a single zmm vpcmpltuq; they differ only in the
; mask-to-GPR move (kmovd for VLX, kmovw for NoVLX).
19173 define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19174 ; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
19175 ; VLX: # %bb.0: # %entry
19176 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
19177 ; VLX-NEXT: kmovd %k0, %eax
19178 ; VLX-NEXT: vzeroupper
19181 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
19182 ; NoVLX: # %bb.0: # %entry
19183 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
19184 ; NoVLX-NEXT: kmovw %k0, %eax
19185 ; NoVLX-NEXT: vzeroupper
19188 %0 = bitcast <8 x i64> %__a to <8 x i64>
19189 %1 = bitcast <8 x i64> %__b to <8 x i64>
19190 %2 = icmp ult <8 x i64> %0, %1
19191 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19192 %4 = bitcast <32 x i1> %3 to i32
; Unmasked unsigned less-than compare of <8 x i64> %__a against a full
; <8 x i64> vector loaded from %__b. The <8 x i1> result is widened to
; <32 x i1> with zeroinitializer lanes (shuffle indices 8-15) and bitcast to
; i32.
; Both check prefixes expect the load to be folded into the zmm vpcmpltuq.
19196 define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
19197 ; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
19198 ; VLX: # %bb.0: # %entry
19199 ; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
19200 ; VLX-NEXT: kmovd %k0, %eax
19201 ; VLX-NEXT: vzeroupper
19204 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
19205 ; NoVLX: # %bb.0: # %entry
19206 ; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
19207 ; NoVLX-NEXT: kmovw %k0, %eax
19208 ; NoVLX-NEXT: vzeroupper
19211 %0 = bitcast <8 x i64> %__a to <8 x i64>
19212 %load = load <8 x i64>, <8 x i64>* %__b
19213 %1 = bitcast <8 x i64> %load to <8 x i64>
19214 %2 = icmp ult <8 x i64> %0, %1
19215 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19216 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned less-than compare of two <8 x i64> register operands. The
; i8 mask %__u is bitcast to <8 x i1> and ANDed with the compare result; the
; result is widened to <32 x i1> with zeroinitializer lanes (shuffle indices
; 8-15) and bitcast to i32.
; Both check prefixes expect the `and` to be folded into a {%k1}-masked zmm
; vpcmpltuq.
19220 define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19221 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
19222 ; VLX: # %bb.0: # %entry
19223 ; VLX-NEXT: kmovd %edi, %k1
19224 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19225 ; VLX-NEXT: kmovd %k0, %eax
19226 ; VLX-NEXT: vzeroupper
19229 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
19230 ; NoVLX: # %bb.0: # %entry
19231 ; NoVLX-NEXT: kmovw %edi, %k1
19232 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19233 ; NoVLX-NEXT: kmovw %k0, %eax
19234 ; NoVLX-NEXT: vzeroupper
19237 %0 = bitcast <8 x i64> %__a to <8 x i64>
19238 %1 = bitcast <8 x i64> %__b to <8 x i64>
19239 %2 = icmp ult <8 x i64> %0, %1
19240 %3 = bitcast i8 %__u to <8 x i1>
19241 %4 = and <8 x i1> %2, %3
19242 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19243 %6 = bitcast <32 x i1> %5 to i32
; Masked unsigned less-than compare of <8 x i64> %__a against a full
; <8 x i64> vector loaded from %__b. The i8 mask %__u (as <8 x i1>) is ANDed
; with the compare result, which is then widened to <32 x i1> with
; zeroinitializer lanes (shuffle indices 8-15) and bitcast to i32.
; Both check prefixes expect a {%k1}-masked zmm vpcmpltuq with the load
; folded as its memory operand.
19247 define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
19248 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
19249 ; VLX: # %bb.0: # %entry
19250 ; VLX-NEXT: kmovd %edi, %k1
19251 ; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19252 ; VLX-NEXT: kmovd %k0, %eax
19253 ; VLX-NEXT: vzeroupper
19256 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
19257 ; NoVLX: # %bb.0: # %entry
19258 ; NoVLX-NEXT: kmovw %edi, %k1
19259 ; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19260 ; NoVLX-NEXT: kmovw %k0, %eax
19261 ; NoVLX-NEXT: vzeroupper
19264 %0 = bitcast <8 x i64> %__a to <8 x i64>
19265 %load = load <8 x i64>, <8 x i64>* %__b
19266 %1 = bitcast <8 x i64> %load to <8 x i64>
19267 %2 = icmp ult <8 x i64> %0, %1
19268 %3 = bitcast i8 %__u to <8 x i1>
19269 %4 = and <8 x i1> %2, %3
19270 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19271 %6 = bitcast <32 x i1> %5 to i32
; Unmasked unsigned less-than compare of <8 x i64> %__a against a scalar i64
; loaded from %__b and splatted to all eight lanes. The <8 x i1> result is
; widened to <32 x i1> with zeroinitializer lanes (shuffle indices 8-15) and
; bitcast to i32.
; Both check prefixes expect the splat to be folded as a {1to8} broadcast
; memory operand of the zmm vpcmpltuq.
19276 define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
19277 ; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
19278 ; VLX: # %bb.0: # %entry
19279 ; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19280 ; VLX-NEXT: kmovd %k0, %eax
19281 ; VLX-NEXT: vzeroupper
19284 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
19285 ; NoVLX: # %bb.0: # %entry
19286 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19287 ; NoVLX-NEXT: kmovw %k0, %eax
19288 ; NoVLX-NEXT: vzeroupper
19291 %0 = bitcast <8 x i64> %__a to <8 x i64>
19292 %load = load i64, i64* %__b
19293 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19294 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19295 %2 = icmp ult <8 x i64> %0, %1
19296 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19297 %4 = bitcast <32 x i1> %3 to i32
; Masked, broadcast variant: unsigned less-than compare of <8 x i64> %__a
; against a scalar i64 loaded from %__b and splatted to eight lanes. The i8
; mask %__u (as <8 x i1>) is the first `and` operand and the compare result
; the second. The result is widened to <32 x i1> with zeroinitializer lanes
; (shuffle indices 8-15) and bitcast to i32.
; Both check prefixes expect a {%k1}-masked zmm vpcmpltuq with a {1to8}
; broadcast memory operand.
19301 define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
19302 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
19303 ; VLX: # %bb.0: # %entry
19304 ; VLX-NEXT: kmovd %edi, %k1
19305 ; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19306 ; VLX-NEXT: kmovd %k0, %eax
19307 ; VLX-NEXT: vzeroupper
19310 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
19311 ; NoVLX: # %bb.0: # %entry
19312 ; NoVLX-NEXT: kmovw %edi, %k1
19313 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19314 ; NoVLX-NEXT: kmovw %k0, %eax
19315 ; NoVLX-NEXT: vzeroupper
19318 %0 = bitcast <8 x i64> %__a to <8 x i64>
19319 %load = load i64, i64* %__b
19320 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19321 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19322 %2 = icmp ult <8 x i64> %0, %1
19323 %3 = bitcast i8 %__u to <8 x i1>
19324 %4 = and <8 x i1> %3, %2
19325 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19326 %6 = bitcast <32 x i1> %5 to i32
; Unmasked unsigned less-than compare of two <8 x i64> register operands. The
; <8 x i1> result is widened to <64 x i1> by a shufflevector in which every
; index from 8 to 15 selects a lane of the zeroinitializer operand, then
; bitcast to i64.
; The VLX checks expect the mask to be moved out with kmovq into %rax; the
; NoVLX checks expect kmovw into %eax.
19331 define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19332 ; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
19333 ; VLX: # %bb.0: # %entry
19334 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
19335 ; VLX-NEXT: kmovq %k0, %rax
19336 ; VLX-NEXT: vzeroupper
19339 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
19340 ; NoVLX: # %bb.0: # %entry
19341 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
19342 ; NoVLX-NEXT: kmovw %k0, %eax
19343 ; NoVLX-NEXT: vzeroupper
19346 %0 = bitcast <8 x i64> %__a to <8 x i64>
19347 %1 = bitcast <8 x i64> %__b to <8 x i64>
19348 %2 = icmp ult <8 x i64> %0, %1
19349 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19350 %4 = bitcast <64 x i1> %3 to i64
; Unmasked unsigned less-than compare of <8 x i64> %__a against a full
; <8 x i64> vector loaded from %__b. The <8 x i1> result is widened to
; <64 x i1> with zeroinitializer lanes (shuffle indices 8-15) and bitcast to
; i64.
; Both check prefixes expect the load to be folded into the zmm vpcmpltuq;
; VLX moves the mask out with kmovq, NoVLX with kmovw.
19354 define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
19355 ; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
19356 ; VLX: # %bb.0: # %entry
19357 ; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
19358 ; VLX-NEXT: kmovq %k0, %rax
19359 ; VLX-NEXT: vzeroupper
19362 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
19363 ; NoVLX: # %bb.0: # %entry
19364 ; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
19365 ; NoVLX-NEXT: kmovw %k0, %eax
19366 ; NoVLX-NEXT: vzeroupper
19369 %0 = bitcast <8 x i64> %__a to <8 x i64>
19370 %load = load <8 x i64>, <8 x i64>* %__b
19371 %1 = bitcast <8 x i64> %load to <8 x i64>
19372 %2 = icmp ult <8 x i64> %0, %1
19373 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19374 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned less-than compare of two <8 x i64> register operands. The
; i8 mask %__u is bitcast to <8 x i1> and ANDed with the compare result; the
; result is widened to <64 x i1> with zeroinitializer lanes (shuffle indices
; 8-15) and bitcast to i64.
; Both check prefixes expect the `and` to be folded into a {%k1}-masked zmm
; vpcmpltuq; VLX moves the mask out with kmovq, NoVLX with kmovw.
19378 define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19379 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
19380 ; VLX: # %bb.0: # %entry
19381 ; VLX-NEXT: kmovd %edi, %k1
19382 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19383 ; VLX-NEXT: kmovq %k0, %rax
19384 ; VLX-NEXT: vzeroupper
19387 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
19388 ; NoVLX: # %bb.0: # %entry
19389 ; NoVLX-NEXT: kmovw %edi, %k1
19390 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19391 ; NoVLX-NEXT: kmovw %k0, %eax
19392 ; NoVLX-NEXT: vzeroupper
19395 %0 = bitcast <8 x i64> %__a to <8 x i64>
19396 %1 = bitcast <8 x i64> %__b to <8 x i64>
19397 %2 = icmp ult <8 x i64> %0, %1
19398 %3 = bitcast i8 %__u to <8 x i1>
19399 %4 = and <8 x i1> %2, %3
19400 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19401 %6 = bitcast <64 x i1> %5 to i64
; Masked unsigned less-than compare of <8 x i64> %__a against a full
; <8 x i64> vector loaded from %__b. The i8 mask %__u (as <8 x i1>) is ANDed
; with the compare result, which is then widened to <64 x i1> with
; zeroinitializer lanes (shuffle indices 8-15) and bitcast to i64.
; Both check prefixes expect a {%k1}-masked zmm vpcmpltuq with the load
; folded as its memory operand; VLX moves the mask out with kmovq, NoVLX with
; kmovw.
19405 define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
19406 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
19407 ; VLX: # %bb.0: # %entry
19408 ; VLX-NEXT: kmovd %edi, %k1
19409 ; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19410 ; VLX-NEXT: kmovq %k0, %rax
19411 ; VLX-NEXT: vzeroupper
19414 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
19415 ; NoVLX: # %bb.0: # %entry
19416 ; NoVLX-NEXT: kmovw %edi, %k1
19417 ; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19418 ; NoVLX-NEXT: kmovw %k0, %eax
19419 ; NoVLX-NEXT: vzeroupper
19422 %0 = bitcast <8 x i64> %__a to <8 x i64>
19423 %load = load <8 x i64>, <8 x i64>* %__b
19424 %1 = bitcast <8 x i64> %load to <8 x i64>
19425 %2 = icmp ult <8 x i64> %0, %1
19426 %3 = bitcast i8 %__u to <8 x i1>
19427 %4 = and <8 x i1> %2, %3
19428 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19429 %6 = bitcast <64 x i1> %5 to i64
; Unmasked unsigned less-than compare of <8 x i64> %__a against a scalar i64
; loaded from %__b and splatted to all eight lanes. The <8 x i1> result is
; widened to <64 x i1> with zeroinitializer lanes (shuffle indices 8-15) and
; bitcast to i64.
; Both check prefixes expect the splat to be folded as a {1to8} broadcast
; memory operand; VLX moves the mask out with kmovq, NoVLX with kmovw.
19434 define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
19435 ; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
19436 ; VLX: # %bb.0: # %entry
19437 ; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19438 ; VLX-NEXT: kmovq %k0, %rax
19439 ; VLX-NEXT: vzeroupper
19442 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
19443 ; NoVLX: # %bb.0: # %entry
19444 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19445 ; NoVLX-NEXT: kmovw %k0, %eax
19446 ; NoVLX-NEXT: vzeroupper
19449 %0 = bitcast <8 x i64> %__a to <8 x i64>
19450 %load = load i64, i64* %__b
19451 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19452 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19453 %2 = icmp ult <8 x i64> %0, %1
19454 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19455 %4 = bitcast <64 x i1> %3 to i64
; Masked, broadcast variant: unsigned less-than compare of <8 x i64> %__a
; against a scalar i64 loaded from %__b and splatted to eight lanes. The i8
; mask %__u (as <8 x i1>) is the first `and` operand and the compare result
; the second. The result is widened to <64 x i1> with zeroinitializer lanes
; (shuffle indices 8-15) and bitcast to i64.
; Both check prefixes expect a {%k1}-masked zmm vpcmpltuq with a {1to8}
; broadcast memory operand; VLX moves the mask out with kmovq, NoVLX with
; kmovw.
19459 define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
19460 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
19461 ; VLX: # %bb.0: # %entry
19462 ; VLX-NEXT: kmovd %edi, %k1
19463 ; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19464 ; VLX-NEXT: kmovq %k0, %rax
19465 ; VLX-NEXT: vzeroupper
19468 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
19469 ; NoVLX: # %bb.0: # %entry
19470 ; NoVLX-NEXT: kmovw %edi, %k1
19471 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19472 ; NoVLX-NEXT: kmovw %k0, %eax
19473 ; NoVLX-NEXT: vzeroupper
19476 %0 = bitcast <8 x i64> %__a to <8 x i64>
19477 %load = load i64, i64* %__b
19478 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19479 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19480 %2 = icmp ult <8 x i64> %0, %1
19481 %3 = bitcast i8 %__u to <8 x i1>
19482 %4 = and <8 x i1> %3, %2
19483 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19484 %6 = bitcast <64 x i1> %5 to i64
; External declaration of the @llvm.x86.avx512.cmp.ps.512 intrinsic: takes two
; <16 x float> vectors plus two i32 operands and returns a <16 x i1> mask.
; NOTE(review): the two i32 operands are presumably the comparison predicate
; and the rounding/SAE control - confirm against the intrinsic definition. No
; call to it is visible in this section of the file.
19489 declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)
; Unmasked ordered-equal float compare: both <2 x i64> arguments are bitcast
; to <4 x float> and compared with fcmp oeq. The <4 x i1> result is widened to
; <8 x i1> by a shufflevector whose indices 4-7 select lanes of the
; zeroinitializer operand, then bitcast to i8.
; The VLX checks expect a single xmm vcmpeqps writing a mask register. The
; NoVLX checks expect both inputs to be treated as zmm registers, a zmm
; vcmpeqps, and a kshiftlw/kshiftrw pair by 12 that keeps only the low four
; mask bits.
19490 define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19491 ; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
19492 ; VLX: # %bb.0: # %entry
19493 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
19494 ; VLX-NEXT: kmovd %k0, %eax
19495 ; VLX-NEXT: # kill: def $al killed $al killed $eax
19498 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
19499 ; NoVLX: # %bb.0: # %entry
19500 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
19501 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19502 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19503 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19504 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19505 ; NoVLX-NEXT: kmovw %k0, %eax
19506 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
19507 ; NoVLX-NEXT: vzeroupper
19510 %0 = bitcast <2 x i64> %__a to <4 x float>
19511 %1 = bitcast <2 x i64> %__b to <4 x float>
19512 %2 = fcmp oeq <4 x float> %0, %1
19513 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19514 %4 = bitcast <8 x i1> %3 to i8
; Unmasked ordered-equal float compare of %__a (bitcast to <4 x float>)
; against a <2 x i64> loaded from %__b and bitcast to <4 x float>. The
; <4 x i1> result is widened to <8 x i1> with zeroinitializer lanes (shuffle
; indices 4-7) and bitcast to i8.
; The VLX checks expect the load to be folded into an xmm vcmpeqps. The NoVLX
; checks expect a separate vmovaps load, a zmm vcmpeqps, and a
; kshiftlw/kshiftrw pair by 12 that keeps only the low four mask bits.
19518 define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
19519 ; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem:
19520 ; VLX: # %bb.0: # %entry
19521 ; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
19522 ; VLX-NEXT: kmovd %k0, %eax
19523 ; VLX-NEXT: # kill: def $al killed $al killed $eax
19526 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem:
19527 ; NoVLX: # %bb.0: # %entry
19528 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19529 ; NoVLX-NEXT: vmovaps (%rdi), %xmm1
19530 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19531 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19532 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19533 ; NoVLX-NEXT: kmovw %k0, %eax
19534 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
19535 ; NoVLX-NEXT: vzeroupper
19538 %0 = bitcast <2 x i64> %__a to <4 x float>
19539 %load = load <2 x i64>, <2 x i64>* %__b
19540 %1 = bitcast <2 x i64> %load to <4 x float>
19541 %2 = fcmp oeq <4 x float> %0, %1
19542 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19543 %4 = bitcast <8 x i1> %3 to i8
; Unmasked ordered-equal float compare of %__a (bitcast to <4 x float>)
; against a scalar float loaded from %__b and splatted to four lanes
; (insertelement into lane 0 followed by an all-zero-index shufflevector). The
; <4 x i1> result is widened to <8 x i1> with zeroinitializer lanes (shuffle
; indices 4-7) and bitcast to i8.
; The VLX checks expect the splat to be folded as a {1to4} broadcast memory
; operand of an xmm vcmpeqps. The NoVLX checks expect a separate vbroadcastss,
; a zmm vcmpeqps, and a kshiftlw/kshiftrw pair by 12 that keeps only the low
; four mask bits.
19547 define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
19548 ; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b:
19549 ; VLX: # %bb.0: # %entry
19550 ; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
19551 ; VLX-NEXT: kmovd %k0, %eax
19552 ; VLX-NEXT: # kill: def $al killed $al killed $eax
19555 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b:
19556 ; NoVLX: # %bb.0: # %entry
19557 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19558 ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
19559 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19560 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19561 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19562 ; NoVLX-NEXT: kmovw %k0, %eax
19563 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
19564 ; NoVLX-NEXT: vzeroupper
19567 %0 = bitcast <2 x i64> %__a to <4 x float>
19568 %load = load float, float* %__b
19569 %vec = insertelement <4 x float> undef, float %load, i32 0
19570 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19571 %2 = fcmp oeq <4 x float> %0, %1
19572 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19573 %4 = bitcast <8 x i1> %3 to i8
; Masked ordered-equal float compare: both <2 x i64> arguments are bitcast to
; <4 x float> and compared with fcmp oeq. The i4 mask %__u is bitcast directly
; to <4 x i1> and ANDed with the compare result, which is then widened to
; <8 x i1> with zeroinitializer lanes (shuffle indices 4-7) and bitcast to i8.
; The VLX checks expect a {%k1}-masked xmm vcmpeqps. The NoVLX checks expect a
; masked zmm vcmpeqps followed by a kshiftlw/kshiftrw pair by 12 that keeps
; only the low four mask bits.
19577 define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19578 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask:
19579 ; VLX: # %bb.0: # %entry
19580 ; VLX-NEXT: kmovd %edi, %k1
19581 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
19582 ; VLX-NEXT: kmovd %k0, %eax
19583 ; VLX-NEXT: # kill: def $al killed $al killed $eax
19586 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask:
19587 ; NoVLX: # %bb.0: # %entry
19588 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
19589 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19590 ; NoVLX-NEXT: kmovw %edi, %k1
19591 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19592 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19593 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19594 ; NoVLX-NEXT: kmovw %k0, %eax
19595 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
19596 ; NoVLX-NEXT: vzeroupper
19599 %0 = bitcast <2 x i64> %__a to <4 x float>
19600 %1 = bitcast <2 x i64> %__b to <4 x float>
19601 %2 = fcmp oeq <4 x float> %0, %1
19602 %3 = bitcast i4 %__u to <4 x i1>
19603 %4 = and <4 x i1> %2, %3
19604 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19605 %6 = bitcast <8 x i1> %5 to i8
; Masked ordered-equal float compare of %__a (bitcast to <4 x float>) against
; a <2 x i64> loaded from %__b and bitcast to <4 x float>. The i4 mask %__u is
; bitcast to <4 x i1> and ANDed with the compare result, which is then widened
; to <8 x i1> with zeroinitializer lanes (shuffle indices 4-7) and bitcast to
; i8.
; The VLX checks expect a {%k1}-masked xmm vcmpeqps with the load folded in.
; The NoVLX checks expect a separate vmovaps load, a masked zmm vcmpeqps, and
; a kshiftlw/kshiftrw pair by 12 that keeps only the low four mask bits.
19609 define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
19610 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem:
19611 ; VLX: # %bb.0: # %entry
19612 ; VLX-NEXT: kmovd %edi, %k1
19613 ; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
19614 ; VLX-NEXT: kmovd %k0, %eax
19615 ; VLX-NEXT: # kill: def $al killed $al killed $eax
19618 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem:
19619 ; NoVLX: # %bb.0: # %entry
19620 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19621 ; NoVLX-NEXT: kmovw %edi, %k1
19622 ; NoVLX-NEXT: vmovaps (%rsi), %xmm1
19623 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19624 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19625 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19626 ; NoVLX-NEXT: kmovw %k0, %eax
19627 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
19628 ; NoVLX-NEXT: vzeroupper
19631 %0 = bitcast <2 x i64> %__a to <4 x float>
19632 %load = load <2 x i64>, <2 x i64>* %__b
19633 %1 = bitcast <2 x i64> %load to <4 x float>
19634 %2 = fcmp oeq <4 x float> %0, %1
19635 %3 = bitcast i4 %__u to <4 x i1>
19636 %4 = and <4 x i1> %2, %3
19637 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19638 %6 = bitcast <8 x i1> %5 to i8
; Masked broadcast form: a scalar float loaded from %__b is splatted to
; <4 x float> (insertelement + all-zero-index shufflevector) and compared with
; fcmp oeq. The result is ANDed with the i4 mask %__u, padded to <8 x i1> with
; zeroinitializer lanes, and bitcast to i8.
; The VLX checks expect an embedded {1to4} broadcast operand; the NoVLX checks
; expect an explicit vbroadcastss followed by a zmm compare.
19642 define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
19643 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b:
19644 ; VLX: # %bb.0: # %entry
19645 ; VLX-NEXT: kmovd %edi, %k1
19646 ; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
19647 ; VLX-NEXT: kmovd %k0, %eax
19648 ; VLX-NEXT: # kill: def $al killed $al killed $eax
19651 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b:
19652 ; NoVLX: # %bb.0: # %entry
19653 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19654 ; NoVLX-NEXT: kmovw %edi, %k1
19655 ; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1
19656 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19657 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19658 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19659 ; NoVLX-NEXT: kmovw %k0, %eax
19660 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
19661 ; NoVLX-NEXT: vzeroupper
19664 %0 = bitcast <2 x i64> %__a to <4 x float>
19665 %load = load float, float* %__b
19666 %vec = insertelement <4 x float> undef, float %load, i32 0
19667 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19668 %2 = fcmp oeq <4 x float> %0, %1
19669 %3 = bitcast i4 %__u to <4 x i1>
19670 %4 = and <4 x i1> %2, %3
19671 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19672 %6 = bitcast <8 x i1> %5 to i8
; Unmasked register/register form: fcmp oeq on two <4 x float> values (bitcast
; from <2 x i64>). The shufflevector widens the <4 x i1> result to <16 x i1>;
; every index above 3 is in the range 4-7, which selects a lane of the
; zeroinitializer operand, so only the low four lanes carry compare results.
; The widened vector is then bitcast to i16.
; The VLX checks expect one xmm vcmpeqps; the NoVLX checks expect a zmm compare
; followed by a kshiftlw/kshiftrw $12 pair.
19678 define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19679 ; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask:
19680 ; VLX: # %bb.0: # %entry
19681 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
19682 ; VLX-NEXT: kmovd %k0, %eax
19683 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19686 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask:
19687 ; NoVLX: # %bb.0: # %entry
19688 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
19689 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19690 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19691 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19692 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19693 ; NoVLX-NEXT: kmovw %k0, %eax
19694 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19695 ; NoVLX-NEXT: vzeroupper
19698 %0 = bitcast <2 x i64> %__a to <4 x float>
19699 %1 = bitcast <2 x i64> %__b to <4 x float>
19700 %2 = fcmp oeq <4 x float> %0, %1
19701 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19702 %4 = bitcast <16 x i1> %3 to i16
; Unmasked register/memory form: the second fcmp oeq operand is a <2 x i64>
; loaded from %__b and bitcast to <4 x float>. The <4 x i1> result is widened to
; <16 x i1> with zeroinitializer lanes (shuffle indices 4-7) and bitcast to i16.
; The VLX checks expect the load folded into vcmpeqps; the NoVLX checks expect
; a separate vmovaps load and a zmm compare.
19706 define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
19707 ; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem:
19708 ; VLX: # %bb.0: # %entry
19709 ; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
19710 ; VLX-NEXT: kmovd %k0, %eax
19711 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19714 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem:
19715 ; NoVLX: # %bb.0: # %entry
19716 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19717 ; NoVLX-NEXT: vmovaps (%rdi), %xmm1
19718 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19719 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19720 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19721 ; NoVLX-NEXT: kmovw %k0, %eax
19722 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19723 ; NoVLX-NEXT: vzeroupper
19726 %0 = bitcast <2 x i64> %__a to <4 x float>
19727 %load = load <2 x i64>, <2 x i64>* %__b
19728 %1 = bitcast <2 x i64> %load to <4 x float>
19729 %2 = fcmp oeq <4 x float> %0, %1
19730 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19731 %4 = bitcast <16 x i1> %3 to i16
; Unmasked broadcast form: a scalar float loaded from %__b is splatted to
; <4 x float> and compared with fcmp oeq. The <4 x i1> result is widened to
; <16 x i1> with zeroinitializer lanes (shuffle indices 4-7) and bitcast to i16.
; The VLX checks expect an embedded {1to4} broadcast operand; the NoVLX checks
; expect an explicit vbroadcastss followed by a zmm compare.
19735 define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
19736 ; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b:
19737 ; VLX: # %bb.0: # %entry
19738 ; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
19739 ; VLX-NEXT: kmovd %k0, %eax
19740 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19743 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b:
19744 ; NoVLX: # %bb.0: # %entry
19745 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19746 ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
19747 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19748 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19749 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19750 ; NoVLX-NEXT: kmovw %k0, %eax
19751 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19752 ; NoVLX-NEXT: vzeroupper
19755 %0 = bitcast <2 x i64> %__a to <4 x float>
19756 %load = load float, float* %__b
19757 %vec = insertelement <4 x float> undef, float %load, i32 0
19758 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19759 %2 = fcmp oeq <4 x float> %0, %1
19760 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19761 %4 = bitcast <16 x i1> %3 to i16
; Masked register/register form: fcmp oeq on two <4 x float> values, ANDed with
; the i4 mask %__u bitcast to <4 x i1>. The result is widened to <16 x i1> with
; zeroinitializer lanes (shuffle indices 4-7) and bitcast to i16.
; The VLX checks expect one masked xmm vcmpeqps; the NoVLX checks expect a
; masked zmm compare followed by a kshiftlw/kshiftrw $12 pair.
19765 define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19766 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask:
19767 ; VLX: # %bb.0: # %entry
19768 ; VLX-NEXT: kmovd %edi, %k1
19769 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
19770 ; VLX-NEXT: kmovd %k0, %eax
19771 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19774 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask:
19775 ; NoVLX: # %bb.0: # %entry
19776 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
19777 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19778 ; NoVLX-NEXT: kmovw %edi, %k1
19779 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19780 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19781 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19782 ; NoVLX-NEXT: kmovw %k0, %eax
19783 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19784 ; NoVLX-NEXT: vzeroupper
19787 %0 = bitcast <2 x i64> %__a to <4 x float>
19788 %1 = bitcast <2 x i64> %__b to <4 x float>
19789 %2 = fcmp oeq <4 x float> %0, %1
19790 %3 = bitcast i4 %__u to <4 x i1>
19791 %4 = and <4 x i1> %2, %3
19792 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19793 %6 = bitcast <16 x i1> %5 to i16
; Masked register/memory form: the second fcmp oeq operand is a <2 x i64>
; loaded from %__b and bitcast to <4 x float>. The compare result is ANDed with
; the i4 mask %__u, widened to <16 x i1> with zeroinitializer lanes (shuffle
; indices 4-7), and bitcast to i16.
; The VLX checks expect the load folded into the masked vcmpeqps; the NoVLX
; checks expect a separate vmovaps load and a masked zmm compare.
19797 define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
19798 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem:
19799 ; VLX: # %bb.0: # %entry
19800 ; VLX-NEXT: kmovd %edi, %k1
19801 ; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
19802 ; VLX-NEXT: kmovd %k0, %eax
19803 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19806 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem:
19807 ; NoVLX: # %bb.0: # %entry
19808 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19809 ; NoVLX-NEXT: kmovw %edi, %k1
19810 ; NoVLX-NEXT: vmovaps (%rsi), %xmm1
19811 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19812 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19813 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19814 ; NoVLX-NEXT: kmovw %k0, %eax
19815 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19816 ; NoVLX-NEXT: vzeroupper
19819 %0 = bitcast <2 x i64> %__a to <4 x float>
19820 %load = load <2 x i64>, <2 x i64>* %__b
19821 %1 = bitcast <2 x i64> %load to <4 x float>
19822 %2 = fcmp oeq <4 x float> %0, %1
19823 %3 = bitcast i4 %__u to <4 x i1>
19824 %4 = and <4 x i1> %2, %3
19825 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19826 %6 = bitcast <16 x i1> %5 to i16
; Masked broadcast form: a scalar float loaded from %__b is splatted to
; <4 x float> and compared with fcmp oeq. The result is ANDed with the i4 mask
; %__u, widened to <16 x i1> with zeroinitializer lanes (shuffle indices 4-7),
; and bitcast to i16.
; The VLX checks expect an embedded {1to4} broadcast operand; the NoVLX checks
; expect an explicit vbroadcastss followed by a masked zmm compare.
19830 define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
19831 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b:
19832 ; VLX: # %bb.0: # %entry
19833 ; VLX-NEXT: kmovd %edi, %k1
19834 ; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
19835 ; VLX-NEXT: kmovd %k0, %eax
19836 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19839 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b:
19840 ; NoVLX: # %bb.0: # %entry
19841 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19842 ; NoVLX-NEXT: kmovw %edi, %k1
19843 ; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1
19844 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19845 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19846 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19847 ; NoVLX-NEXT: kmovw %k0, %eax
19848 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19849 ; NoVLX-NEXT: vzeroupper
19852 %0 = bitcast <2 x i64> %__a to <4 x float>
19853 %load = load float, float* %__b
19854 %vec = insertelement <4 x float> undef, float %load, i32 0
19855 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19856 %2 = fcmp oeq <4 x float> %0, %1
19857 %3 = bitcast i4 %__u to <4 x i1>
19858 %4 = and <4 x i1> %2, %3
19859 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19860 %6 = bitcast <16 x i1> %5 to i16
; Unmasked register/register form: fcmp oeq on two <4 x float> values (bitcast
; from <2 x i64>). The <4 x i1> result is widened to <32 x i1>; every shuffle
; index above 3 is in the range 4-7, which selects a zeroinitializer lane. The
; widened vector is bitcast to i32.
; The VLX checks expect one xmm vcmpeqps read back with kmovd; the NoVLX checks
; expect a zmm compare followed by a kshiftlw/kshiftrw $12 pair and kmovw.
19866 define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19867 ; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask:
19868 ; VLX: # %bb.0: # %entry
19869 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
19870 ; VLX-NEXT: kmovd %k0, %eax
19873 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask:
19874 ; NoVLX: # %bb.0: # %entry
19875 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
19876 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19877 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19878 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19879 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19880 ; NoVLX-NEXT: kmovw %k0, %eax
19881 ; NoVLX-NEXT: vzeroupper
19884 %0 = bitcast <2 x i64> %__a to <4 x float>
19885 %1 = bitcast <2 x i64> %__b to <4 x float>
19886 %2 = fcmp oeq <4 x float> %0, %1
19887 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19888 %4 = bitcast <32 x i1> %3 to i32
; Unmasked register/memory form: the second fcmp oeq operand is a <2 x i64>
; loaded from %__b and bitcast to <4 x float>. The <4 x i1> result is widened to
; <32 x i1> with zeroinitializer lanes (shuffle indices 4-7) and bitcast to i32.
; The VLX checks expect the load folded into vcmpeqps; the NoVLX checks expect
; a separate vmovaps load and a zmm compare.
19892 define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
19893 ; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem:
19894 ; VLX: # %bb.0: # %entry
19895 ; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
19896 ; VLX-NEXT: kmovd %k0, %eax
19899 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem:
19900 ; NoVLX: # %bb.0: # %entry
19901 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19902 ; NoVLX-NEXT: vmovaps (%rdi), %xmm1
19903 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19904 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19905 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19906 ; NoVLX-NEXT: kmovw %k0, %eax
19907 ; NoVLX-NEXT: vzeroupper
19910 %0 = bitcast <2 x i64> %__a to <4 x float>
19911 %load = load <2 x i64>, <2 x i64>* %__b
19912 %1 = bitcast <2 x i64> %load to <4 x float>
19913 %2 = fcmp oeq <4 x float> %0, %1
19914 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19915 %4 = bitcast <32 x i1> %3 to i32
; Unmasked broadcast form: a scalar float loaded from %__b is splatted to
; <4 x float> and compared with fcmp oeq. The <4 x i1> result is widened to
; <32 x i1> with zeroinitializer lanes (shuffle indices 4-7) and bitcast to i32.
; The VLX checks expect an embedded {1to4} broadcast operand; the NoVLX checks
; expect an explicit vbroadcastss followed by a zmm compare.
19919 define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
19920 ; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b:
19921 ; VLX: # %bb.0: # %entry
19922 ; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
19923 ; VLX-NEXT: kmovd %k0, %eax
19926 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b:
19927 ; NoVLX: # %bb.0: # %entry
19928 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19929 ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
19930 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19931 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19932 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19933 ; NoVLX-NEXT: kmovw %k0, %eax
19934 ; NoVLX-NEXT: vzeroupper
19937 %0 = bitcast <2 x i64> %__a to <4 x float>
19938 %load = load float, float* %__b
19939 %vec = insertelement <4 x float> undef, float %load, i32 0
19940 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19941 %2 = fcmp oeq <4 x float> %0, %1
19942 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19943 %4 = bitcast <32 x i1> %3 to i32
; Masked register/register form: fcmp oeq on two <4 x float> values, ANDed with
; the i4 mask %__u bitcast to <4 x i1>. The result is widened to <32 x i1> with
; zeroinitializer lanes (shuffle indices 4-7) and bitcast to i32.
; The VLX checks expect one masked xmm vcmpeqps; the NoVLX checks expect a
; masked zmm compare followed by a kshiftlw/kshiftrw $12 pair.
19947 define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19948 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask:
19949 ; VLX: # %bb.0: # %entry
19950 ; VLX-NEXT: kmovd %edi, %k1
19951 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
19952 ; VLX-NEXT: kmovd %k0, %eax
19955 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask:
19956 ; NoVLX: # %bb.0: # %entry
19957 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
19958 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19959 ; NoVLX-NEXT: kmovw %edi, %k1
19960 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19961 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19962 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19963 ; NoVLX-NEXT: kmovw %k0, %eax
19964 ; NoVLX-NEXT: vzeroupper
19967 %0 = bitcast <2 x i64> %__a to <4 x float>
19968 %1 = bitcast <2 x i64> %__b to <4 x float>
19969 %2 = fcmp oeq <4 x float> %0, %1
19970 %3 = bitcast i4 %__u to <4 x i1>
19971 %4 = and <4 x i1> %2, %3
19972 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19973 %6 = bitcast <32 x i1> %5 to i32
; Masked register/memory form: the second fcmp oeq operand is a <2 x i64>
; loaded from %__b and bitcast to <4 x float>. The compare result is ANDed with
; the i4 mask %__u, widened to <32 x i1> with zeroinitializer lanes (shuffle
; indices 4-7), and bitcast to i32.
; The VLX checks expect the load folded into the masked vcmpeqps; the NoVLX
; checks expect a separate vmovaps load and a masked zmm compare.
19977 define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
19978 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem:
19979 ; VLX: # %bb.0: # %entry
19980 ; VLX-NEXT: kmovd %edi, %k1
19981 ; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
19982 ; VLX-NEXT: kmovd %k0, %eax
19985 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem:
19986 ; NoVLX: # %bb.0: # %entry
19987 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19988 ; NoVLX-NEXT: kmovw %edi, %k1
19989 ; NoVLX-NEXT: vmovaps (%rsi), %xmm1
19990 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19991 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19992 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19993 ; NoVLX-NEXT: kmovw %k0, %eax
19994 ; NoVLX-NEXT: vzeroupper
19997 %0 = bitcast <2 x i64> %__a to <4 x float>
19998 %load = load <2 x i64>, <2 x i64>* %__b
19999 %1 = bitcast <2 x i64> %load to <4 x float>
20000 %2 = fcmp oeq <4 x float> %0, %1
20001 %3 = bitcast i4 %__u to <4 x i1>
20002 %4 = and <4 x i1> %2, %3
20003 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20004 %6 = bitcast <32 x i1> %5 to i32
; Masked broadcast form: a scalar float loaded from %__b is splatted to
; <4 x float> and compared with fcmp oeq. The result is ANDed with the i4 mask
; %__u, widened to <32 x i1> with zeroinitializer lanes (shuffle indices 4-7),
; and bitcast to i32.
; The VLX checks expect an embedded {1to4} broadcast operand; the NoVLX checks
; expect an explicit vbroadcastss followed by a masked zmm compare.
20008 define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
20009 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b:
20010 ; VLX: # %bb.0: # %entry
20011 ; VLX-NEXT: kmovd %edi, %k1
20012 ; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
20013 ; VLX-NEXT: kmovd %k0, %eax
20016 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b:
20017 ; NoVLX: # %bb.0: # %entry
20018 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20019 ; NoVLX-NEXT: kmovw %edi, %k1
20020 ; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1
20021 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20022 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20023 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20024 ; NoVLX-NEXT: kmovw %k0, %eax
20025 ; NoVLX-NEXT: vzeroupper
20028 %0 = bitcast <2 x i64> %__a to <4 x float>
20029 %load = load float, float* %__b
20030 %vec = insertelement <4 x float> undef, float %load, i32 0
20031 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
20032 %2 = fcmp oeq <4 x float> %0, %1
20033 %3 = bitcast i4 %__u to <4 x i1>
20034 %4 = and <4 x i1> %2, %3
20035 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20036 %6 = bitcast <32 x i1> %5 to i32
; Unmasked register/register form: fcmp oeq on two <4 x float> values (bitcast
; from <2 x i64>). The <4 x i1> result is widened to <64 x i1>; every shuffle
; index above 3 is in the range 4-7, which selects a zeroinitializer lane. The
; widened vector is bitcast to i64.
; The VLX checks expect one xmm vcmpeqps read back with kmovq; the NoVLX checks
; expect a zmm compare, a kshiftlw/kshiftrw $12 pair, and kmovw into eax.
20042 define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
20043 ; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask:
20044 ; VLX: # %bb.0: # %entry
20045 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
20046 ; VLX-NEXT: kmovq %k0, %rax
20049 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask:
20050 ; NoVLX: # %bb.0: # %entry
20051 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
20052 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20053 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20054 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20055 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20056 ; NoVLX-NEXT: kmovw %k0, %eax
20057 ; NoVLX-NEXT: vzeroupper
20060 %0 = bitcast <2 x i64> %__a to <4 x float>
20061 %1 = bitcast <2 x i64> %__b to <4 x float>
20062 %2 = fcmp oeq <4 x float> %0, %1
20063 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20064 %4 = bitcast <64 x i1> %3 to i64
; Unmasked register/memory form: the second fcmp oeq operand is a <2 x i64>
; loaded from %__b and bitcast to <4 x float>. The <4 x i1> result is widened to
; <64 x i1> with zeroinitializer lanes (shuffle indices 4-7) and bitcast to i64.
; The VLX checks expect the load folded into vcmpeqps; the NoVLX checks expect
; a separate vmovaps load and a zmm compare.
20068 define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
20069 ; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem:
20070 ; VLX: # %bb.0: # %entry
20071 ; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
20072 ; VLX-NEXT: kmovq %k0, %rax
20075 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem:
20076 ; NoVLX: # %bb.0: # %entry
20077 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20078 ; NoVLX-NEXT: vmovaps (%rdi), %xmm1
20079 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20080 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20081 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20082 ; NoVLX-NEXT: kmovw %k0, %eax
20083 ; NoVLX-NEXT: vzeroupper
20086 %0 = bitcast <2 x i64> %__a to <4 x float>
20087 %load = load <2 x i64>, <2 x i64>* %__b
20088 %1 = bitcast <2 x i64> %load to <4 x float>
20089 %2 = fcmp oeq <4 x float> %0, %1
20090 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20091 %4 = bitcast <64 x i1> %3 to i64
; Unmasked broadcast form: a scalar float loaded from %__b is splatted to
; <4 x float> and compared with fcmp oeq. The <4 x i1> result is widened to
; <64 x i1> with zeroinitializer lanes (shuffle indices 4-7) and bitcast to i64.
; The VLX checks expect an embedded {1to4} broadcast operand; the NoVLX checks
; expect an explicit vbroadcastss followed by a zmm compare.
20095 define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
20096 ; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b:
20097 ; VLX: # %bb.0: # %entry
20098 ; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
20099 ; VLX-NEXT: kmovq %k0, %rax
20102 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b:
20103 ; NoVLX: # %bb.0: # %entry
20104 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20105 ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
20106 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20107 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20108 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20109 ; NoVLX-NEXT: kmovw %k0, %eax
20110 ; NoVLX-NEXT: vzeroupper
20113 %0 = bitcast <2 x i64> %__a to <4 x float>
20114 %load = load float, float* %__b
20115 %vec = insertelement <4 x float> undef, float %load, i32 0
20116 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
20117 %2 = fcmp oeq <4 x float> %0, %1
20118 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20119 %4 = bitcast <64 x i1> %3 to i64
; Masked register/register form: fcmp oeq on two <4 x float> values, ANDed with
; the i4 mask %__u bitcast to <4 x i1>. The result is widened to <64 x i1> with
; zeroinitializer lanes (shuffle indices 4-7) and bitcast to i64.
; The VLX checks expect one masked xmm vcmpeqps read back with kmovq; the NoVLX
; checks expect a masked zmm compare followed by a kshiftlw/kshiftrw $12 pair.
20123 define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
20124 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask:
20125 ; VLX: # %bb.0: # %entry
20126 ; VLX-NEXT: kmovd %edi, %k1
20127 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
20128 ; VLX-NEXT: kmovq %k0, %rax
20131 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask:
20132 ; NoVLX: # %bb.0: # %entry
20133 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
20134 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20135 ; NoVLX-NEXT: kmovw %edi, %k1
20136 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20137 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20138 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20139 ; NoVLX-NEXT: kmovw %k0, %eax
20140 ; NoVLX-NEXT: vzeroupper
20143 %0 = bitcast <2 x i64> %__a to <4 x float>
20144 %1 = bitcast <2 x i64> %__b to <4 x float>
20145 %2 = fcmp oeq <4 x float> %0, %1
20146 %3 = bitcast i4 %__u to <4 x i1>
20147 %4 = and <4 x i1> %2, %3
20148 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20149 %6 = bitcast <64 x i1> %5 to i64
; Masked register/memory form: the second fcmp oeq operand is a <2 x i64>
; loaded from %__b and bitcast to <4 x float>. The compare result is ANDed with
; the i4 mask %__u, widened to <64 x i1> with zeroinitializer lanes (shuffle
; indices 4-7), and bitcast to i64.
; The VLX checks expect the load folded into the masked vcmpeqps; the NoVLX
; checks expect a separate vmovaps load and a masked zmm compare.
20153 define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
20154 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem:
20155 ; VLX: # %bb.0: # %entry
20156 ; VLX-NEXT: kmovd %edi, %k1
20157 ; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
20158 ; VLX-NEXT: kmovq %k0, %rax
20161 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem:
20162 ; NoVLX: # %bb.0: # %entry
20163 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20164 ; NoVLX-NEXT: kmovw %edi, %k1
20165 ; NoVLX-NEXT: vmovaps (%rsi), %xmm1
20166 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20167 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20168 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20169 ; NoVLX-NEXT: kmovw %k0, %eax
20170 ; NoVLX-NEXT: vzeroupper
20173 %0 = bitcast <2 x i64> %__a to <4 x float>
20174 %load = load <2 x i64>, <2 x i64>* %__b
20175 %1 = bitcast <2 x i64> %load to <4 x float>
20176 %2 = fcmp oeq <4 x float> %0, %1
20177 %3 = bitcast i4 %__u to <4 x i1>
20178 %4 = and <4 x i1> %2, %3
20179 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20180 %6 = bitcast <64 x i1> %5 to i64
; Masked broadcast form: a scalar float loaded from %__b is splatted to
; <4 x float> and compared with fcmp oeq. The result is ANDed with the i4 mask
; %__u, widened to <64 x i1> with zeroinitializer lanes (shuffle indices 4-7),
; and bitcast to i64.
; The VLX checks expect an embedded {1to4} broadcast operand; the NoVLX checks
; expect an explicit vbroadcastss followed by a masked zmm compare.
20184 define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
20185 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b:
20186 ; VLX: # %bb.0: # %entry
20187 ; VLX-NEXT: kmovd %edi, %k1
20188 ; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
20189 ; VLX-NEXT: kmovq %k0, %rax
20192 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b:
20193 ; NoVLX: # %bb.0: # %entry
20194 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20195 ; NoVLX-NEXT: kmovw %edi, %k1
20196 ; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1
20197 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20198 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20199 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20200 ; NoVLX-NEXT: kmovw %k0, %eax
20201 ; NoVLX-NEXT: vzeroupper
20204 %0 = bitcast <2 x i64> %__a to <4 x float>
20205 %load = load float, float* %__b
20206 %vec = insertelement <4 x float> undef, float %load, i32 0
20207 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
20208 %2 = fcmp oeq <4 x float> %0, %1
20209 %3 = bitcast i4 %__u to <4 x i1>
20210 %4 = and <4 x i1> %2, %3
20211 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20212 %6 = bitcast <64 x i1> %5 to i64
; Unmasked register/register form on 256-bit inputs: fcmp oeq on two
; <8 x float> values (bitcast from <4 x i64>). The <8 x i1> result is widened to
; <16 x i1>; shuffle indices 8-15 select lanes of the zeroinitializer operand.
; The widened vector is bitcast to i16.
; The VLX checks expect one ymm vcmpeqps; the NoVLX checks expect a zmm compare
; followed by a kshiftlw/kshiftrw $8 pair.
20218 define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20219 ; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
20220 ; VLX: # %bb.0: # %entry
20221 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
20222 ; VLX-NEXT: kmovd %k0, %eax
20223 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
20224 ; VLX-NEXT: vzeroupper
20227 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
20228 ; NoVLX: # %bb.0: # %entry
20229 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
20230 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20231 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20232 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20233 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20234 ; NoVLX-NEXT: kmovw %k0, %eax
20235 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20236 ; NoVLX-NEXT: vzeroupper
20239 %0 = bitcast <4 x i64> %__a to <8 x float>
20240 %1 = bitcast <4 x i64> %__b to <8 x float>
20241 %2 = fcmp oeq <8 x float> %0, %1
20242 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20243 %4 = bitcast <16 x i1> %3 to i16
; Unmasked register/memory form on 256-bit inputs: the second fcmp oeq operand
; is a <4 x i64> loaded from %__b and bitcast to <8 x float>. The <8 x i1>
; result is widened to <16 x i1> with zeroinitializer lanes (shuffle indices
; 8-15) and bitcast to i16.
; The VLX checks expect the load folded into vcmpeqps; the NoVLX checks expect
; a separate ymm vmovaps load and a zmm compare.
20247 define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
20248 ; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
20249 ; VLX: # %bb.0: # %entry
20250 ; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0
20251 ; VLX-NEXT: kmovd %k0, %eax
20252 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
20253 ; VLX-NEXT: vzeroupper
20256 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
20257 ; NoVLX: # %bb.0: # %entry
20258 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20259 ; NoVLX-NEXT: vmovaps (%rdi), %ymm1
20260 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20261 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20262 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20263 ; NoVLX-NEXT: kmovw %k0, %eax
20264 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20265 ; NoVLX-NEXT: vzeroupper
20268 %0 = bitcast <4 x i64> %__a to <8 x float>
20269 %load = load <4 x i64>, <4 x i64>* %__b
20270 %1 = bitcast <4 x i64> %load to <8 x float>
20271 %2 = fcmp oeq <8 x float> %0, %1
20272 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20273 %4 = bitcast <16 x i1> %3 to i16
; Unmasked broadcast form on 256-bit inputs: a scalar float loaded from %__b is
; splatted to <8 x float> (insertelement + all-zero-index shufflevector) and
; compared with fcmp oeq. The <8 x i1> result is widened to <16 x i1> with
; zeroinitializer lanes (shuffle indices 8-15) and bitcast to i16.
; The VLX checks expect an embedded {1to8} broadcast operand; the NoVLX checks
; expect an explicit ymm vbroadcastss followed by a zmm compare.
20277 define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
20278 ; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
20279 ; VLX: # %bb.0: # %entry
20280 ; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
20281 ; VLX-NEXT: kmovd %k0, %eax
20282 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
20283 ; VLX-NEXT: vzeroupper
20286 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
20287 ; NoVLX: # %bb.0: # %entry
20288 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20289 ; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1
20290 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20291 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20292 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20293 ; NoVLX-NEXT: kmovw %k0, %eax
20294 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20295 ; NoVLX-NEXT: vzeroupper
20298 %0 = bitcast <4 x i64> %__a to <8 x float>
20299 %load = load float, float* %__b
20300 %vec = insertelement <8 x float> undef, float %load, i32 0
20301 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20302 %2 = fcmp oeq <8 x float> %0, %1
20303 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20304 %4 = bitcast <16 x i1> %3 to i16
; Masked register/register form on 256-bit inputs: fcmp oeq on two <8 x float>
; values (bitcast from <4 x i64>), ANDed with the i8 mask %__u bitcast to
; <8 x i1>. The result is widened to <16 x i1> with zeroinitializer lanes
; (shuffle indices 8-15) and bitcast to i16.
; The VLX checks expect one masked ymm vcmpeqps; the NoVLX checks expect a
; masked zmm compare followed by a kshiftlw/kshiftrw $8 pair.
20308 define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20309 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask:
20310 ; VLX: # %bb.0: # %entry
20311 ; VLX-NEXT: kmovd %edi, %k1
20312 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1}
20313 ; VLX-NEXT: kmovd %k0, %eax
20314 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
20315 ; VLX-NEXT: vzeroupper
20318 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask:
20319 ; NoVLX: # %bb.0: # %entry
20320 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
20321 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20322 ; NoVLX-NEXT: kmovw %edi, %k1
20323 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20324 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20325 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20326 ; NoVLX-NEXT: kmovw %k0, %eax
20327 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20328 ; NoVLX-NEXT: vzeroupper
20331 %0 = bitcast <4 x i64> %__a to <8 x float>
20332 %1 = bitcast <4 x i64> %__b to <8 x float>
20333 %2 = fcmp oeq <8 x float> %0, %1
20334 %3 = bitcast i8 %__u to <8 x i1>
20335 %4 = and <8 x i1> %2, %3
20336 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20337 %6 = bitcast <16 x i1> %5 to i16
; Masked `fcmp oeq` on <8 x float> where the right-hand operand is loaded from
; %__b as <4 x i64> and reinterpreted as <8 x float>. The compare result is
; ANDed with %__u (bitcast from i8 to <8 x i1>), widened to <16 x i1> (shuffle
; indices 8-15 select zeroinitializer lanes) and bitcast to i16.
; The VLX run expects the load folded into a {%k1}-masked ymm vcmpeqps. The
; NoVLX run expects a separate vmovaps load, a zmm-width masked vcmpeqps and a
; kshiftlw/kshiftrw $8 pair.
20341 define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
20342 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem:
20343 ; VLX: # %bb.0: # %entry
20344 ; VLX-NEXT: kmovd %edi, %k1
20345 ; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1}
20346 ; VLX-NEXT: kmovd %k0, %eax
20347 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
20348 ; VLX-NEXT: vzeroupper
20351 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem:
20352 ; NoVLX: # %bb.0: # %entry
20353 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20354 ; NoVLX-NEXT: vmovaps (%rsi), %ymm1
20355 ; NoVLX-NEXT: kmovw %edi, %k1
20356 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20357 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20358 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20359 ; NoVLX-NEXT: kmovw %k0, %eax
20360 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20361 ; NoVLX-NEXT: vzeroupper
20364 %0 = bitcast <4 x i64> %__a to <8 x float>
20365 %load = load <4 x i64>, <4 x i64>* %__b
20366 %1 = bitcast <4 x i64> %load to <8 x float>
20367 %2 = fcmp oeq <8 x float> %0, %1
20368 %3 = bitcast i8 %__u to <8 x i1>
20369 %4 = and <8 x i1> %2, %3
20370 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20371 %6 = bitcast <16 x i1> %5 to i16
; Masked `fcmp oeq` on <8 x float> where the right-hand operand is a single
; float loaded from %__b and splatted to all 8 lanes. The compare result is
; ANDed with %__u (bitcast from i8 to <8 x i1>), widened to <16 x i1> (shuffle
; indices 8-15 select zeroinitializer lanes) and bitcast to i16.
; The VLX run expects a {1to8} broadcast operand on a {%k1}-masked vcmpeqps.
; The NoVLX run expects vbroadcastss, a zmm-width masked vcmpeqps and a
; kshiftlw/kshiftrw $8 pair.
20375 define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr {
20376 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b:
20377 ; VLX: # %bb.0: # %entry
20378 ; VLX-NEXT: kmovd %edi, %k1
20379 ; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
20380 ; VLX-NEXT: kmovd %k0, %eax
20381 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
20382 ; VLX-NEXT: vzeroupper
20385 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b:
20386 ; NoVLX: # %bb.0: # %entry
20387 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20388 ; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1
20389 ; NoVLX-NEXT: kmovw %edi, %k1
20390 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20391 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20392 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20393 ; NoVLX-NEXT: kmovw %k0, %eax
20394 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20395 ; NoVLX-NEXT: vzeroupper
20398 %0 = bitcast <4 x i64> %__a to <8 x float>
20399 %load = load float, float* %__b
20400 %vec = insertelement <8 x float> undef, float %load, i32 0
20401 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20402 %2 = fcmp oeq <8 x float> %0, %1
20403 %3 = bitcast i8 %__u to <8 x i1>
20404 %4 = and <8 x i1> %2, %3
20405 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20406 %6 = bitcast <16 x i1> %5 to i16
; Unmasked `fcmp oeq` on <8 x float> with both operands passed by value. The
; <8 x i1> result is widened to <32 x i1>: shuffle indices 0-7 keep the compare
; lanes, and every later index lies in 8-15, i.e. selects a zeroinitializer
; lane. The widened vector is bitcast to i32.
; The VLX run expects a ymm vcmpeqps and kmovd. The NoVLX run expects a
; zmm-width vcmpeqps, a kshiftlw/kshiftrw $8 pair and kmovw.
20412 define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20413 ; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask:
20414 ; VLX: # %bb.0: # %entry
20415 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
20416 ; VLX-NEXT: kmovd %k0, %eax
20417 ; VLX-NEXT: vzeroupper
20420 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask:
20421 ; NoVLX: # %bb.0: # %entry
20422 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
20423 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20424 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20425 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20426 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20427 ; NoVLX-NEXT: kmovw %k0, %eax
20428 ; NoVLX-NEXT: vzeroupper
20431 %0 = bitcast <4 x i64> %__a to <8 x float>
20432 %1 = bitcast <4 x i64> %__b to <8 x float>
20433 %2 = fcmp oeq <8 x float> %0, %1
20434 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20435 %4 = bitcast <32 x i1> %3 to i32
; Unmasked `fcmp oeq` on <8 x float> where the right-hand operand is loaded
; from %__b as <4 x i64> and reinterpreted as <8 x float>. The <8 x i1> result
; is widened to <32 x i1> (all shuffle indices past the first eight lie in
; 8-15 and select zeroinitializer lanes) and bitcast to i32.
; The VLX run expects the load folded into a ymm vcmpeqps. The NoVLX run
; expects a separate vmovaps load, a zmm-width vcmpeqps and a
; kshiftlw/kshiftrw $8 pair.
20439 define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
20440 ; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem:
20441 ; VLX: # %bb.0: # %entry
20442 ; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0
20443 ; VLX-NEXT: kmovd %k0, %eax
20444 ; VLX-NEXT: vzeroupper
20447 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem:
20448 ; NoVLX: # %bb.0: # %entry
20449 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20450 ; NoVLX-NEXT: vmovaps (%rdi), %ymm1
20451 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20452 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20453 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20454 ; NoVLX-NEXT: kmovw %k0, %eax
20455 ; NoVLX-NEXT: vzeroupper
20458 %0 = bitcast <4 x i64> %__a to <8 x float>
20459 %load = load <4 x i64>, <4 x i64>* %__b
20460 %1 = bitcast <4 x i64> %load to <8 x float>
20461 %2 = fcmp oeq <8 x float> %0, %1
20462 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20463 %4 = bitcast <32 x i1> %3 to i32
; Unmasked `fcmp oeq` on <8 x float> where the right-hand operand is a single
; float loaded from %__b and splatted to all 8 lanes. The <8 x i1> result is
; widened to <32 x i1> (all shuffle indices past the first eight lie in 8-15
; and select zeroinitializer lanes) and bitcast to i32.
; The VLX run expects a {1to8} broadcast operand on vcmpeqps. The NoVLX run
; expects vbroadcastss, a zmm-width vcmpeqps and a kshiftlw/kshiftrw $8 pair.
20467 define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
20468 ; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b:
20469 ; VLX: # %bb.0: # %entry
20470 ; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
20471 ; VLX-NEXT: kmovd %k0, %eax
20472 ; VLX-NEXT: vzeroupper
20475 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b:
20476 ; NoVLX: # %bb.0: # %entry
20477 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20478 ; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1
20479 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20480 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20481 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20482 ; NoVLX-NEXT: kmovw %k0, %eax
20483 ; NoVLX-NEXT: vzeroupper
20486 %0 = bitcast <4 x i64> %__a to <8 x float>
20487 %load = load float, float* %__b
20488 %vec = insertelement <8 x float> undef, float %load, i32 0
20489 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20490 %2 = fcmp oeq <8 x float> %0, %1
20491 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20492 %4 = bitcast <32 x i1> %3 to i32
; Masked `fcmp oeq` on <8 x float> with both operands passed by value. The
; compare result is ANDed with %__u (bitcast from i8 to <8 x i1>), widened to
; <32 x i1> (all shuffle indices past the first eight lie in 8-15 and select
; zeroinitializer lanes) and bitcast to i32.
; The VLX run expects a {%k1}-masked ymm vcmpeqps. The NoVLX run expects a
; zmm-width masked vcmpeqps followed by a kshiftlw/kshiftrw $8 pair.
20496 define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20497 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask:
20498 ; VLX: # %bb.0: # %entry
20499 ; VLX-NEXT: kmovd %edi, %k1
20500 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1}
20501 ; VLX-NEXT: kmovd %k0, %eax
20502 ; VLX-NEXT: vzeroupper
20505 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask:
20506 ; NoVLX: # %bb.0: # %entry
20507 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
20508 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20509 ; NoVLX-NEXT: kmovw %edi, %k1
20510 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20511 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20512 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20513 ; NoVLX-NEXT: kmovw %k0, %eax
20514 ; NoVLX-NEXT: vzeroupper
20517 %0 = bitcast <4 x i64> %__a to <8 x float>
20518 %1 = bitcast <4 x i64> %__b to <8 x float>
20519 %2 = fcmp oeq <8 x float> %0, %1
20520 %3 = bitcast i8 %__u to <8 x i1>
20521 %4 = and <8 x i1> %2, %3
20522 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20523 %6 = bitcast <32 x i1> %5 to i32
; Masked `fcmp oeq` on <8 x float> where the right-hand operand is loaded from
; %__b as <4 x i64> and reinterpreted as <8 x float>. The compare result is
; ANDed with %__u (bitcast from i8 to <8 x i1>), widened to <32 x i1> (all
; shuffle indices past the first eight lie in 8-15 and select zeroinitializer
; lanes) and bitcast to i32.
; The VLX run expects the load folded into a {%k1}-masked ymm vcmpeqps. The
; NoVLX run expects a separate vmovaps load, a zmm-width masked vcmpeqps and a
; kshiftlw/kshiftrw $8 pair.
20527 define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
20528 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem:
20529 ; VLX: # %bb.0: # %entry
20530 ; VLX-NEXT: kmovd %edi, %k1
20531 ; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1}
20532 ; VLX-NEXT: kmovd %k0, %eax
20533 ; VLX-NEXT: vzeroupper
20536 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem:
20537 ; NoVLX: # %bb.0: # %entry
20538 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20539 ; NoVLX-NEXT: vmovaps (%rsi), %ymm1
20540 ; NoVLX-NEXT: kmovw %edi, %k1
20541 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20542 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20543 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20544 ; NoVLX-NEXT: kmovw %k0, %eax
20545 ; NoVLX-NEXT: vzeroupper
20548 %0 = bitcast <4 x i64> %__a to <8 x float>
20549 %load = load <4 x i64>, <4 x i64>* %__b
20550 %1 = bitcast <4 x i64> %load to <8 x float>
20551 %2 = fcmp oeq <8 x float> %0, %1
20552 %3 = bitcast i8 %__u to <8 x i1>
20553 %4 = and <8 x i1> %2, %3
20554 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20555 %6 = bitcast <32 x i1> %5 to i32
; Masked `fcmp oeq` on <8 x float> where the right-hand operand is a single
; float loaded from %__b and splatted to all 8 lanes. The compare result is
; ANDed with %__u (bitcast from i8 to <8 x i1>), widened to <32 x i1> (all
; shuffle indices past the first eight lie in 8-15 and select zeroinitializer
; lanes) and bitcast to i32.
; The VLX run expects a {1to8} broadcast operand on a {%k1}-masked vcmpeqps.
; The NoVLX run expects vbroadcastss, a zmm-width masked vcmpeqps and a
; kshiftlw/kshiftrw $8 pair.
20559 define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr {
20560 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b:
20561 ; VLX: # %bb.0: # %entry
20562 ; VLX-NEXT: kmovd %edi, %k1
20563 ; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
20564 ; VLX-NEXT: kmovd %k0, %eax
20565 ; VLX-NEXT: vzeroupper
20568 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b:
20569 ; NoVLX: # %bb.0: # %entry
20570 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20571 ; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1
20572 ; NoVLX-NEXT: kmovw %edi, %k1
20573 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20574 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20575 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20576 ; NoVLX-NEXT: kmovw %k0, %eax
20577 ; NoVLX-NEXT: vzeroupper
20580 %0 = bitcast <4 x i64> %__a to <8 x float>
20581 %load = load float, float* %__b
20582 %vec = insertelement <8 x float> undef, float %load, i32 0
20583 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20584 %2 = fcmp oeq <8 x float> %0, %1
20585 %3 = bitcast i8 %__u to <8 x i1>
20586 %4 = and <8 x i1> %2, %3
20587 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20588 %6 = bitcast <32 x i1> %5 to i32
; Unmasked `fcmp oeq` on <8 x float> with both operands passed by value. The
; <8 x i1> result is widened to <64 x i1>: shuffle indices 0-7 keep the compare
; lanes, and every later index lies in 8-15, i.e. selects a zeroinitializer
; lane. The widened vector is bitcast to i64.
; The VLX run expects a ymm vcmpeqps read out with kmovq into %rax. The NoVLX
; run expects a zmm-width vcmpeqps, a kshiftlw/kshiftrw $8 pair and kmovw into
; %eax.
20594 define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20595 ; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask:
20596 ; VLX: # %bb.0: # %entry
20597 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
20598 ; VLX-NEXT: kmovq %k0, %rax
20599 ; VLX-NEXT: vzeroupper
20602 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask:
20603 ; NoVLX: # %bb.0: # %entry
20604 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
20605 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20606 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20607 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20608 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20609 ; NoVLX-NEXT: kmovw %k0, %eax
20610 ; NoVLX-NEXT: vzeroupper
20613 %0 = bitcast <4 x i64> %__a to <8 x float>
20614 %1 = bitcast <4 x i64> %__b to <8 x float>
20615 %2 = fcmp oeq <8 x float> %0, %1
20616 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20617 %4 = bitcast <64 x i1> %3 to i64
; Unmasked `fcmp oeq` on <8 x float> where the right-hand operand is loaded
; from %__b as <4 x i64> and reinterpreted as <8 x float>. The <8 x i1> result
; is widened to <64 x i1> (all shuffle indices past the first eight lie in
; 8-15 and select zeroinitializer lanes) and bitcast to i64.
; The VLX run expects the load folded into a ymm vcmpeqps and kmovq into %rax.
; The NoVLX run expects a separate vmovaps load, a zmm-width vcmpeqps, a
; kshiftlw/kshiftrw $8 pair and kmovw into %eax.
20621 define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
20622 ; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem:
20623 ; VLX: # %bb.0: # %entry
20624 ; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0
20625 ; VLX-NEXT: kmovq %k0, %rax
20626 ; VLX-NEXT: vzeroupper
20629 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem:
20630 ; NoVLX: # %bb.0: # %entry
20631 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20632 ; NoVLX-NEXT: vmovaps (%rdi), %ymm1
20633 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20634 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20635 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20636 ; NoVLX-NEXT: kmovw %k0, %eax
20637 ; NoVLX-NEXT: vzeroupper
20640 %0 = bitcast <4 x i64> %__a to <8 x float>
20641 %load = load <4 x i64>, <4 x i64>* %__b
20642 %1 = bitcast <4 x i64> %load to <8 x float>
20643 %2 = fcmp oeq <8 x float> %0, %1
20644 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20645 %4 = bitcast <64 x i1> %3 to i64
; Unmasked `fcmp oeq` on <8 x float> where the right-hand operand is a single
; float loaded from %__b and splatted to all 8 lanes. The <8 x i1> result is
; widened to <64 x i1> (all shuffle indices past the first eight lie in 8-15
; and select zeroinitializer lanes) and bitcast to i64.
; The VLX run expects a {1to8} broadcast operand on vcmpeqps and kmovq into
; %rax. The NoVLX run expects vbroadcastss, a zmm-width vcmpeqps, a
; kshiftlw/kshiftrw $8 pair and kmovw into %eax.
20649 define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
20650 ; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b:
20651 ; VLX: # %bb.0: # %entry
20652 ; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
20653 ; VLX-NEXT: kmovq %k0, %rax
20654 ; VLX-NEXT: vzeroupper
20657 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b:
20658 ; NoVLX: # %bb.0: # %entry
20659 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20660 ; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1
20661 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20662 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20663 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20664 ; NoVLX-NEXT: kmovw %k0, %eax
20665 ; NoVLX-NEXT: vzeroupper
20668 %0 = bitcast <4 x i64> %__a to <8 x float>
20669 %load = load float, float* %__b
20670 %vec = insertelement <8 x float> undef, float %load, i32 0
20671 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20672 %2 = fcmp oeq <8 x float> %0, %1
20673 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20674 %4 = bitcast <64 x i1> %3 to i64
; Masked `fcmp oeq` on <8 x float> with both operands passed by value. The
; compare result is ANDed with %__u (bitcast from i8 to <8 x i1>), widened to
; <64 x i1> (all shuffle indices past the first eight lie in 8-15 and select
; zeroinitializer lanes) and bitcast to i64.
; The VLX run expects a {%k1}-masked ymm vcmpeqps and kmovq into %rax. The
; NoVLX run expects a zmm-width masked vcmpeqps, a kshiftlw/kshiftrw $8 pair
; and kmovw into %eax.
20678 define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20679 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask:
20680 ; VLX: # %bb.0: # %entry
20681 ; VLX-NEXT: kmovd %edi, %k1
20682 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1}
20683 ; VLX-NEXT: kmovq %k0, %rax
20684 ; VLX-NEXT: vzeroupper
20687 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask:
20688 ; NoVLX: # %bb.0: # %entry
20689 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
20690 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20691 ; NoVLX-NEXT: kmovw %edi, %k1
20692 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20693 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20694 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20695 ; NoVLX-NEXT: kmovw %k0, %eax
20696 ; NoVLX-NEXT: vzeroupper
20699 %0 = bitcast <4 x i64> %__a to <8 x float>
20700 %1 = bitcast <4 x i64> %__b to <8 x float>
20701 %2 = fcmp oeq <8 x float> %0, %1
20702 %3 = bitcast i8 %__u to <8 x i1>
20703 %4 = and <8 x i1> %2, %3
20704 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20705 %6 = bitcast <64 x i1> %5 to i64
; Masked `fcmp oeq` on <8 x float> where the right-hand operand is loaded from
; %__b as <4 x i64> and reinterpreted as <8 x float>. The compare result is
; ANDed with %__u (bitcast from i8 to <8 x i1>), widened to <64 x i1> (all
; shuffle indices past the first eight lie in 8-15 and select zeroinitializer
; lanes) and bitcast to i64.
; The VLX run expects the load folded into a {%k1}-masked ymm vcmpeqps and
; kmovq into %rax. The NoVLX run expects a separate vmovaps load, a zmm-width
; masked vcmpeqps, a kshiftlw/kshiftrw $8 pair and kmovw into %eax.
20709 define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
20710 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem:
20711 ; VLX: # %bb.0: # %entry
20712 ; VLX-NEXT: kmovd %edi, %k1
20713 ; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1}
20714 ; VLX-NEXT: kmovq %k0, %rax
20715 ; VLX-NEXT: vzeroupper
20718 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem:
20719 ; NoVLX: # %bb.0: # %entry
20720 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20721 ; NoVLX-NEXT: vmovaps (%rsi), %ymm1
20722 ; NoVLX-NEXT: kmovw %edi, %k1
20723 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20724 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20725 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20726 ; NoVLX-NEXT: kmovw %k0, %eax
20727 ; NoVLX-NEXT: vzeroupper
20730 %0 = bitcast <4 x i64> %__a to <8 x float>
20731 %load = load <4 x i64>, <4 x i64>* %__b
20732 %1 = bitcast <4 x i64> %load to <8 x float>
20733 %2 = fcmp oeq <8 x float> %0, %1
20734 %3 = bitcast i8 %__u to <8 x i1>
20735 %4 = and <8 x i1> %2, %3
20736 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20737 %6 = bitcast <64 x i1> %5 to i64
; Masked `fcmp oeq` on <8 x float> where the right-hand operand is a single
; float loaded from %__b and splatted to all 8 lanes. The compare result is
; ANDed with %__u (bitcast from i8 to <8 x i1>), widened to <64 x i1> (all
; shuffle indices past the first eight lie in 8-15 and select zeroinitializer
; lanes) and bitcast to i64.
; The VLX run expects a {1to8} broadcast operand on a {%k1}-masked vcmpeqps
; and kmovq into %rax. The NoVLX run expects vbroadcastss, a zmm-width masked
; vcmpeqps, a kshiftlw/kshiftrw $8 pair and kmovw into %eax.
20741 define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr {
20742 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b:
20743 ; VLX: # %bb.0: # %entry
20744 ; VLX-NEXT: kmovd %edi, %k1
20745 ; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
20746 ; VLX-NEXT: kmovq %k0, %rax
20747 ; VLX-NEXT: vzeroupper
20750 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b:
20751 ; NoVLX: # %bb.0: # %entry
20752 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20753 ; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1
20754 ; NoVLX-NEXT: kmovw %edi, %k1
20755 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20756 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20757 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20758 ; NoVLX-NEXT: kmovw %k0, %eax
20759 ; NoVLX-NEXT: vzeroupper
20762 %0 = bitcast <4 x i64> %__a to <8 x float>
20763 %load = load float, float* %__b
20764 %vec = insertelement <8 x float> undef, float %load, i32 0
20765 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20766 %2 = fcmp oeq <8 x float> %0, %1
20767 %3 = bitcast i8 %__u to <8 x i1>
20768 %4 = and <8 x i1> %2, %3
20769 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20770 %6 = bitcast <64 x i1> %5 to i64
; Unmasked `fcmp oeq` on <16 x float> with both operands passed by value as
; <8 x i64>. The <16 x i1> result is widened to <32 x i1> (shuffle indices
; 16-31 select lanes of the zeroinitializer operand) and bitcast to i32.
; Both runs expect the same zmm vcmpeqps; they differ only in the mask move
; (kmovd in the VLX run, kmovw in the NoVLX run).
20776 define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
20777 ; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask:
20778 ; VLX: # %bb.0: # %entry
20779 ; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20780 ; VLX-NEXT: kmovd %k0, %eax
20781 ; VLX-NEXT: vzeroupper
20784 ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask:
20785 ; NoVLX: # %bb.0: # %entry
20786 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20787 ; NoVLX-NEXT: kmovw %k0, %eax
20788 ; NoVLX-NEXT: vzeroupper
20791 %0 = bitcast <8 x i64> %__a to <16 x float>
20792 %1 = bitcast <8 x i64> %__b to <16 x float>
20793 %2 = fcmp oeq <16 x float> %0, %1
20794 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
20795 %4 = bitcast <32 x i1> %3 to i32
; Unmasked `fcmp oeq` on <16 x float> where the right-hand operand is loaded
; from %__b as <8 x i64> and reinterpreted as <16 x float>. The <16 x i1>
; result is widened to <32 x i1> (shuffle indices 16-31 select zeroinitializer
; lanes) and bitcast to i32.
; Both runs expect the load folded into a zmm vcmpeqps; they differ only in
; the mask move (kmovd in the VLX run, kmovw in the NoVLX run).
20799 define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
20800 ; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem:
20801 ; VLX: # %bb.0: # %entry
20802 ; VLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
20803 ; VLX-NEXT: kmovd %k0, %eax
20804 ; VLX-NEXT: vzeroupper
20807 ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem:
20808 ; NoVLX: # %bb.0: # %entry
20809 ; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
20810 ; NoVLX-NEXT: kmovw %k0, %eax
20811 ; NoVLX-NEXT: vzeroupper
20814 %0 = bitcast <8 x i64> %__a to <16 x float>
20815 %load = load <8 x i64>, <8 x i64>* %__b
20816 %1 = bitcast <8 x i64> %load to <16 x float>
20817 %2 = fcmp oeq <16 x float> %0, %1
20818 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
20819 %4 = bitcast <32 x i1> %3 to i32
; Unmasked `fcmp oeq` on <16 x float> where the right-hand operand is a single
; float loaded from %__b and splatted to all 16 lanes. The <16 x i1> result is
; widened to <32 x i1> (shuffle indices 16-31 select zeroinitializer lanes)
; and bitcast to i32.
; Both runs expect the broadcast folded into vcmpeqps via {1to16}; they differ
; only in the mask move (kmovd in the VLX run, kmovw in the NoVLX run).
20823 define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr {
20824 ; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b:
20825 ; VLX: # %bb.0: # %entry
20826 ; VLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
20827 ; VLX-NEXT: kmovd %k0, %eax
20828 ; VLX-NEXT: vzeroupper
20831 ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b:
20832 ; NoVLX: # %bb.0: # %entry
20833 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
20834 ; NoVLX-NEXT: kmovw %k0, %eax
20835 ; NoVLX-NEXT: vzeroupper
20838 %0 = bitcast <8 x i64> %__a to <16 x float>
20839 %load = load float, float* %__b
20840 %vec = insertelement <16 x float> undef, float %load, i32 0
20841 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20842 %2 = fcmp oeq <16 x float> %0, %1
20843 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
20844 %4 = bitcast <32 x i1> %3 to i32
; Masked `fcmp oeq` on <16 x float> with both operands passed by value. The
; compare result is ANDed with %__u (bitcast from i16 to <16 x i1>), widened
; to <32 x i1> (shuffle indices 16-31 select zeroinitializer lanes) and
; bitcast to i32.
; The VLX run expects the mask applied as a {%k1} write-mask on vcmpeqps. The
; NoVLX run expects an unmasked vcmpeqps with the mask applied afterwards in a
; general-purpose register via `andl %edi, %eax`.
20848 define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
20849 ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask:
20850 ; VLX: # %bb.0: # %entry
20851 ; VLX-NEXT: kmovd %edi, %k1
20852 ; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20853 ; VLX-NEXT: kmovd %k0, %eax
20854 ; VLX-NEXT: vzeroupper
20857 ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask:
20858 ; NoVLX: # %bb.0: # %entry
20859 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20860 ; NoVLX-NEXT: kmovw %k0, %eax
20861 ; NoVLX-NEXT: andl %edi, %eax
20862 ; NoVLX-NEXT: vzeroupper
20865 %0 = bitcast <8 x i64> %__a to <16 x float>
20866 %1 = bitcast <8 x i64> %__b to <16 x float>
20867 %2 = fcmp oeq <16 x float> %0, %1
20868 %3 = bitcast i16 %__u to <16 x i1>
20869 %4 = and <16 x i1> %2, %3
20870 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
20871 %6 = bitcast <32 x i1> %5 to i32
; Masked `fcmp oeq` on <16 x float> where the right-hand operand is loaded
; from %__b as <8 x i64> and reinterpreted as <16 x float>. The compare result
; is ANDed with %__u (bitcast from i16 to <16 x i1>), widened to <32 x i1>
; (shuffle indices 16-31 select zeroinitializer lanes) and bitcast to i32.
; The VLX run expects the load folded into a {%k1}-masked vcmpeqps. The NoVLX
; run expects an unmasked folded-load vcmpeqps with the mask applied
; afterwards via `andl %edi, %eax`.
20875 define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
20876 ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem:
20877 ; VLX: # %bb.0: # %entry
20878 ; VLX-NEXT: kmovd %edi, %k1
20879 ; VLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1}
20880 ; VLX-NEXT: kmovd %k0, %eax
20881 ; VLX-NEXT: vzeroupper
20884 ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem:
20885 ; NoVLX: # %bb.0: # %entry
20886 ; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0
20887 ; NoVLX-NEXT: kmovw %k0, %eax
20888 ; NoVLX-NEXT: andl %edi, %eax
20889 ; NoVLX-NEXT: vzeroupper
20892 %0 = bitcast <8 x i64> %__a to <16 x float>
20893 %load = load <8 x i64>, <8 x i64>* %__b
20894 %1 = bitcast <8 x i64> %load to <16 x float>
20895 %2 = fcmp oeq <16 x float> %0, %1
20896 %3 = bitcast i16 %__u to <16 x i1>
20897 %4 = and <16 x i1> %2, %3
20898 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
20899 %6 = bitcast <32 x i1> %5 to i32
; Masked `fcmp oeq` on <16 x float> where the right-hand operand is a single
; float loaded from %__b and splatted to all 16 lanes. The compare result is
; ANDed with %__u (bitcast from i16 to <16 x i1>), widened to <32 x i1>
; (shuffle indices 16-31 select zeroinitializer lanes) and bitcast to i32.
; The VLX run expects a {1to16} broadcast operand on a {%k1}-masked vcmpeqps.
; The NoVLX run expects an unmasked {1to16} vcmpeqps with the mask applied
; afterwards via `andl %edi, %eax`.
20903 define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, float* %__b) local_unnamed_addr {
20904 ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b:
20905 ; VLX: # %bb.0: # %entry
20906 ; VLX-NEXT: kmovd %edi, %k1
20907 ; VLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
20908 ; VLX-NEXT: kmovd %k0, %eax
20909 ; VLX-NEXT: vzeroupper
20912 ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b:
20913 ; NoVLX: # %bb.0: # %entry
20914 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0
20915 ; NoVLX-NEXT: kmovw %k0, %eax
20916 ; NoVLX-NEXT: andl %edi, %eax
20917 ; NoVLX-NEXT: vzeroupper
20920 %0 = bitcast <8 x i64> %__a to <16 x float>
20921 %load = load float, float* %__b
20922 %vec = insertelement <16 x float> undef, float %load, i32 0
20923 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20924 %2 = fcmp oeq <16 x float> %0, %1
20925 %3 = bitcast i16 %__u to <16 x i1>
20926 %4 = and <16 x i1> %2, %3
20927 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
20928 %6 = bitcast <32 x i1> %5 to i32
; Unmasked 512-bit compare through the @llvm.x86.avx512.cmp.ps.512 intrinsic,
; called with trailing immediates `i32 2` and `i32 8`. The <16 x i1> result is
; bitcast to i16 and zero-extended to i32 (no shufflevector widening here).
; Both RUN configurations share the common check prefix for this function and
; expect `vcmpleps {sae}` followed by kmovw.
; NOTE(review): the function name says "oeq", but the intrinsic is called with
; immediate 2 and the expected instruction is vcmpleps, not vcmpeqps. Renaming
; would change the test's symbol, so this is only flagged here.
20934 define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
20935 ; CHECK-LABEL: test_vcmpoeqps_v16i1_v32i1_sae_mask:
20936 ; CHECK: # %bb.0: # %entry
20937 ; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
20938 ; CHECK-NEXT: kmovw %k0, %eax
20939 ; CHECK-NEXT: vzeroupper
20942 %0 = bitcast <8 x i64> %__a to <16 x float>
20943 %1 = bitcast <8 x i64> %__b to <16 x float>
20944 %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
20945 %3 = bitcast <16 x i1> %2 to i16
20946 %4 = zext i16 %3 to i32
; Masked 512-bit compare through the @llvm.x86.avx512.cmp.ps.512 intrinsic,
; called with trailing immediates `i32 2` and `i32 8`. The <16 x i1> result is
; ANDed with %__u (bitcast from i16 to <16 x i1>), bitcast to i16 and
; zero-extended to i32.
; Both runs expect an unmasked `vcmpleps {sae}` with the mask applied
; afterwards via `andl %edi, %eax`; they differ only in the mask move (kmovd
; in the VLX run, kmovw in the NoVLX run).
; NOTE(review): the function name says "oeq", but the intrinsic is called with
; immediate 2 and the expected instruction is vcmpleps, not vcmpeqps. Renaming
; would change the test's symbol, so this is only flagged here.
20950 define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_sae_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
20951 ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_sae_mask:
20952 ; VLX: # %bb.0: # %entry
20953 ; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
20954 ; VLX-NEXT: kmovd %k0, %eax
20955 ; VLX-NEXT: andl %edi, %eax
20956 ; VLX-NEXT: vzeroupper
20959 ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_sae_mask:
20960 ; NoVLX: # %bb.0: # %entry
20961 ; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
20962 ; NoVLX-NEXT: kmovw %k0, %eax
20963 ; NoVLX-NEXT: andl %edi, %eax
20964 ; NoVLX-NEXT: vzeroupper
20967 %0 = bitcast <8 x i64> %__a to <16 x float>
20968 %1 = bitcast <8 x i64> %__b to <16 x float>
20969 %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
20970 %3 = bitcast i16 %__u to <16 x i1>
20971 %4 = and <16 x i1> %2, %3
20972 %5 = bitcast <16 x i1> %4 to i16
20973 %6 = zext i16 %5 to i32
; Unmasked `fcmp oeq` on <16 x float> with both operands passed by value as
; <8 x i64>. The <16 x i1> result is widened to <64 x i1>: shuffle indices
; 0-15 keep the compare lanes, and every later index lies in 16-31, i.e.
; selects a zeroinitializer lane. The widened vector is bitcast to i64.
; Both runs expect the same zmm vcmpeqps; the VLX run reads the mask with
; kmovq into %rax, the NoVLX run with kmovw into %eax.
20979 define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
20980 ; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask:
20981 ; VLX: # %bb.0: # %entry
20982 ; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20983 ; VLX-NEXT: kmovq %k0, %rax
20984 ; VLX-NEXT: vzeroupper
20987 ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask:
20988 ; NoVLX: # %bb.0: # %entry
20989 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20990 ; NoVLX-NEXT: kmovw %k0, %eax
20991 ; NoVLX-NEXT: vzeroupper
20994 %0 = bitcast <8 x i64> %__a to <16 x float>
20995 %1 = bitcast <8 x i64> %__b to <16 x float>
20996 %2 = fcmp oeq <16 x float> %0, %1
20997 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
20998 %4 = bitcast <64 x i1> %3 to i64
; Unmasked `fcmp oeq` on <16 x float> where the right-hand operand is loaded
; from %__b as <8 x i64> and reinterpreted as <16 x float>. The <16 x i1>
; result is widened to <64 x i1> (all shuffle indices past the first sixteen
; lie in 16-31 and select zeroinitializer lanes) and bitcast to i64.
; Both runs expect the load folded into a zmm vcmpeqps; the VLX run reads the
; mask with kmovq into %rax, the NoVLX run with kmovw into %eax.
21002 define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
21003 ; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem:
21004 ; VLX: # %bb.0: # %entry
21005 ; VLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
21006 ; VLX-NEXT: kmovq %k0, %rax
21007 ; VLX-NEXT: vzeroupper
21010 ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem:
21011 ; NoVLX: # %bb.0: # %entry
21012 ; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
21013 ; NoVLX-NEXT: kmovw %k0, %eax
21014 ; NoVLX-NEXT: vzeroupper
21017 %0 = bitcast <8 x i64> %__a to <16 x float>
21018 %load = load <8 x i64>, <8 x i64>* %__b
21019 %1 = bitcast <8 x i64> %load to <16 x float>
21020 %2 = fcmp oeq <16 x float> %0, %1
21021 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
21022 %4 = bitcast <64 x i1> %3 to i64
; Broadcast form of the 16-to-64 lane oeq widening test. A single float is
; loaded from %__b and splatted to 16 lanes (insertelement plus an all-zero
; shuffle mask); both expected sequences use the {1to16} form of vcmpeqps.
21026 define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr {
21027 ; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b:
21028 ; VLX: # %bb.0: # %entry
21029 ; VLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
21030 ; VLX-NEXT: kmovq %k0, %rax
21031 ; VLX-NEXT: vzeroupper
21034 ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b:
21035 ; NoVLX: # %bb.0: # %entry
21036 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
21037 ; NoVLX-NEXT: kmovw %k0, %eax
21038 ; NoVLX-NEXT: vzeroupper
21041 %0 = bitcast <8 x i64> %__a to <16 x float>
21042 %load = load float, float* %__b
21043 %vec = insertelement <16 x float> undef, float %load, i32 0
21044 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
21045 %2 = fcmp oeq <16 x float> %0, %1
21046 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
21047 %4 = bitcast <64 x i1> %3 to i64
; Masked 16-to-64 lane oeq test. The fcmp result is ANDed with %__u bitcast to
; <16 x i1> before being widened with zero upper lanes. One expected sequence
; applies the mask through %k1, the other with a scalar andl on the result.
21051 define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
21052 ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask:
21053 ; VLX: # %bb.0: # %entry
21054 ; VLX-NEXT: kmovd %edi, %k1
21055 ; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
21056 ; VLX-NEXT: kmovq %k0, %rax
21057 ; VLX-NEXT: vzeroupper
21060 ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask:
21061 ; NoVLX: # %bb.0: # %entry
21062 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
21063 ; NoVLX-NEXT: kmovw %k0, %eax
21064 ; NoVLX-NEXT: andl %edi, %eax
21065 ; NoVLX-NEXT: vzeroupper
21068 %0 = bitcast <8 x i64> %__a to <16 x float>
21069 %1 = bitcast <8 x i64> %__b to <16 x float>
21070 %2 = fcmp oeq <16 x float> %0, %1
21071 %3 = bitcast i16 %__u to <16 x i1>
21072 %4 = and <16 x i1> %2, %3
21073 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
21074 %6 = bitcast <64 x i1> %5 to i64
; Masked, memory-operand form. The right-hand side is a full vector load from
; %__b (folded into vcmpeqps in both expected sequences) and the oeq result is
; ANDed with %__u before widening to 64 lanes with zero upper bits.
21078 define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
21079 ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem:
21080 ; VLX: # %bb.0: # %entry
21081 ; VLX-NEXT: kmovd %edi, %k1
21082 ; VLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1}
21083 ; VLX-NEXT: kmovq %k0, %rax
21084 ; VLX-NEXT: vzeroupper
21087 ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem:
21088 ; NoVLX: # %bb.0: # %entry
21089 ; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0
21090 ; NoVLX-NEXT: kmovw %k0, %eax
21091 ; NoVLX-NEXT: andl %edi, %eax
21092 ; NoVLX-NEXT: vzeroupper
21095 %0 = bitcast <8 x i64> %__a to <16 x float>
21096 %load = load <8 x i64>, <8 x i64>* %__b
21097 %1 = bitcast <8 x i64> %load to <16 x float>
21098 %2 = fcmp oeq <16 x float> %0, %1
21099 %3 = bitcast i16 %__u to <16 x i1>
21100 %4 = and <16 x i1> %2, %3
21101 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
21102 %6 = bitcast <64 x i1> %5 to i64
; Masked, broadcast form. A float loaded from %__b is splatted to 16 lanes,
; compared oeq against %__a, ANDed with %__u, then widened to 64 lanes with
; zero upper bits. Both expected sequences use the {1to16} broadcast operand.
21106 define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, float* %__b) local_unnamed_addr {
21107 ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b:
21108 ; VLX: # %bb.0: # %entry
21109 ; VLX-NEXT: kmovd %edi, %k1
21110 ; VLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
21111 ; VLX-NEXT: kmovq %k0, %rax
21112 ; VLX-NEXT: vzeroupper
21115 ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b:
21116 ; NoVLX: # %bb.0: # %entry
21117 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0
21118 ; NoVLX-NEXT: kmovw %k0, %eax
21119 ; NoVLX-NEXT: andl %edi, %eax
21120 ; NoVLX-NEXT: vzeroupper
21123 %0 = bitcast <8 x i64> %__a to <16 x float>
21124 %load = load float, float* %__b
21125 %vec = insertelement <16 x float> undef, float %load, i32 0
21126 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
21127 %2 = fcmp oeq <16 x float> %0, %1
21128 %3 = bitcast i16 %__u to <16 x i1>
21129 %4 = and <16 x i1> %2, %3
21130 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
21131 %6 = bitcast <64 x i1> %5 to i64
; {sae} variant. Calls @llvm.x86.avx512.cmp.ps.512 with trailing immediates
; 2 and 8, then widens the 16-bit result to i64 with zext instead of a shuffle.
; NOTE(review): the function name says oeq but the expected instruction is
; vcmpleps {sae} - confirm the predicate immediate is intentional.
21137 define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
21138 ; CHECK-LABEL: test_vcmpoeqps_v16i1_v64i1_sae_mask:
21139 ; CHECK: # %bb.0: # %entry
21140 ; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
21141 ; CHECK-NEXT: kmovw %k0, %eax
21142 ; CHECK-NEXT: vzeroupper
21145 %0 = bitcast <8 x i64> %__a to <16 x float>
21146 %1 = bitcast <8 x i64> %__b to <16 x float>
21147 %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
21148 %3 = bitcast <16 x i1> %2 to i16
21149 %4 = zext i16 %3 to i64
; Masked {sae} variant. The intrinsic result is ANDed with %__u bitcast to
; <16 x i1>, then zero-extended to i64. Both expected sequences apply the mask
; with a scalar andl after moving the compare result out of %k0.
; NOTE(review): the function name says oeq but the expected instruction is
; vcmpleps {sae} - confirm the predicate immediate is intentional.
21153 define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_sae_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
21154 ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_sae_mask:
21155 ; VLX: # %bb.0: # %entry
21156 ; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
21157 ; VLX-NEXT: kmovd %k0, %eax
21158 ; VLX-NEXT: andl %edi, %eax
21159 ; VLX-NEXT: vzeroupper
21162 ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_sae_mask:
21163 ; NoVLX: # %bb.0: # %entry
21164 ; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
21165 ; NoVLX-NEXT: kmovw %k0, %eax
21166 ; NoVLX-NEXT: andl %edi, %eax
21167 ; NoVLX-NEXT: vzeroupper
21170 %0 = bitcast <8 x i64> %__a to <16 x float>
21171 %1 = bitcast <8 x i64> %__b to <16 x float>
21172 %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
21173 %3 = bitcast i16 %__u to <16 x i1>
21174 %4 = and <16 x i1> %2, %3
21175 %5 = bitcast <16 x i1> %4 to i16
21176 %6 = zext i16 %5 to i64
; Declaration of the 512-bit packed-double compare intrinsic, which yields an
; <8 x i1> mask. The v2i1 tests that follow use plain fcmp instead, so callers
; of this intrinsic are presumably further down the file.
21182 declare <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
; Ordered-equal compare of %__a and %__b viewed as <2 x double>, widened to a
; 4-lane mask. Shuffle indices 2 and 3 select zeroinitializer lanes, so the
; upper two bits of the i4 are zero. The avx512f-only sequence compares in
; zmm registers and trims the mask with a kshiftlw/kshiftrw pair by 14.
21183 define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21184 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
21185 ; VLX: # %bb.0: # %entry
21186 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
21187 ; VLX-NEXT: kmovb %k0, %eax
21190 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
21191 ; NoVLX: # %bb.0: # %entry
21192 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21193 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21194 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21195 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21196 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21197 ; NoVLX-NEXT: kmovw %k0, %eax
21198 ; NoVLX-NEXT: vzeroupper
21201 %0 = bitcast <2 x i64> %__a to <2 x double>
21202 %1 = bitcast <2 x i64> %__b to <2 x double>
21203 %2 = fcmp oeq <2 x double> %0, %1
21204 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21205 %4 = bitcast <4 x i1> %3 to i4
; Memory-operand form of the 2-to-4 lane oeq test. The right-hand side is a
; <2 x i64> load from %__b. One expected sequence folds the load into
; vcmpeqpd; the avx512f-only one loads it with vmovapd before a zmm compare.
21209 define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21210 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem:
21211 ; VLX: # %bb.0: # %entry
21212 ; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
21213 ; VLX-NEXT: kmovb %k0, %eax
21216 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem:
21217 ; NoVLX: # %bb.0: # %entry
21218 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21219 ; NoVLX-NEXT: vmovapd (%rdi), %xmm1
21220 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21221 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21222 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21223 ; NoVLX-NEXT: kmovw %k0, %eax
21224 ; NoVLX-NEXT: vzeroupper
21227 %0 = bitcast <2 x i64> %__a to <2 x double>
21228 %load = load <2 x i64>, <2 x i64>* %__b
21229 %1 = bitcast <2 x i64> %load to <2 x double>
21230 %2 = fcmp oeq <2 x double> %0, %1
21231 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21232 %4 = bitcast <4 x i1> %3 to i4
; Broadcast form of the 2-to-4 lane oeq test. A double loaded from %__b is
; splatted to both lanes. One expected sequence uses the {1to2} broadcast
; operand; the avx512f-only one materialises the splat with vmovddup.
21236 define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
21237 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
21238 ; VLX: # %bb.0: # %entry
21239 ; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
21240 ; VLX-NEXT: kmovb %k0, %eax
21243 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
21244 ; NoVLX: # %bb.0: # %entry
21245 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21246 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
21247 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21248 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21249 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21250 ; NoVLX-NEXT: kmovw %k0, %eax
21251 ; NoVLX-NEXT: vzeroupper
21254 %0 = bitcast <2 x i64> %__a to <2 x double>
21255 %load = load double, double* %__b
21256 %vec = insertelement <2 x double> undef, double %load, i32 0
21257 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21258 %2 = fcmp oeq <2 x double> %0, %1
21259 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21260 %4 = bitcast <4 x i1> %3 to i4
; Masked 2-to-4 lane oeq test. The fcmp result on <2 x double> is ANDed with
; %__u bitcast to <2 x i1>, then widened to 4 lanes with zero upper bits.
; Both expected sequences apply the mask through %k1 on the compare.
21264 define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21265 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask:
21266 ; VLX: # %bb.0: # %entry
21267 ; VLX-NEXT: kmovd %edi, %k1
21268 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
21269 ; VLX-NEXT: kmovb %k0, %eax
21272 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask:
21273 ; NoVLX: # %bb.0: # %entry
21274 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21275 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21276 ; NoVLX-NEXT: kmovw %edi, %k1
21277 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21278 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21279 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21280 ; NoVLX-NEXT: kmovw %k0, %eax
21281 ; NoVLX-NEXT: vzeroupper
21284 %0 = bitcast <2 x i64> %__a to <2 x double>
21285 %1 = bitcast <2 x i64> %__b to <2 x double>
21286 %2 = fcmp oeq <2 x double> %0, %1
21287 %3 = bitcast i2 %__u to <2 x i1>
21288 %4 = and <2 x i1> %2, %3
21289 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21290 %6 = bitcast <4 x i1> %5 to i4
; Masked, memory-operand form of the 2-to-4 lane oeq test. The right-hand
; side is loaded from %__b as <2 x i64>; the compare result is ANDed with
; %__u before widening. The avx512f-only sequence loads via vmovapd first.
21294 define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21295 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem:
21296 ; VLX: # %bb.0: # %entry
21297 ; VLX-NEXT: kmovd %edi, %k1
21298 ; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
21299 ; VLX-NEXT: kmovb %k0, %eax
21302 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem:
21303 ; NoVLX: # %bb.0: # %entry
21304 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21305 ; NoVLX-NEXT: kmovw %edi, %k1
21306 ; NoVLX-NEXT: vmovapd (%rsi), %xmm1
21307 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21308 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21309 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21310 ; NoVLX-NEXT: kmovw %k0, %eax
21311 ; NoVLX-NEXT: vzeroupper
21314 %0 = bitcast <2 x i64> %__a to <2 x double>
21315 %load = load <2 x i64>, <2 x i64>* %__b
21316 %1 = bitcast <2 x i64> %load to <2 x double>
21317 %2 = fcmp oeq <2 x double> %0, %1
21318 %3 = bitcast i2 %__u to <2 x i1>
21319 %4 = and <2 x i1> %2, %3
21320 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21321 %6 = bitcast <4 x i1> %5 to i4
; Masked, broadcast form of the 2-to-4 lane oeq test. A double loaded from
; %__b is splatted to both lanes, compared oeq, ANDed with %__u and widened.
; Expected code uses {1to2} in one run and vmovddup plus zmm compare in the other.
21325 define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
21326 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
21327 ; VLX: # %bb.0: # %entry
21328 ; VLX-NEXT: kmovd %edi, %k1
21329 ; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
21330 ; VLX-NEXT: kmovb %k0, %eax
21333 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
21334 ; NoVLX: # %bb.0: # %entry
21335 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21336 ; NoVLX-NEXT: kmovw %edi, %k1
21337 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
21338 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21339 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21340 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21341 ; NoVLX-NEXT: kmovw %k0, %eax
21342 ; NoVLX-NEXT: vzeroupper
21345 %0 = bitcast <2 x i64> %__a to <2 x double>
21346 %load = load double, double* %__b
21347 %vec = insertelement <2 x double> undef, double %load, i32 0
21348 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21349 %2 = fcmp oeq <2 x double> %0, %1
21350 %3 = bitcast i2 %__u to <2 x i1>
21351 %4 = and <2 x i1> %2, %3
21352 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21353 %6 = bitcast <4 x i1> %5 to i4
; Ordered-equal compare on <2 x double>, widened to an 8-lane mask. Every
; shuffle index above 1 is 2 or 3, i.e. a zeroinitializer lane, so bits 2-7
; of the i8 are zero.
21359 define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21360 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask:
21361 ; VLX: # %bb.0: # %entry
21362 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
21363 ; VLX-NEXT: kmovd %k0, %eax
21364 ; VLX-NEXT: # kill: def $al killed $al killed $eax
21367 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask:
21368 ; NoVLX: # %bb.0: # %entry
21369 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21370 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21371 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21372 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21373 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21374 ; NoVLX-NEXT: kmovw %k0, %eax
21375 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
21376 ; NoVLX-NEXT: vzeroupper
21379 %0 = bitcast <2 x i64> %__a to <2 x double>
21380 %1 = bitcast <2 x i64> %__b to <2 x double>
21381 %2 = fcmp oeq <2 x double> %0, %1
21382 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21383 %4 = bitcast <8 x i1> %3 to i8
; Memory-operand form of the 2-to-8 lane oeq test. The right-hand side is a
; <2 x i64> load from %__b. One expected sequence folds the load into
; vcmpeqpd; the avx512f-only one loads it with vmovapd before a zmm compare.
21387 define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21388 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem:
21389 ; VLX: # %bb.0: # %entry
21390 ; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
21391 ; VLX-NEXT: kmovd %k0, %eax
21392 ; VLX-NEXT: # kill: def $al killed $al killed $eax
21395 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem:
21396 ; NoVLX: # %bb.0: # %entry
21397 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21398 ; NoVLX-NEXT: vmovapd (%rdi), %xmm1
21399 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21400 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21401 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21402 ; NoVLX-NEXT: kmovw %k0, %eax
21403 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
21404 ; NoVLX-NEXT: vzeroupper
21407 %0 = bitcast <2 x i64> %__a to <2 x double>
21408 %load = load <2 x i64>, <2 x i64>* %__b
21409 %1 = bitcast <2 x i64> %load to <2 x double>
21410 %2 = fcmp oeq <2 x double> %0, %1
21411 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21412 %4 = bitcast <8 x i1> %3 to i8
; Broadcast form of the 2-to-8 lane oeq test. A double loaded from %__b is
; splatted to both lanes before the compare. Expected code uses {1to2} in one
; run and vmovddup followed by a zmm compare in the avx512f-only run.
21416 define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
21417 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
21418 ; VLX: # %bb.0: # %entry
21419 ; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
21420 ; VLX-NEXT: kmovd %k0, %eax
21421 ; VLX-NEXT: # kill: def $al killed $al killed $eax
21424 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
21425 ; NoVLX: # %bb.0: # %entry
21426 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21427 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
21428 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21429 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21430 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21431 ; NoVLX-NEXT: kmovw %k0, %eax
21432 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
21433 ; NoVLX-NEXT: vzeroupper
21436 %0 = bitcast <2 x i64> %__a to <2 x double>
21437 %load = load double, double* %__b
21438 %vec = insertelement <2 x double> undef, double %load, i32 0
21439 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21440 %2 = fcmp oeq <2 x double> %0, %1
21441 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21442 %4 = bitcast <8 x i1> %3 to i8
; Masked 2-to-8 lane oeq test. The fcmp result on <2 x double> is ANDed with
; %__u bitcast to <2 x i1>, then widened to 8 lanes with zero upper bits.
; Both expected sequences apply the mask through %k1 on the compare.
21446 define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21447 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask:
21448 ; VLX: # %bb.0: # %entry
21449 ; VLX-NEXT: kmovd %edi, %k1
21450 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
21451 ; VLX-NEXT: kmovd %k0, %eax
21452 ; VLX-NEXT: # kill: def $al killed $al killed $eax
21455 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask:
21456 ; NoVLX: # %bb.0: # %entry
21457 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21458 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21459 ; NoVLX-NEXT: kmovw %edi, %k1
21460 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21461 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21462 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21463 ; NoVLX-NEXT: kmovw %k0, %eax
21464 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
21465 ; NoVLX-NEXT: vzeroupper
21468 %0 = bitcast <2 x i64> %__a to <2 x double>
21469 %1 = bitcast <2 x i64> %__b to <2 x double>
21470 %2 = fcmp oeq <2 x double> %0, %1
21471 %3 = bitcast i2 %__u to <2 x i1>
21472 %4 = and <2 x i1> %2, %3
21473 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21474 %6 = bitcast <8 x i1> %5 to i8
; Masked, memory-operand form of the 2-to-8 lane oeq test. The right-hand
; side is loaded from %__b as <2 x i64>; the compare result is ANDed with
; %__u before widening. The avx512f-only sequence loads via vmovapd first.
21478 define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21479 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem:
21480 ; VLX: # %bb.0: # %entry
21481 ; VLX-NEXT: kmovd %edi, %k1
21482 ; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
21483 ; VLX-NEXT: kmovd %k0, %eax
21484 ; VLX-NEXT: # kill: def $al killed $al killed $eax
21487 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem:
21488 ; NoVLX: # %bb.0: # %entry
21489 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21490 ; NoVLX-NEXT: kmovw %edi, %k1
21491 ; NoVLX-NEXT: vmovapd (%rsi), %xmm1
21492 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21493 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21494 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21495 ; NoVLX-NEXT: kmovw %k0, %eax
21496 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
21497 ; NoVLX-NEXT: vzeroupper
21500 %0 = bitcast <2 x i64> %__a to <2 x double>
21501 %load = load <2 x i64>, <2 x i64>* %__b
21502 %1 = bitcast <2 x i64> %load to <2 x double>
21503 %2 = fcmp oeq <2 x double> %0, %1
21504 %3 = bitcast i2 %__u to <2 x i1>
21505 %4 = and <2 x i1> %2, %3
21506 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21507 %6 = bitcast <8 x i1> %5 to i8
; Masked, broadcast form of the 2-to-8 lane oeq test. A double loaded from
; %__b is splatted to both lanes, compared oeq, ANDed with %__u and widened.
; Expected code uses {1to2} in one run and vmovddup plus zmm compare in the other.
21511 define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
21512 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
21513 ; VLX: # %bb.0: # %entry
21514 ; VLX-NEXT: kmovd %edi, %k1
21515 ; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
21516 ; VLX-NEXT: kmovd %k0, %eax
21517 ; VLX-NEXT: # kill: def $al killed $al killed $eax
21520 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
21521 ; NoVLX: # %bb.0: # %entry
21522 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21523 ; NoVLX-NEXT: kmovw %edi, %k1
21524 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
21525 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21526 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21527 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21528 ; NoVLX-NEXT: kmovw %k0, %eax
21529 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
21530 ; NoVLX-NEXT: vzeroupper
21533 %0 = bitcast <2 x i64> %__a to <2 x double>
21534 %load = load double, double* %__b
21535 %vec = insertelement <2 x double> undef, double %load, i32 0
21536 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21537 %2 = fcmp oeq <2 x double> %0, %1
21538 %3 = bitcast i2 %__u to <2 x i1>
21539 %4 = and <2 x i1> %2, %3
21540 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21541 %6 = bitcast <8 x i1> %5 to i8
; Ordered-equal compare on <2 x double>, widened to a 16-lane mask. Every
; shuffle index above 1 is 2 or 3, i.e. a zeroinitializer lane, so bits 2-15
; of the i16 are zero.
21547 define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21548 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
21549 ; VLX: # %bb.0: # %entry
21550 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
21551 ; VLX-NEXT: kmovd %k0, %eax
21552 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
21555 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
21556 ; NoVLX: # %bb.0: # %entry
21557 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21558 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21559 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21560 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21561 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21562 ; NoVLX-NEXT: kmovw %k0, %eax
21563 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
21564 ; NoVLX-NEXT: vzeroupper
21567 %0 = bitcast <2 x i64> %__a to <2 x double>
21568 %1 = bitcast <2 x i64> %__b to <2 x double>
21569 %2 = fcmp oeq <2 x double> %0, %1
21570 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21571 %4 = bitcast <16 x i1> %3 to i16
; Memory-operand form of the 2-to-16 lane oeq test. The right-hand side is a
; <2 x i64> load from %__b. One expected sequence folds the load into
; vcmpeqpd; the avx512f-only one loads it with vmovapd before a zmm compare.
21575 define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21576 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
21577 ; VLX: # %bb.0: # %entry
21578 ; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
21579 ; VLX-NEXT: kmovd %k0, %eax
21580 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
21583 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
21584 ; NoVLX: # %bb.0: # %entry
21585 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21586 ; NoVLX-NEXT: vmovapd (%rdi), %xmm1
21587 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21588 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21589 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21590 ; NoVLX-NEXT: kmovw %k0, %eax
21591 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
21592 ; NoVLX-NEXT: vzeroupper
21595 %0 = bitcast <2 x i64> %__a to <2 x double>
21596 %load = load <2 x i64>, <2 x i64>* %__b
21597 %1 = bitcast <2 x i64> %load to <2 x double>
21598 %2 = fcmp oeq <2 x double> %0, %1
21599 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21600 %4 = bitcast <16 x i1> %3 to i16
; Broadcast form of the 2-to-16 lane oeq test. A double loaded from %__b is
; splatted to both lanes before the compare. Expected code uses {1to2} in one
; run and vmovddup followed by a zmm compare in the avx512f-only run.
21604 define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
21605 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
21606 ; VLX: # %bb.0: # %entry
21607 ; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
21608 ; VLX-NEXT: kmovd %k0, %eax
21609 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
21612 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
21613 ; NoVLX: # %bb.0: # %entry
21614 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21615 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
21616 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21617 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21618 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21619 ; NoVLX-NEXT: kmovw %k0, %eax
21620 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
21621 ; NoVLX-NEXT: vzeroupper
21624 %0 = bitcast <2 x i64> %__a to <2 x double>
21625 %load = load double, double* %__b
21626 %vec = insertelement <2 x double> undef, double %load, i32 0
21627 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21628 %2 = fcmp oeq <2 x double> %0, %1
21629 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21630 %4 = bitcast <16 x i1> %3 to i16
; Masked 2-to-16 lane oeq test. The fcmp result on <2 x double> is ANDed with
; %__u bitcast to <2 x i1>, then widened to 16 lanes with zero upper bits.
; Both expected sequences apply the mask through %k1 on the compare.
21634 define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21635 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask:
21636 ; VLX: # %bb.0: # %entry
21637 ; VLX-NEXT: kmovd %edi, %k1
21638 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
21639 ; VLX-NEXT: kmovd %k0, %eax
21640 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
21643 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask:
21644 ; NoVLX: # %bb.0: # %entry
21645 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21646 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21647 ; NoVLX-NEXT: kmovw %edi, %k1
21648 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21649 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21650 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21651 ; NoVLX-NEXT: kmovw %k0, %eax
21652 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
21653 ; NoVLX-NEXT: vzeroupper
21656 %0 = bitcast <2 x i64> %__a to <2 x double>
21657 %1 = bitcast <2 x i64> %__b to <2 x double>
21658 %2 = fcmp oeq <2 x double> %0, %1
21659 %3 = bitcast i2 %__u to <2 x i1>
21660 %4 = and <2 x i1> %2, %3
21661 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21662 %6 = bitcast <16 x i1> %5 to i16
; Masked, memory-operand form of the 2-to-16 lane oeq test. The right-hand
; side is loaded from %__b as <2 x i64>; the compare result is ANDed with
; %__u before widening. The avx512f-only sequence loads via vmovapd first.
21666 define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21667 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem:
21668 ; VLX: # %bb.0: # %entry
21669 ; VLX-NEXT: kmovd %edi, %k1
21670 ; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
21671 ; VLX-NEXT: kmovd %k0, %eax
21672 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
21675 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem:
21676 ; NoVLX: # %bb.0: # %entry
21677 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21678 ; NoVLX-NEXT: kmovw %edi, %k1
21679 ; NoVLX-NEXT: vmovapd (%rsi), %xmm1
21680 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21681 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21682 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21683 ; NoVLX-NEXT: kmovw %k0, %eax
21684 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
21685 ; NoVLX-NEXT: vzeroupper
21688 %0 = bitcast <2 x i64> %__a to <2 x double>
21689 %load = load <2 x i64>, <2 x i64>* %__b
21690 %1 = bitcast <2 x i64> %load to <2 x double>
21691 %2 = fcmp oeq <2 x double> %0, %1
21692 %3 = bitcast i2 %__u to <2 x i1>
21693 %4 = and <2 x i1> %2, %3
21694 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21695 %6 = bitcast <16 x i1> %5 to i16
; Masked, broadcast form of the 2-to-16 lane oeq test. A double loaded from
; %__b is splatted to both lanes, compared oeq, ANDed with %__u and widened.
; Expected code uses {1to2} in one run and vmovddup plus zmm compare in the other.
21699 define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
21700 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
21701 ; VLX: # %bb.0: # %entry
21702 ; VLX-NEXT: kmovd %edi, %k1
21703 ; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
21704 ; VLX-NEXT: kmovd %k0, %eax
21705 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
21708 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
21709 ; NoVLX: # %bb.0: # %entry
21710 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21711 ; NoVLX-NEXT: kmovw %edi, %k1
21712 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
21713 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21714 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21715 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21716 ; NoVLX-NEXT: kmovw %k0, %eax
21717 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
21718 ; NoVLX-NEXT: vzeroupper
21721 %0 = bitcast <2 x i64> %__a to <2 x double>
21722 %load = load double, double* %__b
21723 %vec = insertelement <2 x double> undef, double %load, i32 0
21724 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21725 %2 = fcmp oeq <2 x double> %0, %1
21726 %3 = bitcast i2 %__u to <2 x i1>
21727 %4 = and <2 x i1> %2, %3
21728 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21729 %6 = bitcast <16 x i1> %5 to i16
; Ordered-equal compare on <2 x double>, widened to a 32-lane mask. Every
; shuffle index above 1 is 2 or 3, i.e. a zeroinitializer lane, so bits 2-31
; of the i32 are zero.
21735 define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21736 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
21737 ; VLX: # %bb.0: # %entry
21738 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
21739 ; VLX-NEXT: kmovd %k0, %eax
21742 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
21743 ; NoVLX: # %bb.0: # %entry
21744 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21745 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21746 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21747 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21748 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21749 ; NoVLX-NEXT: kmovw %k0, %eax
21750 ; NoVLX-NEXT: vzeroupper
21753 %0 = bitcast <2 x i64> %__a to <2 x double>
21754 %1 = bitcast <2 x i64> %__b to <2 x double>
21755 %2 = fcmp oeq <2 x double> %0, %1
21756 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21757 %4 = bitcast <32 x i1> %3 to i32
; Unmasked ordered-equal compare of a <2 x double> register value against a
; full <2 x i64> vector loaded from %__b. The <2 x i1> result is zero-padded to
; <32 x i1> (indices 2 and 3 select zeroinitializer lanes) and bitcast to i32.
; VLX expects the load folded into vcmpeqpd as a (%rdi) memory operand. NoVLX
; expects a separate vmovapd load into xmm1, a zmm compare, and the
; kshiftlw/kshiftrw $14 pair that keeps only the low 2 mask bits.
21761 define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21762 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
21763 ; VLX: # %bb.0: # %entry
21764 ; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
21765 ; VLX-NEXT: kmovd %k0, %eax
21768 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
21769 ; NoVLX: # %bb.0: # %entry
21770 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21771 ; NoVLX-NEXT: vmovapd (%rdi), %xmm1
21772 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21773 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21774 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21775 ; NoVLX-NEXT: kmovw %k0, %eax
21776 ; NoVLX-NEXT: vzeroupper
21779 %0 = bitcast <2 x i64> %__a to <2 x double>
21780 %load = load <2 x i64>, <2 x i64>* %__b
21781 %1 = bitcast <2 x i64> %load to <2 x double>
21782 %2 = fcmp oeq <2 x double> %0, %1
21783 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21784 %4 = bitcast <32 x i1> %3 to i32
; Unmasked ordered-equal compare of a <2 x double> register value against a
; scalar double loaded from %__b and splatted to both lanes (insertelement +
; shufflevector <0, 0>). The <2 x i1> result is zero-padded to <32 x i1> and
; bitcast to i32.
; VLX expects the splat folded into vcmpeqpd as a {1to2} embedded broadcast.
; NoVLX expects a vmovddup splat into xmm1, a zmm compare, and the
; kshiftlw/kshiftrw $14 pair that keeps only the low 2 mask bits.
21788 define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
21789 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
21790 ; VLX: # %bb.0: # %entry
21791 ; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
21792 ; VLX-NEXT: kmovd %k0, %eax
21795 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
21796 ; NoVLX: # %bb.0: # %entry
21797 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21798 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
21799 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21800 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21801 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21802 ; NoVLX-NEXT: kmovw %k0, %eax
21803 ; NoVLX-NEXT: vzeroupper
21806 %0 = bitcast <2 x i64> %__a to <2 x double>
21807 %load = load double, double* %__b
21808 %vec = insertelement <2 x double> undef, double %load, i32 0
21809 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21810 %2 = fcmp oeq <2 x double> %0, %1
21811 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21812 %4 = bitcast <32 x i1> %3 to i32
; Masked ordered-equal compare of two <2 x double> register values. The i2
; argument %__u is bitcast to <2 x i1> and ANDed with the compare result before
; the result is zero-padded to <32 x i1> and bitcast to i32.
; VLX expects %edi moved into %k1 and used as the {%k1} write mask of a 128-bit
; vcmpeqpd. NoVLX expects the same masking on a zmm compare, followed by the
; kshiftlw/kshiftrw $14 pair that keeps only the low 2 mask bits.
21816 define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21817 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask:
21818 ; VLX: # %bb.0: # %entry
21819 ; VLX-NEXT: kmovd %edi, %k1
21820 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
21821 ; VLX-NEXT: kmovd %k0, %eax
21824 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask:
21825 ; NoVLX: # %bb.0: # %entry
21826 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21827 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21828 ; NoVLX-NEXT: kmovw %edi, %k1
21829 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21830 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21831 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21832 ; NoVLX-NEXT: kmovw %k0, %eax
21833 ; NoVLX-NEXT: vzeroupper
21836 %0 = bitcast <2 x i64> %__a to <2 x double>
21837 %1 = bitcast <2 x i64> %__b to <2 x double>
21838 %2 = fcmp oeq <2 x double> %0, %1
21839 %3 = bitcast i2 %__u to <2 x i1>
21840 %4 = and <2 x i1> %2, %3
21841 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21842 %6 = bitcast <32 x i1> %5 to i32
; Masked ordered-equal compare of a <2 x double> register value against a full
; <2 x i64> vector loaded from %__b. The i2 argument %__u is bitcast to
; <2 x i1> and ANDed with the compare result, which is then zero-padded to
; <32 x i1> and bitcast to i32.
; VLX expects a {%k1}-masked vcmpeqpd with a folded (%rsi) memory operand.
; NoVLX expects a separate vmovapd load, a masked zmm compare, and the
; kshiftlw/kshiftrw $14 pair that keeps only the low 2 mask bits.
21846 define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21847 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem:
21848 ; VLX: # %bb.0: # %entry
21849 ; VLX-NEXT: kmovd %edi, %k1
21850 ; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
21851 ; VLX-NEXT: kmovd %k0, %eax
21854 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem:
21855 ; NoVLX: # %bb.0: # %entry
21856 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21857 ; NoVLX-NEXT: kmovw %edi, %k1
21858 ; NoVLX-NEXT: vmovapd (%rsi), %xmm1
21859 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21860 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21861 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21862 ; NoVLX-NEXT: kmovw %k0, %eax
21863 ; NoVLX-NEXT: vzeroupper
21866 %0 = bitcast <2 x i64> %__a to <2 x double>
21867 %load = load <2 x i64>, <2 x i64>* %__b
21868 %1 = bitcast <2 x i64> %load to <2 x double>
21869 %2 = fcmp oeq <2 x double> %0, %1
21870 %3 = bitcast i2 %__u to <2 x i1>
21871 %4 = and <2 x i1> %2, %3
21872 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21873 %6 = bitcast <32 x i1> %5 to i32
; Masked ordered-equal compare of a <2 x double> register value against a
; scalar double loaded from %__b and splatted to both lanes. The i2 argument
; %__u is bitcast to <2 x i1> and ANDed with the compare result, which is then
; zero-padded to <32 x i1> and bitcast to i32.
; VLX expects a {%k1}-masked vcmpeqpd with a {1to2} embedded broadcast.
; NoVLX expects a vmovddup splat, a masked zmm compare, and the
; kshiftlw/kshiftrw $14 pair that keeps only the low 2 mask bits.
21877 define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
21878 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
21879 ; VLX: # %bb.0: # %entry
21880 ; VLX-NEXT: kmovd %edi, %k1
21881 ; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
21882 ; VLX-NEXT: kmovd %k0, %eax
21885 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
21886 ; NoVLX: # %bb.0: # %entry
21887 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21888 ; NoVLX-NEXT: kmovw %edi, %k1
21889 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
21890 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21891 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21892 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21893 ; NoVLX-NEXT: kmovw %k0, %eax
21894 ; NoVLX-NEXT: vzeroupper
21897 %0 = bitcast <2 x i64> %__a to <2 x double>
21898 %load = load double, double* %__b
21899 %vec = insertelement <2 x double> undef, double %load, i32 0
21900 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21901 %2 = fcmp oeq <2 x double> %0, %1
21902 %3 = bitcast i2 %__u to <2 x i1>
21903 %4 = and <2 x i1> %2, %3
21904 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21905 %6 = bitcast <32 x i1> %5 to i32
21911 define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21912 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
21913 ; VLX: # %bb.0: # %entry
21914 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
21915 ; VLX-NEXT: kmovq %k0, %rax
21918 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
21919 ; NoVLX: # %bb.0: # %entry
21920 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21921 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21922 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21923 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21924 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21925 ; NoVLX-NEXT: kmovw %k0, %eax
21926 ; NoVLX-NEXT: vzeroupper
21929 %0 = bitcast <2 x i64> %__a to <2 x double>
21930 %1 = bitcast <2 x i64> %__b to <2 x double>
21931 %2 = fcmp oeq <2 x double> %0, %1
21932 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21933 %4 = bitcast <64 x i1> %3 to i64
21937 define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21938 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
21939 ; VLX: # %bb.0: # %entry
21940 ; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
21941 ; VLX-NEXT: kmovq %k0, %rax
21944 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
21945 ; NoVLX: # %bb.0: # %entry
21946 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21947 ; NoVLX-NEXT: vmovapd (%rdi), %xmm1
21948 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21949 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21950 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21951 ; NoVLX-NEXT: kmovw %k0, %eax
21952 ; NoVLX-NEXT: vzeroupper
21955 %0 = bitcast <2 x i64> %__a to <2 x double>
21956 %load = load <2 x i64>, <2 x i64>* %__b
21957 %1 = bitcast <2 x i64> %load to <2 x double>
21958 %2 = fcmp oeq <2 x double> %0, %1
21959 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21960 %4 = bitcast <64 x i1> %3 to i64
21964 define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
21965 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
21966 ; VLX: # %bb.0: # %entry
21967 ; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
21968 ; VLX-NEXT: kmovq %k0, %rax
21971 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
21972 ; NoVLX: # %bb.0: # %entry
21973 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21974 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
21975 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21976 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21977 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21978 ; NoVLX-NEXT: kmovw %k0, %eax
21979 ; NoVLX-NEXT: vzeroupper
21982 %0 = bitcast <2 x i64> %__a to <2 x double>
21983 %load = load double, double* %__b
21984 %vec = insertelement <2 x double> undef, double %load, i32 0
21985 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21986 %2 = fcmp oeq <2 x double> %0, %1
21987 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21988 %4 = bitcast <64 x i1> %3 to i64
21992 define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21993 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask:
21994 ; VLX: # %bb.0: # %entry
21995 ; VLX-NEXT: kmovd %edi, %k1
21996 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
21997 ; VLX-NEXT: kmovq %k0, %rax
22000 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask:
22001 ; NoVLX: # %bb.0: # %entry
22002 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
22003 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22004 ; NoVLX-NEXT: kmovw %edi, %k1
22005 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22006 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
22007 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
22008 ; NoVLX-NEXT: kmovw %k0, %eax
22009 ; NoVLX-NEXT: vzeroupper
22012 %0 = bitcast <2 x i64> %__a to <2 x double>
22013 %1 = bitcast <2 x i64> %__b to <2 x double>
22014 %2 = fcmp oeq <2 x double> %0, %1
22015 %3 = bitcast i2 %__u to <2 x i1>
22016 %4 = and <2 x i1> %2, %3
22017 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22018 %6 = bitcast <64 x i1> %5 to i64
22022 define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
22023 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem:
22024 ; VLX: # %bb.0: # %entry
22025 ; VLX-NEXT: kmovd %edi, %k1
22026 ; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
22027 ; VLX-NEXT: kmovq %k0, %rax
22030 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem:
22031 ; NoVLX: # %bb.0: # %entry
22032 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22033 ; NoVLX-NEXT: kmovw %edi, %k1
22034 ; NoVLX-NEXT: vmovapd (%rsi), %xmm1
22035 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22036 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
22037 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
22038 ; NoVLX-NEXT: kmovw %k0, %eax
22039 ; NoVLX-NEXT: vzeroupper
22042 %0 = bitcast <2 x i64> %__a to <2 x double>
22043 %load = load <2 x i64>, <2 x i64>* %__b
22044 %1 = bitcast <2 x i64> %load to <2 x double>
22045 %2 = fcmp oeq <2 x double> %0, %1
22046 %3 = bitcast i2 %__u to <2 x i1>
22047 %4 = and <2 x i1> %2, %3
22048 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22049 %6 = bitcast <64 x i1> %5 to i64
22053 define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
22054 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
22055 ; VLX: # %bb.0: # %entry
22056 ; VLX-NEXT: kmovd %edi, %k1
22057 ; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
22058 ; VLX-NEXT: kmovq %k0, %rax
22061 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
22062 ; NoVLX: # %bb.0: # %entry
22063 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22064 ; NoVLX-NEXT: kmovw %edi, %k1
22065 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
22066 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22067 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
22068 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
22069 ; NoVLX-NEXT: kmovw %k0, %eax
22070 ; NoVLX-NEXT: vzeroupper
22073 %0 = bitcast <2 x i64> %__a to <2 x double>
22074 %load = load double, double* %__b
22075 %vec = insertelement <2 x double> undef, double %load, i32 0
22076 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
22077 %2 = fcmp oeq <2 x double> %0, %1
22078 %3 = bitcast i2 %__u to <2 x i1>
22079 %4 = and <2 x i1> %2, %3
22080 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22081 %6 = bitcast <64 x i1> %5 to i64
; Unmasked ordered-equal compare of two <4 x double> values passed in registers.
; The <4 x i1> result is padded to <8 x i1> with zeros (shuffle indices 4-7
; select lanes of the zeroinitializer operand) and bitcast to i8.
; VLX expects a 256-bit vcmpeqpd written directly to %k0. NoVLX expects the
; compare on zmm registers followed by a kshiftlw/kshiftrw $12 pair, which
; leaves only the low 4 bits of the 16-bit mask.
22087 define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22088 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask:
22089 ; VLX: # %bb.0: # %entry
22090 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
22091 ; VLX-NEXT: kmovd %k0, %eax
22092 ; VLX-NEXT: # kill: def $al killed $al killed $eax
22093 ; VLX-NEXT: vzeroupper
22096 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask:
22097 ; NoVLX: # %bb.0: # %entry
22098 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22099 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22100 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22101 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22102 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22103 ; NoVLX-NEXT: kmovw %k0, %eax
22104 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
22105 ; NoVLX-NEXT: vzeroupper
22108 %0 = bitcast <4 x i64> %__a to <4 x double>
22109 %1 = bitcast <4 x i64> %__b to <4 x double>
22110 %2 = fcmp oeq <4 x double> %0, %1
22111 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22112 %4 = bitcast <8 x i1> %3 to i8
; Unmasked ordered-equal compare of a <4 x double> register value against a
; full <4 x i64> vector loaded from %__b. The <4 x i1> result is zero-padded to
; <8 x i1> (indices 4-7 select zeroinitializer lanes) and bitcast to i8.
; VLX expects the load folded into a 256-bit vcmpeqpd as a (%rdi) operand.
; NoVLX expects a separate vmovapd load into ymm1, a zmm compare, and the
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
22116 define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22117 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem:
22118 ; VLX: # %bb.0: # %entry
22119 ; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
22120 ; VLX-NEXT: kmovd %k0, %eax
22121 ; VLX-NEXT: # kill: def $al killed $al killed $eax
22122 ; VLX-NEXT: vzeroupper
22125 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem:
22126 ; NoVLX: # %bb.0: # %entry
22127 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22128 ; NoVLX-NEXT: vmovapd (%rdi), %ymm1
22129 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22130 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22131 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22132 ; NoVLX-NEXT: kmovw %k0, %eax
22133 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
22134 ; NoVLX-NEXT: vzeroupper
22137 %0 = bitcast <4 x i64> %__a to <4 x double>
22138 %load = load <4 x i64>, <4 x i64>* %__b
22139 %1 = bitcast <4 x i64> %load to <4 x double>
22140 %2 = fcmp oeq <4 x double> %0, %1
22141 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22142 %4 = bitcast <8 x i1> %3 to i8
; Unmasked ordered-equal compare of a <4 x double> register value against a
; scalar double loaded from %__b and splatted to all four lanes (insertelement
; + shufflevector <0, 0, 0, 0>). The <4 x i1> result is zero-padded to
; <8 x i1> and bitcast to i8.
; VLX expects the splat folded into vcmpeqpd as a {1to4} embedded broadcast.
; NoVLX expects a vbroadcastsd into ymm1, a zmm compare, and the
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
22146 define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
22147 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
22148 ; VLX: # %bb.0: # %entry
22149 ; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
22150 ; VLX-NEXT: kmovd %k0, %eax
22151 ; VLX-NEXT: # kill: def $al killed $al killed $eax
22152 ; VLX-NEXT: vzeroupper
22155 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
22156 ; NoVLX: # %bb.0: # %entry
22157 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22158 ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
22159 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22160 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22161 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22162 ; NoVLX-NEXT: kmovw %k0, %eax
22163 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
22164 ; NoVLX-NEXT: vzeroupper
22167 %0 = bitcast <4 x i64> %__a to <4 x double>
22168 %load = load double, double* %__b
22169 %vec = insertelement <4 x double> undef, double %load, i32 0
22170 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22171 %2 = fcmp oeq <4 x double> %0, %1
22172 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22173 %4 = bitcast <8 x i1> %3 to i8
; Masked ordered-equal compare of two <4 x double> register values. The i4
; argument %__u is bitcast to <4 x i1> and ANDed with the compare result before
; the result is zero-padded to <8 x i1> and bitcast to i8.
; VLX expects %edi moved into %k1 and used as the {%k1} write mask of a 256-bit
; vcmpeqpd. NoVLX expects the same masking on a zmm compare, followed by the
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
22177 define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22178 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask:
22179 ; VLX: # %bb.0: # %entry
22180 ; VLX-NEXT: kmovd %edi, %k1
22181 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
22182 ; VLX-NEXT: kmovd %k0, %eax
22183 ; VLX-NEXT: # kill: def $al killed $al killed $eax
22184 ; VLX-NEXT: vzeroupper
22187 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask:
22188 ; NoVLX: # %bb.0: # %entry
22189 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22190 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22191 ; NoVLX-NEXT: kmovw %edi, %k1
22192 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22193 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22194 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22195 ; NoVLX-NEXT: kmovw %k0, %eax
22196 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
22197 ; NoVLX-NEXT: vzeroupper
22200 %0 = bitcast <4 x i64> %__a to <4 x double>
22201 %1 = bitcast <4 x i64> %__b to <4 x double>
22202 %2 = fcmp oeq <4 x double> %0, %1
22203 %3 = bitcast i4 %__u to <4 x i1>
22204 %4 = and <4 x i1> %2, %3
22205 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22206 %6 = bitcast <8 x i1> %5 to i8
; Masked ordered-equal compare of a <4 x double> register value against a full
; <4 x i64> vector loaded from %__b. The i4 argument %__u is bitcast to
; <4 x i1> and ANDed with the compare result, which is then zero-padded to
; <8 x i1> and bitcast to i8.
; VLX expects a {%k1}-masked vcmpeqpd with a folded (%rsi) memory operand.
; NoVLX expects a separate vmovapd load into ymm1, a masked zmm compare, and
; the kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
22210 define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22211 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem:
22212 ; VLX: # %bb.0: # %entry
22213 ; VLX-NEXT: kmovd %edi, %k1
22214 ; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
22215 ; VLX-NEXT: kmovd %k0, %eax
22216 ; VLX-NEXT: # kill: def $al killed $al killed $eax
22217 ; VLX-NEXT: vzeroupper
22220 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem:
22221 ; NoVLX: # %bb.0: # %entry
22222 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22223 ; NoVLX-NEXT: kmovw %edi, %k1
22224 ; NoVLX-NEXT: vmovapd (%rsi), %ymm1
22225 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22226 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22227 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22228 ; NoVLX-NEXT: kmovw %k0, %eax
22229 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
22230 ; NoVLX-NEXT: vzeroupper
22233 %0 = bitcast <4 x i64> %__a to <4 x double>
22234 %load = load <4 x i64>, <4 x i64>* %__b
22235 %1 = bitcast <4 x i64> %load to <4 x double>
22236 %2 = fcmp oeq <4 x double> %0, %1
22237 %3 = bitcast i4 %__u to <4 x i1>
22238 %4 = and <4 x i1> %2, %3
22239 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22240 %6 = bitcast <8 x i1> %5 to i8
; Masked ordered-equal compare of a <4 x double> register value against a
; scalar double loaded from %__b and splatted to all four lanes. The i4
; argument %__u is bitcast to <4 x i1> and ANDed with the compare result, which
; is then zero-padded to <8 x i1> and bitcast to i8.
; VLX expects a {%k1}-masked vcmpeqpd with a {1to4} embedded broadcast.
; NoVLX expects a vbroadcastsd into ymm1, a masked zmm compare, and the
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
22244 define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
22245 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
22246 ; VLX: # %bb.0: # %entry
22247 ; VLX-NEXT: kmovd %edi, %k1
22248 ; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
22249 ; VLX-NEXT: kmovd %k0, %eax
22250 ; VLX-NEXT: # kill: def $al killed $al killed $eax
22251 ; VLX-NEXT: vzeroupper
22254 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
22255 ; NoVLX: # %bb.0: # %entry
22256 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22257 ; NoVLX-NEXT: kmovw %edi, %k1
22258 ; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1
22259 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22260 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22261 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22262 ; NoVLX-NEXT: kmovw %k0, %eax
22263 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
22264 ; NoVLX-NEXT: vzeroupper
22267 %0 = bitcast <4 x i64> %__a to <4 x double>
22268 %load = load double, double* %__b
22269 %vec = insertelement <4 x double> undef, double %load, i32 0
22270 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22271 %2 = fcmp oeq <4 x double> %0, %1
22272 %3 = bitcast i4 %__u to <4 x i1>
22273 %4 = and <4 x i1> %2, %3
22274 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22275 %6 = bitcast <8 x i1> %5 to i8
; Unmasked ordered-equal compare of two <4 x double> values passed in registers.
; The <4 x i1> result is padded to <16 x i1> with zeros (shuffle indices 4-7
; select lanes of the zeroinitializer operand) and bitcast to i16.
; VLX expects a 256-bit vcmpeqpd written directly to %k0. NoVLX expects the
; compare on zmm registers followed by a kshiftlw/kshiftrw $12 pair, which
; leaves only the low 4 bits of the 16-bit mask.
22281 define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22282 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask:
22283 ; VLX: # %bb.0: # %entry
22284 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
22285 ; VLX-NEXT: kmovd %k0, %eax
22286 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22287 ; VLX-NEXT: vzeroupper
22290 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask:
22291 ; NoVLX: # %bb.0: # %entry
22292 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22293 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22294 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22295 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22296 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22297 ; NoVLX-NEXT: kmovw %k0, %eax
22298 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22299 ; NoVLX-NEXT: vzeroupper
22302 %0 = bitcast <4 x i64> %__a to <4 x double>
22303 %1 = bitcast <4 x i64> %__b to <4 x double>
22304 %2 = fcmp oeq <4 x double> %0, %1
22305 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22306 %4 = bitcast <16 x i1> %3 to i16
; Unmasked ordered-equal compare of a <4 x double> register value against a
; full <4 x i64> vector loaded from %__b. The <4 x i1> result is zero-padded to
; <16 x i1> (indices 4-7 select zeroinitializer lanes) and bitcast to i16.
; VLX expects the load folded into a 256-bit vcmpeqpd as a (%rdi) operand.
; NoVLX expects a separate vmovapd load into ymm1, a zmm compare, and the
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
22310 define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22311 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem:
22312 ; VLX: # %bb.0: # %entry
22313 ; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
22314 ; VLX-NEXT: kmovd %k0, %eax
22315 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22316 ; VLX-NEXT: vzeroupper
22319 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem:
22320 ; NoVLX: # %bb.0: # %entry
22321 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22322 ; NoVLX-NEXT: vmovapd (%rdi), %ymm1
22323 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22324 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22325 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22326 ; NoVLX-NEXT: kmovw %k0, %eax
22327 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22328 ; NoVLX-NEXT: vzeroupper
22331 %0 = bitcast <4 x i64> %__a to <4 x double>
22332 %load = load <4 x i64>, <4 x i64>* %__b
22333 %1 = bitcast <4 x i64> %load to <4 x double>
22334 %2 = fcmp oeq <4 x double> %0, %1
22335 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22336 %4 = bitcast <16 x i1> %3 to i16
; Unmasked ordered-equal compare of a <4 x double> register value against a
; scalar double loaded from %__b and splatted to all four lanes (insertelement
; + shufflevector <0, 0, 0, 0>). The <4 x i1> result is zero-padded to
; <16 x i1> and bitcast to i16.
; VLX expects the splat folded into vcmpeqpd as a {1to4} embedded broadcast.
; NoVLX expects a vbroadcastsd into ymm1, a zmm compare, and the
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
22340 define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
22341 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
22342 ; VLX: # %bb.0: # %entry
22343 ; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
22344 ; VLX-NEXT: kmovd %k0, %eax
22345 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22346 ; VLX-NEXT: vzeroupper
22349 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
22350 ; NoVLX: # %bb.0: # %entry
22351 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22352 ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
22353 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22354 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22355 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22356 ; NoVLX-NEXT: kmovw %k0, %eax
22357 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22358 ; NoVLX-NEXT: vzeroupper
22361 %0 = bitcast <4 x i64> %__a to <4 x double>
22362 %load = load double, double* %__b
22363 %vec = insertelement <4 x double> undef, double %load, i32 0
22364 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22365 %2 = fcmp oeq <4 x double> %0, %1
22366 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22367 %4 = bitcast <16 x i1> %3 to i16
; Masked ordered-equal compare of two <4 x double> register values. The i4
; argument %__u is bitcast to <4 x i1> and ANDed with the compare result before
; the result is zero-padded to <16 x i1> and bitcast to i16.
; VLX expects %edi moved into %k1 and used as the {%k1} write mask of a 256-bit
; vcmpeqpd. NoVLX expects the same masking on a zmm compare, followed by the
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
22371 define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22372 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask:
22373 ; VLX: # %bb.0: # %entry
22374 ; VLX-NEXT: kmovd %edi, %k1
22375 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
22376 ; VLX-NEXT: kmovd %k0, %eax
22377 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22378 ; VLX-NEXT: vzeroupper
22381 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask:
22382 ; NoVLX: # %bb.0: # %entry
22383 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22384 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22385 ; NoVLX-NEXT: kmovw %edi, %k1
22386 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22387 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22388 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22389 ; NoVLX-NEXT: kmovw %k0, %eax
22390 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22391 ; NoVLX-NEXT: vzeroupper
22394 %0 = bitcast <4 x i64> %__a to <4 x double>
22395 %1 = bitcast <4 x i64> %__b to <4 x double>
22396 %2 = fcmp oeq <4 x double> %0, %1
22397 %3 = bitcast i4 %__u to <4 x i1>
22398 %4 = and <4 x i1> %2, %3
22399 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22400 %6 = bitcast <16 x i1> %5 to i16
; Masked ordered-equal compare of a <4 x double> register value against a full
; <4 x i64> vector loaded from %__b. The i4 argument %__u is bitcast to
; <4 x i1> and ANDed with the compare result, which is then zero-padded to
; <16 x i1> and bitcast to i16.
; VLX expects a {%k1}-masked vcmpeqpd with a folded (%rsi) memory operand.
; NoVLX expects a separate vmovapd load into ymm1, a masked zmm compare, and
; the kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
22404 define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22405 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem:
22406 ; VLX: # %bb.0: # %entry
22407 ; VLX-NEXT: kmovd %edi, %k1
22408 ; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
22409 ; VLX-NEXT: kmovd %k0, %eax
22410 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22411 ; VLX-NEXT: vzeroupper
22414 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem:
22415 ; NoVLX: # %bb.0: # %entry
22416 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22417 ; NoVLX-NEXT: kmovw %edi, %k1
22418 ; NoVLX-NEXT: vmovapd (%rsi), %ymm1
22419 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22420 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22421 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22422 ; NoVLX-NEXT: kmovw %k0, %eax
22423 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22424 ; NoVLX-NEXT: vzeroupper
22427 %0 = bitcast <4 x i64> %__a to <4 x double>
22428 %load = load <4 x i64>, <4 x i64>* %__b
22429 %1 = bitcast <4 x i64> %load to <4 x double>
22430 %2 = fcmp oeq <4 x double> %0, %1
22431 %3 = bitcast i4 %__u to <4 x i1>
22432 %4 = and <4 x i1> %2, %3
22433 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22434 %6 = bitcast <16 x i1> %5 to i16
; Masked ordered-equal compare of a <4 x double> register value against a
; scalar double loaded from %__b and splatted to all four lanes. The i4
; argument %__u is bitcast to <4 x i1> and ANDed with the compare result, which
; is then zero-padded to <16 x i1> and bitcast to i16.
; VLX expects a {%k1}-masked vcmpeqpd with a {1to4} embedded broadcast.
; NoVLX expects a vbroadcastsd into ymm1, a masked zmm compare, and the
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
22438 define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
22439 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
22440 ; VLX: # %bb.0: # %entry
22441 ; VLX-NEXT: kmovd %edi, %k1
22442 ; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
22443 ; VLX-NEXT: kmovd %k0, %eax
22444 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22445 ; VLX-NEXT: vzeroupper
22448 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
22449 ; NoVLX: # %bb.0: # %entry
22450 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22451 ; NoVLX-NEXT: kmovw %edi, %k1
22452 ; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1
22453 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22454 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22455 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22456 ; NoVLX-NEXT: kmovw %k0, %eax
22457 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22458 ; NoVLX-NEXT: vzeroupper
22461 %0 = bitcast <4 x i64> %__a to <4 x double>
22462 %load = load double, double* %__b
22463 %vec = insertelement <4 x double> undef, double %load, i32 0
22464 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22465 %2 = fcmp oeq <4 x double> %0, %1
22466 %3 = bitcast i4 %__u to <4 x i1>
22467 %4 = and <4 x i1> %2, %3
22468 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22469 %6 = bitcast <16 x i1> %5 to i16
; Unmasked ordered-equal compare of two <4 x double> values passed in registers.
; The <4 x i1> result is padded to <32 x i1> with zeros (shuffle indices 4-7
; select lanes of the zeroinitializer operand) and bitcast to i32.
; VLX expects a 256-bit vcmpeqpd written directly to %k0. NoVLX expects the
; compare on zmm registers followed by a kshiftlw/kshiftrw $12 pair, which
; leaves only the low 4 bits of the 16-bit mask.
22475 define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22476 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask:
22477 ; VLX: # %bb.0: # %entry
22478 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
22479 ; VLX-NEXT: kmovd %k0, %eax
22480 ; VLX-NEXT: vzeroupper
22483 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask:
22484 ; NoVLX: # %bb.0: # %entry
22485 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22486 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22487 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22488 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22489 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22490 ; NoVLX-NEXT: kmovw %k0, %eax
22491 ; NoVLX-NEXT: vzeroupper
22494 %0 = bitcast <4 x i64> %__a to <4 x double>
22495 %1 = bitcast <4 x i64> %__b to <4 x double>
22496 %2 = fcmp oeq <4 x double> %0, %1
22497 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22498 %4 = bitcast <32 x i1> %3 to i32
; Unmasked ordered-equal compare of %__a against a full <4 x i64> vector
; loaded from %__b (both viewed as <4 x double>), widened to a 32-bit mask.
; Expected codegen: with the VLX prefix the load is folded into vcmpeqpd;
; with the NoVLX prefix it is a separate vmovapd into %ymm1 followed by a
; zmm-width compare and a kshiftlw/kshiftrw $12 pair.
22502 define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22503 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem:
22504 ; VLX: # %bb.0: # %entry
22505 ; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
22506 ; VLX-NEXT: kmovd %k0, %eax
22507 ; VLX-NEXT: vzeroupper
22510 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem:
22511 ; NoVLX: # %bb.0: # %entry
22512 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22513 ; NoVLX-NEXT: vmovapd (%rdi), %ymm1
22514 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22515 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22516 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22517 ; NoVLX-NEXT: kmovw %k0, %eax
22518 ; NoVLX-NEXT: vzeroupper
22521 %0 = bitcast <4 x i64> %__a to <4 x double>
22522 %load = load <4 x i64>, <4 x i64>* %__b
22523 %1 = bitcast <4 x i64> %load to <4 x double>
22524 %2 = fcmp oeq <4 x double> %0, %1
; Shuffle indices 4-7 select from the zeroinitializer operand, so lanes 4-31
; of the widened mask are zero.
22525 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22526 %4 = bitcast <32 x i1> %3 to i32
; Unmasked ordered-equal compare of %__a (as <4 x double>) against one double
; loaded from %__b and splatted to all four lanes, widened to a 32-bit mask.
; Expected codegen: with the VLX prefix a vcmpeqpd with a {1to4} broadcast
; memory operand; with the NoVLX prefix an explicit vbroadcastsd, a
; zmm-width compare, and a kshiftlw/kshiftrw $12 pair.
22530 define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
22531 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
22532 ; VLX: # %bb.0: # %entry
22533 ; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
22534 ; VLX-NEXT: kmovd %k0, %eax
22535 ; VLX-NEXT: vzeroupper
22538 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
22539 ; NoVLX: # %bb.0: # %entry
22540 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22541 ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
22542 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22543 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22544 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22545 ; NoVLX-NEXT: kmovw %k0, %eax
22546 ; NoVLX-NEXT: vzeroupper
22549 %0 = bitcast <4 x i64> %__a to <4 x double>
22550 %load = load double, double* %__b
; insertelement + all-zero shuffle mask = splat of the loaded scalar.
22551 %vec = insertelement <4 x double> undef, double %load, i32 0
22552 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22553 %2 = fcmp oeq <4 x double> %0, %1
; Shuffle indices 4-7 select from the zeroinitializer operand, so lanes 4-31
; of the widened mask are zero.
22554 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22555 %4 = bitcast <32 x i1> %3 to i32
; Masked ordered-equal compare of two <4 x double> register operands: the
; 4-bit compare result is ANDed with %__u and widened to a 32-bit mask.
; Expected codegen: both prefixes move %edi into %k1 and use it as the
; write-mask of vcmpeqpd; the NoVLX prefix additionally compares at zmm width
; and applies a kshiftlw/kshiftrw $12 pair to the result mask.
22559 define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22560 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask:
22561 ; VLX: # %bb.0: # %entry
22562 ; VLX-NEXT: kmovd %edi, %k1
22563 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
22564 ; VLX-NEXT: kmovd %k0, %eax
22565 ; VLX-NEXT: vzeroupper
22568 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask:
22569 ; NoVLX: # %bb.0: # %entry
22570 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22571 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22572 ; NoVLX-NEXT: kmovw %edi, %k1
22573 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22574 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22575 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22576 ; NoVLX-NEXT: kmovw %k0, %eax
22577 ; NoVLX-NEXT: vzeroupper
22580 %0 = bitcast <4 x i64> %__a to <4 x double>
22581 %1 = bitcast <4 x i64> %__b to <4 x double>
22582 %2 = fcmp oeq <4 x double> %0, %1
22583 %3 = bitcast i4 %__u to <4 x i1>
22584 %4 = and <4 x i1> %2, %3
; Shuffle indices 4-7 select from the zeroinitializer operand, so lanes 4-31
; of the widened mask are zero.
22585 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22586 %6 = bitcast <32 x i1> %5 to i32
; Masked ordered-equal compare of %__a against a full vector loaded from
; %__b (both viewed as <4 x double>); the result is ANDed with %__u and
; widened to a 32-bit mask.
; Expected codegen: with the VLX prefix the load is folded into the masked
; vcmpeqpd; with the NoVLX prefix it is a separate vmovapd, followed by a
; zmm-width masked compare and a kshiftlw/kshiftrw $12 pair.
22590 define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22591 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem:
22592 ; VLX: # %bb.0: # %entry
22593 ; VLX-NEXT: kmovd %edi, %k1
22594 ; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
22595 ; VLX-NEXT: kmovd %k0, %eax
22596 ; VLX-NEXT: vzeroupper
22599 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem:
22600 ; NoVLX: # %bb.0: # %entry
22601 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22602 ; NoVLX-NEXT: kmovw %edi, %k1
22603 ; NoVLX-NEXT: vmovapd (%rsi), %ymm1
22604 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22605 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22606 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22607 ; NoVLX-NEXT: kmovw %k0, %eax
22608 ; NoVLX-NEXT: vzeroupper
22611 %0 = bitcast <4 x i64> %__a to <4 x double>
22612 %load = load <4 x i64>, <4 x i64>* %__b
22613 %1 = bitcast <4 x i64> %load to <4 x double>
22614 %2 = fcmp oeq <4 x double> %0, %1
22615 %3 = bitcast i4 %__u to <4 x i1>
22616 %4 = and <4 x i1> %2, %3
; Shuffle indices 4-7 select from the zeroinitializer operand, so lanes 4-31
; of the widened mask are zero.
22617 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22618 %6 = bitcast <32 x i1> %5 to i32
; Masked ordered-equal compare of %__a (as <4 x double>) against one double
; loaded from %__b and splatted to all four lanes; the result is ANDed with
; %__u and widened to a 32-bit mask.
; Expected codegen: with the VLX prefix a masked vcmpeqpd with a {1to4}
; broadcast memory operand; with the NoVLX prefix an explicit vbroadcastsd,
; a zmm-width masked compare, and a kshiftlw/kshiftrw $12 pair.
22622 define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
22623 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
22624 ; VLX: # %bb.0: # %entry
22625 ; VLX-NEXT: kmovd %edi, %k1
22626 ; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
22627 ; VLX-NEXT: kmovd %k0, %eax
22628 ; VLX-NEXT: vzeroupper
22631 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
22632 ; NoVLX: # %bb.0: # %entry
22633 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22634 ; NoVLX-NEXT: kmovw %edi, %k1
22635 ; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1
22636 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22637 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22638 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22639 ; NoVLX-NEXT: kmovw %k0, %eax
22640 ; NoVLX-NEXT: vzeroupper
22643 %0 = bitcast <4 x i64> %__a to <4 x double>
22644 %load = load double, double* %__b
; insertelement + all-zero shuffle mask = splat of the loaded scalar.
22645 %vec = insertelement <4 x double> undef, double %load, i32 0
22646 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22647 %2 = fcmp oeq <4 x double> %0, %1
22648 %3 = bitcast i4 %__u to <4 x i1>
22649 %4 = and <4 x i1> %2, %3
; Shuffle indices 4-7 select from the zeroinitializer operand, so lanes 4-31
; of the widened mask are zero.
22650 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22651 %6 = bitcast <32 x i1> %5 to i32
; Unmasked ordered-equal compare of two <4 x double> register operands
; (bitcast from <4 x i64>), with the 4-bit result widened to a 64-bit mask.
; Expected codegen: with the VLX prefix a ymm vcmpeqpd read out with kmovq;
; with the NoVLX prefix a zmm-width compare, a kshiftlw/kshiftrw $12 pair,
; and a kmovw into %eax even though the IR return type is i64.
22657 define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22658 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
22659 ; VLX: # %bb.0: # %entry
22660 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
22661 ; VLX-NEXT: kmovq %k0, %rax
22662 ; VLX-NEXT: vzeroupper
22665 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
22666 ; NoVLX: # %bb.0: # %entry
22667 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22668 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22669 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22670 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22671 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22672 ; NoVLX-NEXT: kmovw %k0, %eax
22673 ; NoVLX-NEXT: vzeroupper
22676 %0 = bitcast <4 x i64> %__a to <4 x double>
22677 %1 = bitcast <4 x i64> %__b to <4 x double>
22678 %2 = fcmp oeq <4 x double> %0, %1
; Shuffle indices 4-7 select from the zeroinitializer operand, so lanes 4-63
; of the widened mask are zero.
22679 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22680 %4 = bitcast <64 x i1> %3 to i64
; Unmasked ordered-equal compare of %__a against a full <4 x i64> vector
; loaded from %__b (both viewed as <4 x double>), widened to a 64-bit mask.
; Expected codegen: with the VLX prefix the load is folded into vcmpeqpd and
; the mask is read with kmovq; with the NoVLX prefix a separate vmovapd, a
; zmm-width compare, a kshiftlw/kshiftrw $12 pair, and a kmovw into %eax.
22684 define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22685 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem:
22686 ; VLX: # %bb.0: # %entry
22687 ; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
22688 ; VLX-NEXT: kmovq %k0, %rax
22689 ; VLX-NEXT: vzeroupper
22692 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem:
22693 ; NoVLX: # %bb.0: # %entry
22694 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22695 ; NoVLX-NEXT: vmovapd (%rdi), %ymm1
22696 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22697 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22698 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22699 ; NoVLX-NEXT: kmovw %k0, %eax
22700 ; NoVLX-NEXT: vzeroupper
22703 %0 = bitcast <4 x i64> %__a to <4 x double>
22704 %load = load <4 x i64>, <4 x i64>* %__b
22705 %1 = bitcast <4 x i64> %load to <4 x double>
22706 %2 = fcmp oeq <4 x double> %0, %1
; Shuffle indices 4-7 select from the zeroinitializer operand, so lanes 4-63
; of the widened mask are zero.
22707 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22708 %4 = bitcast <64 x i1> %3 to i64
; Unmasked ordered-equal compare of %__a (as <4 x double>) against one double
; loaded from %__b and splatted to all four lanes, widened to a 64-bit mask.
; Expected codegen: with the VLX prefix a vcmpeqpd with a {1to4} broadcast
; memory operand read out with kmovq; with the NoVLX prefix an explicit
; vbroadcastsd, a zmm-width compare, a kshiftlw/kshiftrw $12 pair, and kmovw.
22712 define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
22713 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
22714 ; VLX: # %bb.0: # %entry
22715 ; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
22716 ; VLX-NEXT: kmovq %k0, %rax
22717 ; VLX-NEXT: vzeroupper
22720 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
22721 ; NoVLX: # %bb.0: # %entry
22722 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22723 ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
22724 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22725 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22726 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22727 ; NoVLX-NEXT: kmovw %k0, %eax
22728 ; NoVLX-NEXT: vzeroupper
22731 %0 = bitcast <4 x i64> %__a to <4 x double>
22732 %load = load double, double* %__b
; insertelement + all-zero shuffle mask = splat of the loaded scalar.
22733 %vec = insertelement <4 x double> undef, double %load, i32 0
22734 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22735 %2 = fcmp oeq <4 x double> %0, %1
; Shuffle indices 4-7 select from the zeroinitializer operand, so lanes 4-63
; of the widened mask are zero.
22736 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22737 %4 = bitcast <64 x i1> %3 to i64
; Masked ordered-equal compare of two <4 x double> register operands: the
; 4-bit compare result is ANDed with %__u and widened to a 64-bit mask.
; Expected codegen: both prefixes move %edi into %k1 and use it as the
; write-mask of vcmpeqpd. The VLX prefix reads the result with kmovq; the
; NoVLX prefix compares at zmm width, applies a kshiftlw/kshiftrw $12 pair,
; and reads the result with kmovw into %eax.
22741 define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22742 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask:
22743 ; VLX: # %bb.0: # %entry
22744 ; VLX-NEXT: kmovd %edi, %k1
22745 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
22746 ; VLX-NEXT: kmovq %k0, %rax
22747 ; VLX-NEXT: vzeroupper
22750 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask:
22751 ; NoVLX: # %bb.0: # %entry
22752 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22753 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22754 ; NoVLX-NEXT: kmovw %edi, %k1
22755 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22756 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22757 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22758 ; NoVLX-NEXT: kmovw %k0, %eax
22759 ; NoVLX-NEXT: vzeroupper
22762 %0 = bitcast <4 x i64> %__a to <4 x double>
22763 %1 = bitcast <4 x i64> %__b to <4 x double>
22764 %2 = fcmp oeq <4 x double> %0, %1
22765 %3 = bitcast i4 %__u to <4 x i1>
22766 %4 = and <4 x i1> %2, %3
; Shuffle indices 4-7 select from the zeroinitializer operand, so lanes 4-63
; of the widened mask are zero.
22767 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22768 %6 = bitcast <64 x i1> %5 to i64
; Masked ordered-equal compare of %__a against a full vector loaded from
; %__b (both viewed as <4 x double>); the result is ANDed with %__u and
; widened to a 64-bit mask.
; Expected codegen: with the VLX prefix the load is folded into the masked
; vcmpeqpd and the mask is read with kmovq; with the NoVLX prefix a separate
; vmovapd, a zmm-width masked compare, a kshiftlw/kshiftrw $12 pair, and kmovw.
22772 define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22773 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem:
22774 ; VLX: # %bb.0: # %entry
22775 ; VLX-NEXT: kmovd %edi, %k1
22776 ; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
22777 ; VLX-NEXT: kmovq %k0, %rax
22778 ; VLX-NEXT: vzeroupper
22781 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem:
22782 ; NoVLX: # %bb.0: # %entry
22783 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22784 ; NoVLX-NEXT: kmovw %edi, %k1
22785 ; NoVLX-NEXT: vmovapd (%rsi), %ymm1
22786 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22787 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22788 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22789 ; NoVLX-NEXT: kmovw %k0, %eax
22790 ; NoVLX-NEXT: vzeroupper
22793 %0 = bitcast <4 x i64> %__a to <4 x double>
22794 %load = load <4 x i64>, <4 x i64>* %__b
22795 %1 = bitcast <4 x i64> %load to <4 x double>
22796 %2 = fcmp oeq <4 x double> %0, %1
22797 %3 = bitcast i4 %__u to <4 x i1>
22798 %4 = and <4 x i1> %2, %3
; Shuffle indices 4-7 select from the zeroinitializer operand, so lanes 4-63
; of the widened mask are zero.
22799 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22800 %6 = bitcast <64 x i1> %5 to i64
; Masked ordered-equal compare of %__a (as <4 x double>) against one double
; loaded from %__b and splatted to all four lanes; the result is ANDed with
; %__u and widened to a 64-bit mask.
; Expected codegen: with the VLX prefix a masked vcmpeqpd with a {1to4}
; broadcast memory operand read out with kmovq; with the NoVLX prefix an
; explicit vbroadcastsd, a zmm-width masked compare, a kshiftlw/kshiftrw $12
; pair, and kmovw into %eax.
22804 define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
22805 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
22806 ; VLX: # %bb.0: # %entry
22807 ; VLX-NEXT: kmovd %edi, %k1
22808 ; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
22809 ; VLX-NEXT: kmovq %k0, %rax
22810 ; VLX-NEXT: vzeroupper
22813 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
22814 ; NoVLX: # %bb.0: # %entry
22815 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22816 ; NoVLX-NEXT: kmovw %edi, %k1
22817 ; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1
22818 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22819 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22820 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22821 ; NoVLX-NEXT: kmovw %k0, %eax
22822 ; NoVLX-NEXT: vzeroupper
22825 %0 = bitcast <4 x i64> %__a to <4 x double>
22826 %load = load double, double* %__b
; insertelement + all-zero shuffle mask = splat of the loaded scalar.
22827 %vec = insertelement <4 x double> undef, double %load, i32 0
22828 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22829 %2 = fcmp oeq <4 x double> %0, %1
22830 %3 = bitcast i4 %__u to <4 x i1>
22831 %4 = and <4 x i1> %2, %3
; Shuffle indices 4-7 select from the zeroinitializer operand, so lanes 4-63
; of the widened mask are zero.
22832 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22833 %6 = bitcast <64 x i1> %5 to i64
; Unmasked ordered-equal compare of two <8 x double> register operands
; (bitcast from <8 x i64>), with the 8-bit result widened to a 16-bit mask.
; Expected codegen: both prefixes use a single zmm vcmpeqpd; they differ only
; in reading the mask with kmovd versus kmovw.
22839 define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
22840 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
22841 ; VLX: # %bb.0: # %entry
22842 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22843 ; VLX-NEXT: kmovd %k0, %eax
22844 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22845 ; VLX-NEXT: vzeroupper
22848 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
22849 ; NoVLX: # %bb.0: # %entry
22850 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22851 ; NoVLX-NEXT: kmovw %k0, %eax
22852 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22853 ; NoVLX-NEXT: vzeroupper
22856 %0 = bitcast <8 x i64> %__a to <8 x double>
22857 %1 = bitcast <8 x i64> %__b to <8 x double>
22858 %2 = fcmp oeq <8 x double> %0, %1
; Shuffle indices 8-15 select from the zeroinitializer operand, so lanes 8-15
; of the widened mask are zero.
22859 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22860 %4 = bitcast <16 x i1> %3 to i16
; Unmasked ordered-equal compare of %__a against a full <8 x i64> vector
; loaded from %__b (both viewed as <8 x double>), widened to a 16-bit mask.
; Expected codegen: both prefixes fold the load into a zmm vcmpeqpd; they
; differ only in reading the mask with kmovd versus kmovw.
22864 define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
22865 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
22866 ; VLX: # %bb.0: # %entry
22867 ; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
22868 ; VLX-NEXT: kmovd %k0, %eax
22869 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22870 ; VLX-NEXT: vzeroupper
22873 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
22874 ; NoVLX: # %bb.0: # %entry
22875 ; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
22876 ; NoVLX-NEXT: kmovw %k0, %eax
22877 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22878 ; NoVLX-NEXT: vzeroupper
22881 %0 = bitcast <8 x i64> %__a to <8 x double>
22882 %load = load <8 x i64>, <8 x i64>* %__b
22883 %1 = bitcast <8 x i64> %load to <8 x double>
22884 %2 = fcmp oeq <8 x double> %0, %1
; Shuffle indices 8-15 select from the zeroinitializer operand, so lanes 8-15
; of the widened mask are zero.
22885 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22886 %4 = bitcast <16 x i1> %3 to i16
; Unmasked ordered-equal compare of %__a (as <8 x double>) against one double
; loaded from %__b and splatted to all eight lanes, widened to a 16-bit mask.
; Expected codegen: both prefixes use a zmm vcmpeqpd with a {1to8} broadcast
; memory operand; they differ only in kmovd versus kmovw.
22890 define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
22891 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
22892 ; VLX: # %bb.0: # %entry
22893 ; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22894 ; VLX-NEXT: kmovd %k0, %eax
22895 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22896 ; VLX-NEXT: vzeroupper
22899 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
22900 ; NoVLX: # %bb.0: # %entry
22901 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22902 ; NoVLX-NEXT: kmovw %k0, %eax
22903 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22904 ; NoVLX-NEXT: vzeroupper
22907 %0 = bitcast <8 x i64> %__a to <8 x double>
22908 %load = load double, double* %__b
; insertelement + all-zero shuffle mask = splat of the loaded scalar.
22909 %vec = insertelement <8 x double> undef, double %load, i32 0
22910 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
22911 %2 = fcmp oeq <8 x double> %0, %1
; Shuffle indices 8-15 select from the zeroinitializer operand, so lanes 8-15
; of the widened mask are zero.
22912 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22913 %4 = bitcast <16 x i1> %3 to i16
; Masked ordered-equal compare of two <8 x double> register operands: the
; 8-bit compare result is ANDed with %__u and widened to a 16-bit mask.
; Expected codegen: both prefixes move %edi into %k1 and use it as the
; write-mask of a zmm vcmpeqpd; they differ only in kmovd versus kmovw.
22917 define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
22918 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask:
22919 ; VLX: # %bb.0: # %entry
22920 ; VLX-NEXT: kmovd %edi, %k1
22921 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22922 ; VLX-NEXT: kmovd %k0, %eax
22923 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22924 ; VLX-NEXT: vzeroupper
22927 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask:
22928 ; NoVLX: # %bb.0: # %entry
22929 ; NoVLX-NEXT: kmovw %edi, %k1
22930 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22931 ; NoVLX-NEXT: kmovw %k0, %eax
22932 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22933 ; NoVLX-NEXT: vzeroupper
22936 %0 = bitcast <8 x i64> %__a to <8 x double>
22937 %1 = bitcast <8 x i64> %__b to <8 x double>
22938 %2 = fcmp oeq <8 x double> %0, %1
22939 %3 = bitcast i8 %__u to <8 x i1>
22940 %4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select from the zeroinitializer operand, so lanes 8-15
; of the widened mask are zero.
22941 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22942 %6 = bitcast <16 x i1> %5 to i16
; Masked ordered-equal compare of %__a against a full vector loaded from
; %__b (both viewed as <8 x double>); the result is ANDed with %__u and
; widened to a 16-bit mask.
; Expected codegen: both prefixes fold the load into a masked zmm vcmpeqpd;
; they differ only in kmovd versus kmovw.
22946 define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
22947 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem:
22948 ; VLX: # %bb.0: # %entry
22949 ; VLX-NEXT: kmovd %edi, %k1
22950 ; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
22951 ; VLX-NEXT: kmovd %k0, %eax
22952 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22953 ; VLX-NEXT: vzeroupper
22956 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem:
22957 ; NoVLX: # %bb.0: # %entry
22958 ; NoVLX-NEXT: kmovw %edi, %k1
22959 ; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
22960 ; NoVLX-NEXT: kmovw %k0, %eax
22961 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22962 ; NoVLX-NEXT: vzeroupper
22965 %0 = bitcast <8 x i64> %__a to <8 x double>
22966 %load = load <8 x i64>, <8 x i64>* %__b
22967 %1 = bitcast <8 x i64> %load to <8 x double>
22968 %2 = fcmp oeq <8 x double> %0, %1
22969 %3 = bitcast i8 %__u to <8 x i1>
22970 %4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select from the zeroinitializer operand, so lanes 8-15
; of the widened mask are zero.
22971 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22972 %6 = bitcast <16 x i1> %5 to i16
; Masked ordered-equal compare of %__a (as <8 x double>) against one double
; loaded from %__b and splatted to all eight lanes; the result is ANDed with
; %__u and widened to a 16-bit mask.
; Expected codegen: both prefixes use a masked zmm vcmpeqpd with a {1to8}
; broadcast memory operand; they differ only in kmovd versus kmovw.
22976 define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr {
22977 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
22978 ; VLX: # %bb.0: # %entry
22979 ; VLX-NEXT: kmovd %edi, %k1
22980 ; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
22981 ; VLX-NEXT: kmovd %k0, %eax
22982 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22983 ; VLX-NEXT: vzeroupper
22986 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
22987 ; NoVLX: # %bb.0: # %entry
22988 ; NoVLX-NEXT: kmovw %edi, %k1
22989 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
22990 ; NoVLX-NEXT: kmovw %k0, %eax
22991 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22992 ; NoVLX-NEXT: vzeroupper
22995 %0 = bitcast <8 x i64> %__a to <8 x double>
22996 %load = load double, double* %__b
; insertelement + all-zero shuffle mask = splat of the loaded scalar.
22997 %vec = insertelement <8 x double> undef, double %load, i32 0
22998 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
22999 %2 = fcmp oeq <8 x double> %0, %1
23000 %3 = bitcast i8 %__u to <8 x i1>
23001 %4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select from the zeroinitializer operand, so lanes 8-15
; of the widened mask are zero.
23002 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23003 %6 = bitcast <16 x i1> %5 to i16
; Compare of two <8 x double> register operands through the
; llvm.x86.avx512.cmp.pd.512 intrinsic; the <8 x i1> result is bitcast to i8
; and zero-extended to i16 (no shufflevector widening here).
; Expected codegen for both prefixes: a {sae} compare, a mask move to %eax,
; and a movzbl to clear everything above the low 8 bits.
; NOTE(review): the function name says "oeq", but the intrinsic is called with
; predicate immediate 2 and the expected instruction is vcmplepd, not
; vcmpeqpd - confirm whether this mismatch is intentional.
23009 define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23010 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask:
23011 ; VLX: # %bb.0: # %entry
23012 ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23013 ; VLX-NEXT: kmovd %k0, %eax
23014 ; VLX-NEXT: movzbl %al, %eax
23015 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
23016 ; VLX-NEXT: vzeroupper
23019 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask:
23020 ; NoVLX: # %bb.0: # %entry
23021 ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23022 ; NoVLX-NEXT: kmovw %k0, %eax
23023 ; NoVLX-NEXT: movzbl %al, %eax
23024 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
23025 ; NoVLX-NEXT: vzeroupper
23028 %0 = bitcast <8 x i64> %__a to <8 x double>
23029 %1 = bitcast <8 x i64> %__b to <8 x double>
; Third argument (2) is the compare predicate immediate; the fourth (8) pairs
; with the {sae} modifier seen in the expected assembly.
23030 %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
23031 %3 = bitcast <8 x i1> %2 to i8
23032 %4 = zext i8 %3 to i16
; Masked variant of the {sae} compare: the <8 x i1> result of
; llvm.x86.avx512.cmp.pd.512 is ANDed with %__u, bitcast to i8 and
; zero-extended to i16.
; Expected codegen for both prefixes: the compare is emitted unmasked and the
; AND with %__u is done afterwards on general-purpose registers
; (andb %dil, %al), followed by movzbl.
; NOTE(review): the function name says "oeq", but the intrinsic is called with
; predicate immediate 2 and the expected instruction is vcmplepd, not
; vcmpeqpd - confirm whether this mismatch is intentional.
23036 define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23037 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask:
23038 ; VLX: # %bb.0: # %entry
23039 ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23040 ; VLX-NEXT: kmovd %k0, %eax
23041 ; VLX-NEXT: andb %dil, %al
23042 ; VLX-NEXT: movzbl %al, %eax
23043 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
23044 ; VLX-NEXT: vzeroupper
23047 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask:
23048 ; NoVLX: # %bb.0: # %entry
23049 ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23050 ; NoVLX-NEXT: kmovw %k0, %eax
23051 ; NoVLX-NEXT: andb %dil, %al
23052 ; NoVLX-NEXT: movzbl %al, %eax
23053 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
23054 ; NoVLX-NEXT: vzeroupper
23057 %0 = bitcast <8 x i64> %__a to <8 x double>
23058 %1 = bitcast <8 x i64> %__b to <8 x double>
; Third argument (2) is the compare predicate immediate; the fourth (8) pairs
; with the {sae} modifier seen in the expected assembly.
23059 %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
23060 %3 = bitcast i8 %__u to <8 x i1>
23061 %4 = and <8 x i1> %2, %3
23062 %5 = bitcast <8 x i1> %4 to i8
23063 %6 = zext i8 %5 to i16
; Unmasked ordered-equal compare of two <8 x double> register operands
; (bitcast from <8 x i64>), with the 8-bit result widened to a 32-bit mask.
; Expected codegen: both prefixes use a single zmm vcmpeqpd; they differ only
; in reading the mask with kmovd versus kmovw.
23069 define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23070 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
23071 ; VLX: # %bb.0: # %entry
23072 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
23073 ; VLX-NEXT: kmovd %k0, %eax
23074 ; VLX-NEXT: vzeroupper
23077 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
23078 ; NoVLX: # %bb.0: # %entry
23079 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
23080 ; NoVLX-NEXT: kmovw %k0, %eax
23081 ; NoVLX-NEXT: vzeroupper
23084 %0 = bitcast <8 x i64> %__a to <8 x double>
23085 %1 = bitcast <8 x i64> %__b to <8 x double>
23086 %2 = fcmp oeq <8 x double> %0, %1
; Shuffle indices 8-15 select from the zeroinitializer operand, so lanes 8-31
; of the widened mask are zero.
23087 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23088 %4 = bitcast <32 x i1> %3 to i32
; Unmasked ordered-equal compare of %__a against a full <8 x i64> vector
; loaded from %__b (both viewed as <8 x double>), widened to a 32-bit mask.
; Expected codegen: both prefixes fold the load into a zmm vcmpeqpd; they
; differ only in reading the mask with kmovd versus kmovw.
23092 define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
23093 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
23094 ; VLX: # %bb.0: # %entry
23095 ; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
23096 ; VLX-NEXT: kmovd %k0, %eax
23097 ; VLX-NEXT: vzeroupper
23100 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
23101 ; NoVLX: # %bb.0: # %entry
23102 ; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
23103 ; NoVLX-NEXT: kmovw %k0, %eax
23104 ; NoVLX-NEXT: vzeroupper
23107 %0 = bitcast <8 x i64> %__a to <8 x double>
23108 %load = load <8 x i64>, <8 x i64>* %__b
23109 %1 = bitcast <8 x i64> %load to <8 x double>
23110 %2 = fcmp oeq <8 x double> %0, %1
; Shuffle indices 8-15 select from the zeroinitializer operand, so lanes 8-31
; of the widened mask are zero.
23111 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23112 %4 = bitcast <32 x i1> %3 to i32
; Unmasked ordered-equal compare of %__a (as <8 x double>) against one double
; loaded from %__b and splatted to all eight lanes, widened to a 32-bit mask.
; Expected codegen: both prefixes use a zmm vcmpeqpd with a {1to8} broadcast
; memory operand; they differ only in kmovd versus kmovw.
23116 define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
23117 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
23118 ; VLX: # %bb.0: # %entry
23119 ; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
23120 ; VLX-NEXT: kmovd %k0, %eax
23121 ; VLX-NEXT: vzeroupper
23124 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
23125 ; NoVLX: # %bb.0: # %entry
23126 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
23127 ; NoVLX-NEXT: kmovw %k0, %eax
23128 ; NoVLX-NEXT: vzeroupper
23131 %0 = bitcast <8 x i64> %__a to <8 x double>
23132 %load = load double, double* %__b
; insertelement + all-zero shuffle mask = splat of the loaded scalar.
23133 %vec = insertelement <8 x double> undef, double %load, i32 0
23134 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
23135 %2 = fcmp oeq <8 x double> %0, %1
; Shuffle indices 8-15 select from the zeroinitializer operand, so lanes 8-31
; of the widened mask are zero.
23136 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23137 %4 = bitcast <32 x i1> %3 to i32
; Masked ordered-equal compare of two <8 x double> register operands: the
; 8-bit compare result is ANDed with %__u and widened to a 32-bit mask.
; Expected codegen: both prefixes move %edi into %k1 and use it as the
; write-mask of a zmm vcmpeqpd; they differ only in kmovd versus kmovw.
23141 define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23142 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask:
23143 ; VLX: # %bb.0: # %entry
23144 ; VLX-NEXT: kmovd %edi, %k1
23145 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23146 ; VLX-NEXT: kmovd %k0, %eax
23147 ; VLX-NEXT: vzeroupper
23150 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask:
23151 ; NoVLX: # %bb.0: # %entry
23152 ; NoVLX-NEXT: kmovw %edi, %k1
23153 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23154 ; NoVLX-NEXT: kmovw %k0, %eax
23155 ; NoVLX-NEXT: vzeroupper
23158 %0 = bitcast <8 x i64> %__a to <8 x double>
23159 %1 = bitcast <8 x i64> %__b to <8 x double>
23160 %2 = fcmp oeq <8 x double> %0, %1
23161 %3 = bitcast i8 %__u to <8 x i1>
23162 %4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select from the zeroinitializer operand, so lanes 8-31
; of the widened mask are zero.
23163 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23164 %6 = bitcast <32 x i1> %5 to i32
; Masked ordered-equal compare of %__a against a full vector loaded from
; %__b (both viewed as <8 x double>); the result is ANDed with %__u and
; widened to a 32-bit mask.
; Expected codegen: both prefixes fold the load into a masked zmm vcmpeqpd;
; they differ only in kmovd versus kmovw.
23168 define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
23169 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem:
23170 ; VLX: # %bb.0: # %entry
23171 ; VLX-NEXT: kmovd %edi, %k1
23172 ; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23173 ; VLX-NEXT: kmovd %k0, %eax
23174 ; VLX-NEXT: vzeroupper
23177 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem:
23178 ; NoVLX: # %bb.0: # %entry
23179 ; NoVLX-NEXT: kmovw %edi, %k1
23180 ; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23181 ; NoVLX-NEXT: kmovw %k0, %eax
23182 ; NoVLX-NEXT: vzeroupper
23185 %0 = bitcast <8 x i64> %__a to <8 x double>
23186 %load = load <8 x i64>, <8 x i64>* %__b
23187 %1 = bitcast <8 x i64> %load to <8 x double>
23188 %2 = fcmp oeq <8 x double> %0, %1
23189 %3 = bitcast i8 %__u to <8 x i1>
23190 %4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select from the zeroinitializer operand, so lanes 8-31
; of the widened mask are zero.
23191 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23192 %6 = bitcast <32 x i1> %5 to i32
; Broadcast variant of the masked <8 x double> fcmp oeq test: a single double is
; loaded from %__b and splatted to all eight lanes (insertelement into lane 0
; followed by an all-zero-index shufflevector).
; The compare result is ANDed with the bits of %__u and widened to <32 x i1>;
; shuffle indices >= 8 select zeroinitializer elements, so bits 8-31 are zero.
; Both configurations are expected to fold the splat into an embedded-broadcast
; operand, (%rsi){1to8}, on the {%k1}-masked vcmpeqpd.
23196 define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr {
23197 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
23198 ; VLX: # %bb.0: # %entry
23199 ; VLX-NEXT: kmovd %edi, %k1
23200 ; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23201 ; VLX-NEXT: kmovd %k0, %eax
23202 ; VLX-NEXT: vzeroupper
23205 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
23206 ; NoVLX: # %bb.0: # %entry
23207 ; NoVLX-NEXT: kmovw %edi, %k1
23208 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23209 ; NoVLX-NEXT: kmovw %k0, %eax
23210 ; NoVLX-NEXT: vzeroupper
23213 %0 = bitcast <8 x i64> %__a to <8 x double>
23214 %load = load double, double* %__b
23215 %vec = insertelement <8 x double> undef, double %load, i32 0
23216 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
23217 %2 = fcmp oeq <8 x double> %0, %1
23218 %3 = bitcast i8 %__u to <8 x i1>
23219 %4 = and <8 x i1> %2, %3
23220 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23221 %6 = bitcast <32 x i1> %5 to i32
; Unmasked 512-bit double compare issued through @llvm.x86.avx512.cmp.pd.512
; with immediate operands (i32 2, i32 8); the expected instruction carries the
; {sae} modifier. The <8 x i1> result is bitcast to i8 and zero-extended to i32.
; The run with +avx512dq is expected to read the mask with kmovb, while the
; +avx512f-only run is expected to use kmovw followed by movzbl.
; NOTE(review): the function name says "oeq", but the predicate immediate is 2
; and the checks expect vcmplepd -- confirm the naming is intentional.
23227 define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23228 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask:
23229 ; VLX: # %bb.0: # %entry
23230 ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23231 ; VLX-NEXT: kmovb %k0, %eax
23232 ; VLX-NEXT: vzeroupper
23235 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask:
23236 ; NoVLX: # %bb.0: # %entry
23237 ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23238 ; NoVLX-NEXT: kmovw %k0, %eax
23239 ; NoVLX-NEXT: movzbl %al, %eax
23240 ; NoVLX-NEXT: vzeroupper
23243 %0 = bitcast <8 x i64> %__a to <8 x double>
23244 %1 = bitcast <8 x i64> %__b to <8 x double>
23245 %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
23246 %3 = bitcast <8 x i1> %2 to i8
23247 %4 = zext i8 %3 to i32
; Masked form of the {sae} double compare: the <8 x i1> result of
; @llvm.x86.avx512.cmp.pd.512 (immediates i32 2, i32 8) is ANDed with the bits
; of %__u, bitcast to i8 and zero-extended to i32.
; Unlike the fcmp-based masked tests, both configurations are expected to apply
; the mask in a general-purpose register (andb %dil, %al, then movzbl) rather
; than as a {%k1} write mask on the compare.
; NOTE(review): the function name says "oeq", but the predicate immediate is 2
; and the checks expect vcmplepd -- confirm the naming is intentional.
23251 define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23252 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask:
23253 ; VLX: # %bb.0: # %entry
23254 ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23255 ; VLX-NEXT: kmovd %k0, %eax
23256 ; VLX-NEXT: andb %dil, %al
23257 ; VLX-NEXT: movzbl %al, %eax
23258 ; VLX-NEXT: vzeroupper
23261 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask:
23262 ; NoVLX: # %bb.0: # %entry
23263 ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23264 ; NoVLX-NEXT: kmovw %k0, %eax
23265 ; NoVLX-NEXT: andb %dil, %al
23266 ; NoVLX-NEXT: movzbl %al, %eax
23267 ; NoVLX-NEXT: vzeroupper
23270 %0 = bitcast <8 x i64> %__a to <8 x double>
23271 %1 = bitcast <8 x i64> %__b to <8 x double>
23272 %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
23273 %3 = bitcast i8 %__u to <8 x i1>
23274 %4 = and <8 x i1> %2, %3
23275 %5 = bitcast <8 x i1> %4 to i8
23276 %6 = zext i8 %5 to i32
; Unmasked <8 x double> ordered-equal (fcmp oeq) compare of two register
; operands, with the <8 x i1> result widened to <64 x i1> and bitcast to i64.
; Shuffle indices 0-7 select the compare bits; every index >= 8 selects an
; element of the zeroinitializer operand, so bits 8-63 of the result are zero.
; The run with +avx512bw is expected to read the mask with kmovq into %rax; the
; +avx512f-only run is expected to use kmovw into %eax. Neither is expected to
; emit an extra instruction to clear the upper bits.
23282 define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23283 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
23284 ; VLX: # %bb.0: # %entry
23285 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
23286 ; VLX-NEXT: kmovq %k0, %rax
23287 ; VLX-NEXT: vzeroupper
23290 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
23291 ; NoVLX: # %bb.0: # %entry
23292 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
23293 ; NoVLX-NEXT: kmovw %k0, %eax
23294 ; NoVLX-NEXT: vzeroupper
23297 %0 = bitcast <8 x i64> %__a to <8 x double>
23298 %1 = bitcast <8 x i64> %__b to <8 x double>
23299 %2 = fcmp oeq <8 x double> %0, %1
23300 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23301 %4 = bitcast <64 x i1> %3 to i64
; Memory-operand variant of the unmasked <8 x double> fcmp oeq test with a
; 64-bit result: the second operand is a full <8 x i64> vector loaded from %__b.
; The <8 x i1> compare result is widened to <64 x i1>; shuffle indices >= 8
; select zeroinitializer elements, so bits 8-63 of the i64 bitcast are zero.
; Both configurations are expected to fold the load into vcmpeqpd as a (%rdi)
; memory operand.
23305 define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
23306 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
23307 ; VLX: # %bb.0: # %entry
23308 ; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
23309 ; VLX-NEXT: kmovq %k0, %rax
23310 ; VLX-NEXT: vzeroupper
23313 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
23314 ; NoVLX: # %bb.0: # %entry
23315 ; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
23316 ; NoVLX-NEXT: kmovw %k0, %eax
23317 ; NoVLX-NEXT: vzeroupper
23320 %0 = bitcast <8 x i64> %__a to <8 x double>
23321 %load = load <8 x i64>, <8 x i64>* %__b
23322 %1 = bitcast <8 x i64> %load to <8 x double>
23323 %2 = fcmp oeq <8 x double> %0, %1
23324 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23325 %4 = bitcast <64 x i1> %3 to i64
; Broadcast variant of the unmasked <8 x double> fcmp oeq test with a 64-bit
; result: a single double is loaded from %__b and splatted to all eight lanes
; (insertelement into lane 0 followed by an all-zero-index shufflevector).
; The <8 x i1> compare result is widened to <64 x i1>; shuffle indices >= 8
; select zeroinitializer elements, so bits 8-63 of the i64 bitcast are zero.
; Both configurations are expected to fold the splat into an embedded-broadcast
; operand, (%rdi){1to8}, on vcmpeqpd.
23329 define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
23330 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
23331 ; VLX: # %bb.0: # %entry
23332 ; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
23333 ; VLX-NEXT: kmovq %k0, %rax
23334 ; VLX-NEXT: vzeroupper
23337 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
23338 ; NoVLX: # %bb.0: # %entry
23339 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
23340 ; NoVLX-NEXT: kmovw %k0, %eax
23341 ; NoVLX-NEXT: vzeroupper
23344 %0 = bitcast <8 x i64> %__a to <8 x double>
23345 %load = load double, double* %__b
23346 %vec = insertelement <8 x double> undef, double %load, i32 0
23347 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
23348 %2 = fcmp oeq <8 x double> %0, %1
23349 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23350 %4 = bitcast <64 x i1> %3 to i64
; Masked <8 x double> ordered-equal (fcmp oeq) compare of two register operands
; with a 64-bit result. The <8 x i1> compare result is ANDed with the bits of
; %__u, widened to <64 x i1> and bitcast to i64; shuffle indices >= 8 select
; zeroinitializer elements, so bits 8-63 are zero.
; Both configurations are expected to fold the AND into a {%k1}-masked
; vcmpeqpd; the run with +avx512bw reads the result with kmovq, the
; +avx512f-only run with kmovw.
23354 define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23355 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask:
23356 ; VLX: # %bb.0: # %entry
23357 ; VLX-NEXT: kmovd %edi, %k1
23358 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23359 ; VLX-NEXT: kmovq %k0, %rax
23360 ; VLX-NEXT: vzeroupper
23363 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask:
23364 ; NoVLX: # %bb.0: # %entry
23365 ; NoVLX-NEXT: kmovw %edi, %k1
23366 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23367 ; NoVLX-NEXT: kmovw %k0, %eax
23368 ; NoVLX-NEXT: vzeroupper
23371 %0 = bitcast <8 x i64> %__a to <8 x double>
23372 %1 = bitcast <8 x i64> %__b to <8 x double>
23373 %2 = fcmp oeq <8 x double> %0, %1
23374 %3 = bitcast i8 %__u to <8 x i1>
23375 %4 = and <8 x i1> %2, %3
23376 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23377 %6 = bitcast <64 x i1> %5 to i64
; Memory-operand variant of the masked <8 x double> fcmp oeq test with a 64-bit
; result: the second operand is a full <8 x i64> vector loaded from %__b.
; The compare result is ANDed with the bits of %__u, widened to <64 x i1> and
; bitcast to i64; shuffle indices >= 8 select zeroinitializer elements, so bits
; 8-63 are zero.
; Both configurations are expected to fold the load into vcmpeqpd as a (%rsi)
; memory operand under the {%k1} write mask.
23381 define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
23382 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem:
23383 ; VLX: # %bb.0: # %entry
23384 ; VLX-NEXT: kmovd %edi, %k1
23385 ; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23386 ; VLX-NEXT: kmovq %k0, %rax
23387 ; VLX-NEXT: vzeroupper
23390 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem:
23391 ; NoVLX: # %bb.0: # %entry
23392 ; NoVLX-NEXT: kmovw %edi, %k1
23393 ; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23394 ; NoVLX-NEXT: kmovw %k0, %eax
23395 ; NoVLX-NEXT: vzeroupper
23398 %0 = bitcast <8 x i64> %__a to <8 x double>
23399 %load = load <8 x i64>, <8 x i64>* %__b
23400 %1 = bitcast <8 x i64> %load to <8 x double>
23401 %2 = fcmp oeq <8 x double> %0, %1
23402 %3 = bitcast i8 %__u to <8 x i1>
23403 %4 = and <8 x i1> %2, %3
23404 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23405 %6 = bitcast <64 x i1> %5 to i64
; Broadcast variant of the masked <8 x double> fcmp oeq test with a 64-bit
; result: a single double is loaded from %__b and splatted to all eight lanes
; (insertelement into lane 0 followed by an all-zero-index shufflevector).
; The compare result is ANDed with the bits of %__u, widened to <64 x i1> and
; bitcast to i64; shuffle indices >= 8 select zeroinitializer elements, so bits
; 8-63 are zero.
; Both configurations are expected to fold the splat into an embedded-broadcast
; operand, (%rsi){1to8}, on the {%k1}-masked vcmpeqpd.
23409 define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr {
23410 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
23411 ; VLX: # %bb.0: # %entry
23412 ; VLX-NEXT: kmovd %edi, %k1
23413 ; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23414 ; VLX-NEXT: kmovq %k0, %rax
23415 ; VLX-NEXT: vzeroupper
23418 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
23419 ; NoVLX: # %bb.0: # %entry
23420 ; NoVLX-NEXT: kmovw %edi, %k1
23421 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23422 ; NoVLX-NEXT: kmovw %k0, %eax
23423 ; NoVLX-NEXT: vzeroupper
23426 %0 = bitcast <8 x i64> %__a to <8 x double>
23427 %load = load double, double* %__b
23428 %vec = insertelement <8 x double> undef, double %load, i32 0
23429 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
23430 %2 = fcmp oeq <8 x double> %0, %1
23431 %3 = bitcast i8 %__u to <8 x i1>
23432 %4 = and <8 x i1> %2, %3
23433 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23434 %6 = bitcast <64 x i1> %5 to i64
; Unmasked 512-bit double compare issued through @llvm.x86.avx512.cmp.pd.512
; with immediate operands (i32 2, i32 8); the expected instruction carries the
; {sae} modifier. The <8 x i1> result is bitcast to i8 and zero-extended to i64.
; The run with +avx512dq is expected to read the mask with kmovb, while the
; +avx512f-only run is expected to use kmovw followed by movzbl; both write
; only %eax in the visible checks.
; NOTE(review): the function name says "oeq", but the predicate immediate is 2
; and the checks expect vcmplepd -- confirm the naming is intentional.
23440 define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23441 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
23442 ; VLX: # %bb.0: # %entry
23443 ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23444 ; VLX-NEXT: kmovb %k0, %eax
23445 ; VLX-NEXT: vzeroupper
23448 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
23449 ; NoVLX: # %bb.0: # %entry
23450 ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23451 ; NoVLX-NEXT: kmovw %k0, %eax
23452 ; NoVLX-NEXT: movzbl %al, %eax
23453 ; NoVLX-NEXT: vzeroupper
23456 %0 = bitcast <8 x i64> %__a to <8 x double>
23457 %1 = bitcast <8 x i64> %__b to <8 x double>
23458 %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
23459 %3 = bitcast <8 x i1> %2 to i8
23460 %4 = zext i8 %3 to i64
; Masked form of the {sae} double compare with a 64-bit result: the <8 x i1>
; result of @llvm.x86.avx512.cmp.pd.512 (immediates i32 2, i32 8) is ANDed with
; the bits of %__u, bitcast to i8 and zero-extended to i64.
; Both configurations are expected to apply the mask in a general-purpose
; register (andb %dil, %al, then movzbl) rather than as a {%k1} write mask on
; the compare.
; NOTE(review): the function name says "oeq", but the predicate immediate is 2
; and the checks expect vcmplepd -- confirm the naming is intentional.
23464 define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23465 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask:
23466 ; VLX: # %bb.0: # %entry
23467 ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23468 ; VLX-NEXT: kmovd %k0, %eax
23469 ; VLX-NEXT: andb %dil, %al
23470 ; VLX-NEXT: movzbl %al, %eax
23471 ; VLX-NEXT: vzeroupper
23474 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask:
23475 ; NoVLX: # %bb.0: # %entry
23476 ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23477 ; NoVLX-NEXT: kmovw %k0, %eax
23478 ; NoVLX-NEXT: andb %dil, %al
23479 ; NoVLX-NEXT: movzbl %al, %eax
23480 ; NoVLX-NEXT: vzeroupper
23483 %0 = bitcast <8 x i64> %__a to <8 x double>
23484 %1 = bitcast <8 x i64> %__b to <8 x double>
23485 %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
23486 %3 = bitcast i8 %__u to <8 x i1>
23487 %4 = and <8 x i1> %2, %3
23488 %5 = bitcast <8 x i1> %4 to i8
23489 %6 = zext i8 %5 to i64
23493 ; Test that we understand that cmpps with rounding zeros the upper bits of the mask register.
; Here the compare is @llvm.x86.avx512.cmp.ps.512 with immediate operands
; (i32 2, i32 8), and the expected instruction is the {sae} form of vcmpleps.
; The <16 x i1> result is round-tripped through i16 and then widened to
; <32 x i1> by a shuffle whose indices 16-31 select zeroinitializer elements.
; Both configurations are expected to move %k0 straight to %eax after the
; compare, with no additional instruction to zero the upper 16 bits.
23494 define i32 @test_cmpm_rnd_zero(<16 x float> %a, <16 x float> %b) {
23495 ; VLX-LABEL: test_cmpm_rnd_zero:
23497 ; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
23498 ; VLX-NEXT: kmovd %k0, %eax
23499 ; VLX-NEXT: vzeroupper
23502 ; NoVLX-LABEL: test_cmpm_rnd_zero:
23504 ; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
23505 ; NoVLX-NEXT: kmovw %k0, %eax
23506 ; NoVLX-NEXT: vzeroupper
23508 %res = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i32 8)
23509 %1 = bitcast <16 x i1> %res to i16
23510 %cast = bitcast i16 %1 to <16 x i1>
23511 %shuffle = shufflevector <16 x i1> %cast, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
23512 %cast2 = bitcast <32 x i1> %shuffle to i32
; Builds an 8-bit mask whose low four bits are zero and whose high four bits
; are the lane-wise (icmp ne %a, 0) results: shuffle indices 4-7 select the
; zeroinitializer operand for result elements 0-3, and indices 0-3 select the
; compare bits for result elements 4-7.
; The run with +avx512vl/dq is expected to test the xmm register directly and
; place the bits with a single kshiftlb $4. The +avx512f-only run is expected
; to test the value as a zmm register and use a 16-bit shift pair
; (kshiftlw $12, then kshiftrw $8).
23516 define i8 @mask_zero_lower(<4 x i32> %a) {
23517 ; VLX-LABEL: mask_zero_lower:
23519 ; VLX-NEXT: vptestmd %xmm0, %xmm0, %k0
23520 ; VLX-NEXT: kshiftlb $4, %k0, %k0
23521 ; VLX-NEXT: kmovd %k0, %eax
23522 ; VLX-NEXT: # kill: def $al killed $al killed $eax
23525 ; NoVLX-LABEL: mask_zero_lower:
23527 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23528 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
23529 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
23530 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
23531 ; NoVLX-NEXT: kmovw %k0, %eax
23532 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
23533 ; NoVLX-NEXT: vzeroupper
23535 %cmp = icmp ne <4 x i32> %a, zeroinitializer
23536 %concat = shufflevector <4 x i1> %cmp, <4 x i1> zeroinitializer, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
23537 %cast = bitcast <8 x i1> %concat to i8