1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-peephole -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=VLX
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-peephole -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=NoVLX
; Unmasked 16 x i8 equality compare of two register operands. The <16 x i1>
; result is widened to <32 x i1> and returned as an i32 bitmask.
; Expected lowering: with +avx512bw/+avx512vl the compare writes a mask register
; directly; with +avx512f only, the vector compare result is sign-extended to
; dwords and turned into a mask with vptestmd.
5 define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6 ; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
7 ; VLX: # %bb.0: # %entry
8 ; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
9 ; VLX-NEXT: kmovd %k0, %eax
12 ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
13 ; NoVLX: # %bb.0: # %entry
14 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
15 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
16 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
17 ; NoVLX-NEXT: kmovw %k0, %eax
18 ; NoVLX-NEXT: vzeroupper
21 %0 = bitcast <2 x i64> %__a to <16 x i8>
22 %1 = bitcast <2 x i64> %__b to <16 x i8>
23 %2 = icmp eq <16 x i8> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper 16 bits of the returned mask are zero.
24 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
25 %4 = bitcast <32 x i1> %3 to i32
; Unmasked 16 x i8 equality compare where the second operand is loaded from
; %__b. The <16 x i1> result is widened to <32 x i1> and returned as an i32
; bitmask. Both configurations are expected to fold the load into vpcmpeqb.
29 define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
30 ; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
31 ; VLX: # %bb.0: # %entry
32 ; VLX-NEXT: vpcmpeqb (%rdi), %xmm0, %k0
33 ; VLX-NEXT: kmovd %k0, %eax
36 ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
37 ; NoVLX: # %bb.0: # %entry
38 ; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
39 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
40 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
41 ; NoVLX-NEXT: kmovw %k0, %eax
42 ; NoVLX-NEXT: vzeroupper
45 %0 = bitcast <2 x i64> %__a to <16 x i8>
46 %load = load <2 x i64>, ptr %__b
47 %1 = bitcast <2 x i64> %load to <16 x i8>
48 %2 = icmp eq <16 x i8> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper 16 bits of the returned mask are zero.
49 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
50 %4 = bitcast <32 x i1> %3 to i32
; Masked 16 x i8 equality compare of two register operands: the compare result
; is ANDed with %__u (bitcast to <16 x i1>), widened to <32 x i1> and returned
; as an i32 bitmask.
; Expected lowering: with +avx512bw/+avx512vl the mask is applied as a
; write-mask on vpcmpeqb; with +avx512f only, it is applied with a scalar andl
; after the mask has been moved to a GPR.
54 define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
55 ; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
56 ; VLX: # %bb.0: # %entry
57 ; VLX-NEXT: kmovd %edi, %k1
58 ; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
59 ; VLX-NEXT: kmovd %k0, %eax
62 ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
63 ; NoVLX: # %bb.0: # %entry
64 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
65 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
66 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
67 ; NoVLX-NEXT: kmovw %k0, %eax
68 ; NoVLX-NEXT: andl %edi, %eax
69 ; NoVLX-NEXT: vzeroupper
72 %0 = bitcast <2 x i64> %__a to <16 x i8>
73 %1 = bitcast <2 x i64> %__b to <16 x i8>
74 %2 = icmp eq <16 x i8> %0, %1
75 %3 = bitcast i16 %__u to <16 x i1>
76 %4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper 16 bits of the returned mask are zero.
77 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
78 %6 = bitcast <32 x i1> %5 to i32
; Masked 16 x i8 equality compare where the second operand is loaded from
; %__b: the compare result is ANDed with %__u (bitcast to <16 x i1>), widened
; to <32 x i1> and returned as an i32 bitmask. The load is expected to fold
; into vpcmpeqb in both configurations.
82 define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
83 ; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
84 ; VLX: # %bb.0: # %entry
85 ; VLX-NEXT: kmovd %edi, %k1
86 ; VLX-NEXT: vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
87 ; VLX-NEXT: kmovd %k0, %eax
90 ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
91 ; NoVLX: # %bb.0: # %entry
92 ; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
93 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
94 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
95 ; NoVLX-NEXT: kmovw %k0, %eax
96 ; NoVLX-NEXT: andl %edi, %eax
97 ; NoVLX-NEXT: vzeroupper
100 %0 = bitcast <2 x i64> %__a to <16 x i8>
101 %load = load <2 x i64>, ptr %__b
102 %1 = bitcast <2 x i64> %load to <16 x i8>
103 %2 = icmp eq <16 x i8> %0, %1
104 %3 = bitcast i16 %__u to <16 x i1>
105 %4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper 16 bits of the returned mask are zero.
106 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
107 %6 = bitcast <32 x i1> %5 to i32
; Unmasked 16 x i8 equality compare of two register operands. The <16 x i1>
; result is widened to <64 x i1> and returned as an i64 bitmask.
; Expected lowering: with +avx512bw/+avx512vl the compare writes a mask register
; that is moved out with kmovq; with +avx512f only, the vector compare result
; is sign-extended to dwords and turned into a mask with vptestmd.
112 define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
113 ; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
114 ; VLX: # %bb.0: # %entry
115 ; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
116 ; VLX-NEXT: kmovq %k0, %rax
119 ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
120 ; NoVLX: # %bb.0: # %entry
121 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
122 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
123 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
124 ; NoVLX-NEXT: kmovw %k0, %eax
125 ; NoVLX-NEXT: vzeroupper
128 %0 = bitcast <2 x i64> %__a to <16 x i8>
129 %1 = bitcast <2 x i64> %__b to <16 x i8>
130 %2 = icmp eq <16 x i8> %0, %1
; Every shuffle index >= 16 selects a lane of the zeroinitializer operand, so
; only the low 16 lanes of the result come from the compare.
131 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
132 %4 = bitcast <64 x i1> %3 to i64
; Unmasked 16 x i8 equality compare where the second operand is loaded from
; %__b. The <16 x i1> result is widened to <64 x i1> and returned as an i64
; bitmask. Both configurations are expected to fold the load into vpcmpeqb.
136 define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
137 ; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
138 ; VLX: # %bb.0: # %entry
139 ; VLX-NEXT: vpcmpeqb (%rdi), %xmm0, %k0
140 ; VLX-NEXT: kmovq %k0, %rax
143 ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
144 ; NoVLX: # %bb.0: # %entry
145 ; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
146 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
147 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
148 ; NoVLX-NEXT: kmovw %k0, %eax
149 ; NoVLX-NEXT: vzeroupper
152 %0 = bitcast <2 x i64> %__a to <16 x i8>
153 %load = load <2 x i64>, ptr %__b
154 %1 = bitcast <2 x i64> %load to <16 x i8>
155 %2 = icmp eq <16 x i8> %0, %1
; Every shuffle index >= 16 selects a lane of the zeroinitializer operand, so
; only the low 16 lanes of the result come from the compare.
156 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
157 %4 = bitcast <64 x i1> %3 to i64
; Masked 16 x i8 equality compare of two register operands: the compare result
; is ANDed with %__u (bitcast to <16 x i1>), widened to <64 x i1> and returned
; as an i64 bitmask.
; Expected lowering: with +avx512bw/+avx512vl the mask is applied as a
; write-mask on vpcmpeqb; with +avx512f only, it is applied with a scalar andl
; after the mask has been moved to a GPR.
161 define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
162 ; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
163 ; VLX: # %bb.0: # %entry
164 ; VLX-NEXT: kmovd %edi, %k1
165 ; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
166 ; VLX-NEXT: kmovq %k0, %rax
169 ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
170 ; NoVLX: # %bb.0: # %entry
171 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
172 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
173 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
174 ; NoVLX-NEXT: kmovw %k0, %eax
175 ; NoVLX-NEXT: andl %edi, %eax
176 ; NoVLX-NEXT: vzeroupper
179 %0 = bitcast <2 x i64> %__a to <16 x i8>
180 %1 = bitcast <2 x i64> %__b to <16 x i8>
181 %2 = icmp eq <16 x i8> %0, %1
182 %3 = bitcast i16 %__u to <16 x i1>
183 %4 = and <16 x i1> %2, %3
; Every shuffle index >= 16 selects a lane of the zeroinitializer operand, so
; only the low 16 lanes of the result come from the masked compare.
184 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
185 %6 = bitcast <64 x i1> %5 to i64
; Masked 16 x i8 equality compare where the second operand is loaded from
; %__b: the compare result is ANDed with %__u (bitcast to <16 x i1>), widened
; to <64 x i1> and returned as an i64 bitmask. The load is expected to fold
; into vpcmpeqb in both configurations.
189 define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
190 ; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
191 ; VLX: # %bb.0: # %entry
192 ; VLX-NEXT: kmovd %edi, %k1
193 ; VLX-NEXT: vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
194 ; VLX-NEXT: kmovq %k0, %rax
197 ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
198 ; NoVLX: # %bb.0: # %entry
199 ; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
200 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
201 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
202 ; NoVLX-NEXT: kmovw %k0, %eax
203 ; NoVLX-NEXT: andl %edi, %eax
204 ; NoVLX-NEXT: vzeroupper
207 %0 = bitcast <2 x i64> %__a to <16 x i8>
208 %load = load <2 x i64>, ptr %__b
209 %1 = bitcast <2 x i64> %load to <16 x i8>
210 %2 = icmp eq <16 x i8> %0, %1
211 %3 = bitcast i16 %__u to <16 x i1>
212 %4 = and <16 x i1> %2, %3
; Every shuffle index >= 16 selects a lane of the zeroinitializer operand, so
; only the low 16 lanes of the result come from the masked compare.
213 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
214 %6 = bitcast <64 x i1> %5 to i64
; Unmasked 32 x i8 equality compare of two 256-bit register operands. The
; <32 x i1> result is widened to <64 x i1> and returned as an i64 bitmask.
; Expected lowering: with +avx512bw/+avx512vl the compare writes a mask register
; directly; with +avx512f only, the two 128-bit halves of the vector compare
; result are each converted to a 16-bit mask (vpmovsxbd + vptestmd) and then
; combined with shll/orl.
219 define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
220 ; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
221 ; VLX: # %bb.0: # %entry
222 ; VLX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0
223 ; VLX-NEXT: kmovq %k0, %rax
224 ; VLX-NEXT: vzeroupper
227 ; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
228 ; NoVLX: # %bb.0: # %entry
229 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
230 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
231 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
232 ; NoVLX-NEXT: kmovw %k0, %ecx
233 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
234 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
235 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
236 ; NoVLX-NEXT: kmovw %k0, %eax
237 ; NoVLX-NEXT: shll $16, %eax
238 ; NoVLX-NEXT: orl %ecx, %eax
239 ; NoVLX-NEXT: vzeroupper
242 %0 = bitcast <4 x i64> %__a to <32 x i8>
243 %1 = bitcast <4 x i64> %__b to <32 x i8>
244 %2 = icmp eq <32 x i8> %0, %1
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so the
; upper 32 bits of the returned mask are zero.
245 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
246 %4 = bitcast <64 x i1> %3 to i64
; Unmasked 32 x i8 equality compare where the second operand is loaded from
; %__b. The <32 x i1> result is widened to <64 x i1> and returned as an i64
; bitmask. Both configurations are expected to fold the load into vpcmpeqb;
; the +avx512f-only configuration builds the mask from the two 128-bit halves.
250 define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
251 ; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
252 ; VLX: # %bb.0: # %entry
253 ; VLX-NEXT: vpcmpeqb (%rdi), %ymm0, %k0
254 ; VLX-NEXT: kmovq %k0, %rax
255 ; VLX-NEXT: vzeroupper
258 ; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
259 ; NoVLX: # %bb.0: # %entry
260 ; NoVLX-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0
261 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
262 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
263 ; NoVLX-NEXT: kmovw %k0, %ecx
264 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
265 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
266 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
267 ; NoVLX-NEXT: kmovw %k0, %eax
268 ; NoVLX-NEXT: shll $16, %eax
269 ; NoVLX-NEXT: orl %ecx, %eax
270 ; NoVLX-NEXT: vzeroupper
273 %0 = bitcast <4 x i64> %__a to <32 x i8>
274 %load = load <4 x i64>, ptr %__b
275 %1 = bitcast <4 x i64> %load to <32 x i8>
276 %2 = icmp eq <32 x i8> %0, %1
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so the
; upper 32 bits of the returned mask are zero.
277 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
278 %4 = bitcast <64 x i1> %3 to i64
; Masked 32 x i8 equality compare of two 256-bit register operands: the compare
; result is ANDed with %__u (bitcast to <32 x i1>), widened to <64 x i1> and
; returned as an i64 bitmask.
; Expected lowering: with +avx512bw/+avx512vl the mask is applied as a
; write-mask on vpcmpeqb; with +avx512f only, each 16-bit half of the mask is
; ANDed in a GPR with the matching half of %edi before the halves are combined.
282 define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
283 ; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
284 ; VLX: # %bb.0: # %entry
285 ; VLX-NEXT: kmovd %edi, %k1
286 ; VLX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1}
287 ; VLX-NEXT: kmovq %k0, %rax
288 ; VLX-NEXT: vzeroupper
291 ; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
292 ; NoVLX: # %bb.0: # %entry
293 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
294 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
295 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
296 ; NoVLX-NEXT: kmovw %k0, %eax
297 ; NoVLX-NEXT: andl %edi, %eax
298 ; NoVLX-NEXT: shrl $16, %edi
299 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
300 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
301 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
302 ; NoVLX-NEXT: kmovw %k0, %ecx
303 ; NoVLX-NEXT: andl %edi, %ecx
304 ; NoVLX-NEXT: shll $16, %ecx
305 ; NoVLX-NEXT: movzwl %ax, %eax
306 ; NoVLX-NEXT: orl %ecx, %eax
307 ; NoVLX-NEXT: vzeroupper
310 %0 = bitcast <4 x i64> %__a to <32 x i8>
311 %1 = bitcast <4 x i64> %__b to <32 x i8>
312 %2 = icmp eq <32 x i8> %0, %1
313 %3 = bitcast i32 %__u to <32 x i1>
314 %4 = and <32 x i1> %2, %3
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so the
; upper 32 bits of the returned mask are zero.
315 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
316 %6 = bitcast <64 x i1> %5 to i64
; Masked 32 x i8 equality compare where the second operand is loaded from
; %__b: the compare result is ANDed with %__u (bitcast to <32 x i1>), widened
; to <64 x i1> and returned as an i64 bitmask. The load is expected to fold
; into vpcmpeqb in both configurations.
320 define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
321 ; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
322 ; VLX: # %bb.0: # %entry
323 ; VLX-NEXT: kmovd %edi, %k1
324 ; VLX-NEXT: vpcmpeqb (%rsi), %ymm0, %k0 {%k1}
325 ; VLX-NEXT: kmovq %k0, %rax
326 ; VLX-NEXT: vzeroupper
329 ; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
330 ; NoVLX: # %bb.0: # %entry
331 ; NoVLX-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
332 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
333 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
334 ; NoVLX-NEXT: kmovw %k0, %eax
335 ; NoVLX-NEXT: andl %edi, %eax
336 ; NoVLX-NEXT: shrl $16, %edi
337 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
338 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
339 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
340 ; NoVLX-NEXT: kmovw %k0, %ecx
341 ; NoVLX-NEXT: andl %edi, %ecx
342 ; NoVLX-NEXT: shll $16, %ecx
343 ; NoVLX-NEXT: movzwl %ax, %eax
344 ; NoVLX-NEXT: orl %ecx, %eax
345 ; NoVLX-NEXT: vzeroupper
348 %0 = bitcast <4 x i64> %__a to <32 x i8>
349 %load = load <4 x i64>, ptr %__b
350 %1 = bitcast <4 x i64> %load to <32 x i8>
351 %2 = icmp eq <32 x i8> %0, %1
352 %3 = bitcast i32 %__u to <32 x i1>
353 %4 = and <32 x i1> %2, %3
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so the
; upper 32 bits of the returned mask are zero.
354 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
355 %6 = bitcast <64 x i1> %5 to i64
; Unmasked 8 x i16 equality compare of two register operands. The <8 x i1>
; result is widened to <16 x i1> and returned as an i16 bitmask.
; Expected lowering: with +avx512bw/+avx512vl the compare writes a mask register
; directly; with +avx512f only, the vector compare result is sign-extended to
; qwords and turned into a mask with vptestmq.
360 define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
361 ; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
362 ; VLX: # %bb.0: # %entry
363 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
364 ; VLX-NEXT: kmovd %k0, %eax
365 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
368 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
369 ; NoVLX: # %bb.0: # %entry
370 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
371 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
372 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
373 ; NoVLX-NEXT: kmovw %k0, %eax
374 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
375 ; NoVLX-NEXT: vzeroupper
378 %0 = bitcast <2 x i64> %__a to <8 x i16>
379 %1 = bitcast <2 x i64> %__b to <8 x i16>
380 %2 = icmp eq <8 x i16> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so the
; upper 8 bits of the returned mask are zero.
381 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
382 %4 = bitcast <16 x i1> %3 to i16
; Unmasked 8 x i16 equality compare where the second operand is loaded from
; %__b. The <8 x i1> result is widened to <16 x i1> and returned as an i16
; bitmask. Both configurations are expected to fold the load into vpcmpeqw.
386 define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
387 ; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
388 ; VLX: # %bb.0: # %entry
389 ; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
390 ; VLX-NEXT: kmovd %k0, %eax
391 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
394 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
395 ; NoVLX: # %bb.0: # %entry
396 ; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
397 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
398 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
399 ; NoVLX-NEXT: kmovw %k0, %eax
400 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
401 ; NoVLX-NEXT: vzeroupper
404 %0 = bitcast <2 x i64> %__a to <8 x i16>
405 %load = load <2 x i64>, ptr %__b
406 %1 = bitcast <2 x i64> %load to <8 x i16>
407 %2 = icmp eq <8 x i16> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so the
; upper 8 bits of the returned mask are zero.
408 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
409 %4 = bitcast <16 x i1> %3 to i16
; Masked 8 x i16 equality compare of two register operands: the compare result
; is ANDed with %__u (bitcast to <8 x i1>), widened to <16 x i1> and returned
; as an i16 bitmask.
; Expected lowering: with +avx512bw/+avx512vl the mask is applied as a
; write-mask on vpcmpeqw; with +avx512f only, it is applied as a write-mask on
; the vptestmq that converts the sign-extended compare result to a mask.
413 define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
414 ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
415 ; VLX: # %bb.0: # %entry
416 ; VLX-NEXT: kmovd %edi, %k1
417 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
418 ; VLX-NEXT: kmovd %k0, %eax
419 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
422 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
423 ; NoVLX: # %bb.0: # %entry
424 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
425 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
426 ; NoVLX-NEXT: kmovw %edi, %k1
427 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
428 ; NoVLX-NEXT: kmovw %k0, %eax
429 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
430 ; NoVLX-NEXT: vzeroupper
433 %0 = bitcast <2 x i64> %__a to <8 x i16>
434 %1 = bitcast <2 x i64> %__b to <8 x i16>
435 %2 = icmp eq <8 x i16> %0, %1
436 %3 = bitcast i8 %__u to <8 x i1>
437 %4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so the
; upper 8 bits of the returned mask are zero.
438 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
439 %6 = bitcast <16 x i1> %5 to i16
; Masked 8 x i16 equality compare where the second operand is loaded from
; %__b: the compare result is ANDed with %__u (bitcast to <8 x i1>), widened
; to <16 x i1> and returned as an i16 bitmask. The load is expected to fold
; into vpcmpeqw in both configurations.
443 define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
444 ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
445 ; VLX: # %bb.0: # %entry
446 ; VLX-NEXT: kmovd %edi, %k1
447 ; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
448 ; VLX-NEXT: kmovd %k0, %eax
449 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
452 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
453 ; NoVLX: # %bb.0: # %entry
454 ; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
455 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
456 ; NoVLX-NEXT: kmovw %edi, %k1
457 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
458 ; NoVLX-NEXT: kmovw %k0, %eax
459 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
460 ; NoVLX-NEXT: vzeroupper
463 %0 = bitcast <2 x i64> %__a to <8 x i16>
464 %load = load <2 x i64>, ptr %__b
465 %1 = bitcast <2 x i64> %load to <8 x i16>
466 %2 = icmp eq <8 x i16> %0, %1
467 %3 = bitcast i8 %__u to <8 x i1>
468 %4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so the
; upper 8 bits of the returned mask are zero.
469 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
470 %6 = bitcast <16 x i1> %5 to i16
; Unmasked 8 x i16 equality compare of two register operands. The <8 x i1>
; result is widened to <32 x i1> and returned as an i32 bitmask.
; Expected lowering: with +avx512bw/+avx512vl the compare writes a mask register
; directly; with +avx512f only, the vector compare result is sign-extended to
; qwords and turned into a mask with vptestmq.
475 define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
476 ; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
477 ; VLX: # %bb.0: # %entry
478 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
479 ; VLX-NEXT: kmovd %k0, %eax
482 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
483 ; NoVLX: # %bb.0: # %entry
484 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
485 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
486 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
487 ; NoVLX-NEXT: kmovw %k0, %eax
488 ; NoVLX-NEXT: vzeroupper
491 %0 = bitcast <2 x i64> %__a to <8 x i16>
492 %1 = bitcast <2 x i64> %__b to <8 x i16>
493 %2 = icmp eq <8 x i16> %0, %1
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so
; only the low 8 lanes of the result come from the compare.
494 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
495 %4 = bitcast <32 x i1> %3 to i32
; Unmasked 8 x i16 equality compare where the second operand is loaded from
; %__b. The <8 x i1> result is widened to <32 x i1> and returned as an i32
; bitmask. Both configurations are expected to fold the load into vpcmpeqw.
499 define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
500 ; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
501 ; VLX: # %bb.0: # %entry
502 ; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
503 ; VLX-NEXT: kmovd %k0, %eax
506 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
507 ; NoVLX: # %bb.0: # %entry
508 ; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
509 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
510 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
511 ; NoVLX-NEXT: kmovw %k0, %eax
512 ; NoVLX-NEXT: vzeroupper
515 %0 = bitcast <2 x i64> %__a to <8 x i16>
516 %load = load <2 x i64>, ptr %__b
517 %1 = bitcast <2 x i64> %load to <8 x i16>
518 %2 = icmp eq <8 x i16> %0, %1
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so
; only the low 8 lanes of the result come from the compare.
519 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
520 %4 = bitcast <32 x i1> %3 to i32
; Masked 8 x i16 equality compare of two register operands: the compare result
; is ANDed with %__u (bitcast to <8 x i1>), widened to <32 x i1> and returned
; as an i32 bitmask.
; Expected lowering: with +avx512bw/+avx512vl the mask is applied as a
; write-mask on vpcmpeqw; with +avx512f only, it is applied as a write-mask on
; the vptestmq that converts the sign-extended compare result to a mask.
524 define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
525 ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
526 ; VLX: # %bb.0: # %entry
527 ; VLX-NEXT: kmovd %edi, %k1
528 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
529 ; VLX-NEXT: kmovd %k0, %eax
532 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
533 ; NoVLX: # %bb.0: # %entry
534 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
535 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
536 ; NoVLX-NEXT: kmovw %edi, %k1
537 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
538 ; NoVLX-NEXT: kmovw %k0, %eax
539 ; NoVLX-NEXT: vzeroupper
542 %0 = bitcast <2 x i64> %__a to <8 x i16>
543 %1 = bitcast <2 x i64> %__b to <8 x i16>
544 %2 = icmp eq <8 x i16> %0, %1
545 %3 = bitcast i8 %__u to <8 x i1>
546 %4 = and <8 x i1> %2, %3
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so
; only the low 8 lanes of the result come from the masked compare.
547 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
548 %6 = bitcast <32 x i1> %5 to i32
; Masked 8 x i16 equality compare where the second operand is loaded from
; %__b: the compare result is ANDed with %__u (bitcast to <8 x i1>), widened
; to <32 x i1> and returned as an i32 bitmask. The load is expected to fold
; into vpcmpeqw in both configurations.
552 define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
553 ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
554 ; VLX: # %bb.0: # %entry
555 ; VLX-NEXT: kmovd %edi, %k1
556 ; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
557 ; VLX-NEXT: kmovd %k0, %eax
560 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
561 ; NoVLX: # %bb.0: # %entry
562 ; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
563 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
564 ; NoVLX-NEXT: kmovw %edi, %k1
565 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
566 ; NoVLX-NEXT: kmovw %k0, %eax
567 ; NoVLX-NEXT: vzeroupper
570 %0 = bitcast <2 x i64> %__a to <8 x i16>
571 %load = load <2 x i64>, ptr %__b
572 %1 = bitcast <2 x i64> %load to <8 x i16>
573 %2 = icmp eq <8 x i16> %0, %1
574 %3 = bitcast i8 %__u to <8 x i1>
575 %4 = and <8 x i1> %2, %3
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so
; only the low 8 lanes of the result come from the masked compare.
576 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
577 %6 = bitcast <32 x i1> %5 to i32
; Unmasked 8 x i16 equality compare of two register operands. The <8 x i1>
; result is widened to <64 x i1> and returned as an i64 bitmask.
; Expected lowering: with +avx512bw/+avx512vl the compare writes a mask register
; that is moved out with kmovq; with +avx512f only, the vector compare result
; is sign-extended to qwords and turned into a mask with vptestmq.
582 define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
583 ; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
584 ; VLX: # %bb.0: # %entry
585 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
586 ; VLX-NEXT: kmovq %k0, %rax
589 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
590 ; NoVLX: # %bb.0: # %entry
591 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
592 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
593 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
594 ; NoVLX-NEXT: kmovw %k0, %eax
595 ; NoVLX-NEXT: vzeroupper
598 %0 = bitcast <2 x i64> %__a to <8 x i16>
599 %1 = bitcast <2 x i64> %__b to <8 x i16>
600 %2 = icmp eq <8 x i16> %0, %1
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so
; only the low 8 lanes of the result come from the compare.
601 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
602 %4 = bitcast <64 x i1> %3 to i64
; Unmasked 8 x i16 equality compare where the second operand is loaded from
; %__b. The <8 x i1> result is widened to <64 x i1> and returned as an i64
; bitmask. Both configurations are expected to fold the load into vpcmpeqw.
606 define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
607 ; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
608 ; VLX: # %bb.0: # %entry
609 ; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
610 ; VLX-NEXT: kmovq %k0, %rax
613 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
614 ; NoVLX: # %bb.0: # %entry
615 ; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
616 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
617 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
618 ; NoVLX-NEXT: kmovw %k0, %eax
619 ; NoVLX-NEXT: vzeroupper
622 %0 = bitcast <2 x i64> %__a to <8 x i16>
623 %load = load <2 x i64>, ptr %__b
624 %1 = bitcast <2 x i64> %load to <8 x i16>
625 %2 = icmp eq <8 x i16> %0, %1
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so
; only the low 8 lanes of the result come from the compare.
626 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
627 %4 = bitcast <64 x i1> %3 to i64
; Masked 8 x i16 equality compare of two register operands: the compare result
; is ANDed with %__u (bitcast to <8 x i1>), widened to <64 x i1> and returned
; as an i64 bitmask.
; Expected lowering: with +avx512bw/+avx512vl the mask is applied as a
; write-mask on vpcmpeqw; with +avx512f only, it is applied as a write-mask on
; the vptestmq that converts the sign-extended compare result to a mask.
631 define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
632 ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
633 ; VLX: # %bb.0: # %entry
634 ; VLX-NEXT: kmovd %edi, %k1
635 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
636 ; VLX-NEXT: kmovq %k0, %rax
639 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
640 ; NoVLX: # %bb.0: # %entry
641 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
642 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
643 ; NoVLX-NEXT: kmovw %edi, %k1
644 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
645 ; NoVLX-NEXT: kmovw %k0, %eax
646 ; NoVLX-NEXT: vzeroupper
649 %0 = bitcast <2 x i64> %__a to <8 x i16>
650 %1 = bitcast <2 x i64> %__b to <8 x i16>
651 %2 = icmp eq <8 x i16> %0, %1
652 %3 = bitcast i8 %__u to <8 x i1>
653 %4 = and <8 x i1> %2, %3
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so
; only the low 8 lanes of the result come from the masked compare.
654 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
655 %6 = bitcast <64 x i1> %5 to i64
; Masked 8 x i16 equality compare where the second operand is loaded from
; %__b: the compare result is ANDed with %__u (bitcast to <8 x i1>), widened
; to <64 x i1> and returned as an i64 bitmask. The load is expected to fold
; into vpcmpeqw in both configurations.
659 define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
660 ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
661 ; VLX: # %bb.0: # %entry
662 ; VLX-NEXT: kmovd %edi, %k1
663 ; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
664 ; VLX-NEXT: kmovq %k0, %rax
667 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
668 ; NoVLX: # %bb.0: # %entry
669 ; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
670 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
671 ; NoVLX-NEXT: kmovw %edi, %k1
672 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
673 ; NoVLX-NEXT: kmovw %k0, %eax
674 ; NoVLX-NEXT: vzeroupper
677 %0 = bitcast <2 x i64> %__a to <8 x i16>
678 %load = load <2 x i64>, ptr %__b
679 %1 = bitcast <2 x i64> %load to <8 x i16>
680 %2 = icmp eq <8 x i16> %0, %1
681 %3 = bitcast i8 %__u to <8 x i1>
682 %4 = and <8 x i1> %2, %3
; Every shuffle index >= 8 selects a lane of the zeroinitializer operand, so
; only the low 8 lanes of the result come from the masked compare.
683 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
684 %6 = bitcast <64 x i1> %5 to i64
; Unmasked 16 x i16 equality compare of two 256-bit register operands. The
; <16 x i1> result is widened to <32 x i1> and returned as an i32 bitmask.
; Expected lowering: with +avx512bw/+avx512vl the compare writes a mask register
; directly; with +avx512f only, the vector compare result is sign-extended to
; dwords and turned into a mask with vptestmd.
689 define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
690 ; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
691 ; VLX: # %bb.0: # %entry
692 ; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
693 ; VLX-NEXT: kmovd %k0, %eax
694 ; VLX-NEXT: vzeroupper
697 ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
698 ; NoVLX: # %bb.0: # %entry
699 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
700 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
701 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
702 ; NoVLX-NEXT: kmovw %k0, %eax
703 ; NoVLX-NEXT: vzeroupper
706 %0 = bitcast <4 x i64> %__a to <16 x i16>
707 %1 = bitcast <4 x i64> %__b to <16 x i16>
708 %2 = icmp eq <16 x i16> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so the
; upper 16 bits of the returned mask are zero.
709 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
710 %4 = bitcast <32 x i1> %3 to i32
; Unmasked 16 x i16 equality compare of %__a against a 256-bit value loaded from
; %__b, widened to a 32-bit mask. Shuffle indices 16-31 select lanes of the
; zeroinitializer operand, so the upper 16 bits of the returned i32 are zero.
; Both configurations are expected to fold the load into vpcmpeqw as a (%rdi)
; memory operand.
714 define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
715 ; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
716 ; VLX: # %bb.0: # %entry
717 ; VLX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0
718 ; VLX-NEXT: kmovd %k0, %eax
719 ; VLX-NEXT: vzeroupper
722 ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
723 ; NoVLX: # %bb.0: # %entry
724 ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
725 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
726 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
727 ; NoVLX-NEXT: kmovw %k0, %eax
728 ; NoVLX-NEXT: vzeroupper
731 %0 = bitcast <4 x i64> %__a to <16 x i16>
732 %load = load <4 x i64>, ptr %__b
733 %1 = bitcast <4 x i64> %load to <16 x i16>
734 %2 = icmp eq <16 x i16> %0, %1
735 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
736 %4 = bitcast <32 x i1> %3 to i32
; Masked 16 x i16 equality compare of two 256-bit register operands. The compare
; result is ANDed with the 16 bits of %__u and widened to <32 x i1>; shuffle indices
; 16-31 select lanes of the zeroinitializer operand, so the upper 16 result bits
; are zero.
; The avx512bw/vl configuration expects %__u to be moved into %k1 and used as a
; write-mask on vpcmpeqw; the avx512f-only configuration expects the mask to be
; applied with a scalar andl after the kmovw.
740 define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
741 ; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
742 ; VLX: # %bb.0: # %entry
743 ; VLX-NEXT: kmovd %edi, %k1
744 ; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
745 ; VLX-NEXT: kmovd %k0, %eax
746 ; VLX-NEXT: vzeroupper
749 ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
750 ; NoVLX: # %bb.0: # %entry
751 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
752 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
753 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
754 ; NoVLX-NEXT: kmovw %k0, %eax
755 ; NoVLX-NEXT: andl %edi, %eax
756 ; NoVLX-NEXT: vzeroupper
759 %0 = bitcast <4 x i64> %__a to <16 x i16>
760 %1 = bitcast <4 x i64> %__b to <16 x i16>
761 %2 = icmp eq <16 x i16> %0, %1
762 %3 = bitcast i16 %__u to <16 x i1>
763 %4 = and <16 x i1> %2, %3
764 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
765 %6 = bitcast <32 x i1> %5 to i32
; Masked 16 x i16 equality compare of %__a against a 256-bit value loaded from
; %__b. The compare result is ANDed with the 16 bits of %__u and widened to
; <32 x i1>; shuffle indices 16-31 select lanes of the zeroinitializer operand, so
; the upper 16 result bits are zero.
; The avx512bw/vl configuration expects a write-masked vpcmpeqw with a folded
; (%rsi) load; the avx512f-only configuration expects the folded load plus a scalar
; andl with %edi after the kmovw.
769 define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
770 ; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
771 ; VLX: # %bb.0: # %entry
772 ; VLX-NEXT: kmovd %edi, %k1
773 ; VLX-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
774 ; VLX-NEXT: kmovd %k0, %eax
775 ; VLX-NEXT: vzeroupper
778 ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
779 ; NoVLX: # %bb.0: # %entry
780 ; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
781 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
782 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
783 ; NoVLX-NEXT: kmovw %k0, %eax
784 ; NoVLX-NEXT: andl %edi, %eax
785 ; NoVLX-NEXT: vzeroupper
788 %0 = bitcast <4 x i64> %__a to <16 x i16>
789 %load = load <4 x i64>, ptr %__b
790 %1 = bitcast <4 x i64> %load to <16 x i16>
791 %2 = icmp eq <16 x i16> %0, %1
792 %3 = bitcast i16 %__u to <16 x i1>
793 %4 = and <16 x i1> %2, %3
794 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
795 %6 = bitcast <32 x i1> %5 to i32
; Unmasked 16 x i16 equality compare of two 256-bit register operands, widened to
; a 64-bit mask. Every shuffle index of 16 or more selects a lane of the
; zeroinitializer operand, so only the low 16 bits of the returned i64 can be set.
; The avx512bw/vl configuration expects vpcmpeqw into a k-register followed by
; kmovq; the avx512f-only configuration expects vpmovsxwd, vptestmd and kmovw.
800 define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
801 ; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
802 ; VLX: # %bb.0: # %entry
803 ; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
804 ; VLX-NEXT: kmovq %k0, %rax
805 ; VLX-NEXT: vzeroupper
808 ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
809 ; NoVLX: # %bb.0: # %entry
810 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
811 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
812 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
813 ; NoVLX-NEXT: kmovw %k0, %eax
814 ; NoVLX-NEXT: vzeroupper
817 %0 = bitcast <4 x i64> %__a to <16 x i16>
818 %1 = bitcast <4 x i64> %__b to <16 x i16>
819 %2 = icmp eq <16 x i16> %0, %1
820 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
821 %4 = bitcast <64 x i1> %3 to i64
; Unmasked 16 x i16 equality compare of %__a against a 256-bit value loaded from
; %__b, widened to a 64-bit mask. Every shuffle index of 16 or more selects a lane
; of the zeroinitializer operand, so only the low 16 bits of the returned i64 can
; be set.
; Both configurations are expected to fold the load into vpcmpeqw as a (%rdi)
; memory operand.
825 define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
826 ; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
827 ; VLX: # %bb.0: # %entry
828 ; VLX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0
829 ; VLX-NEXT: kmovq %k0, %rax
830 ; VLX-NEXT: vzeroupper
833 ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
834 ; NoVLX: # %bb.0: # %entry
835 ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
836 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
837 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
838 ; NoVLX-NEXT: kmovw %k0, %eax
839 ; NoVLX-NEXT: vzeroupper
842 %0 = bitcast <4 x i64> %__a to <16 x i16>
843 %load = load <4 x i64>, ptr %__b
844 %1 = bitcast <4 x i64> %load to <16 x i16>
845 %2 = icmp eq <16 x i16> %0, %1
846 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
847 %4 = bitcast <64 x i1> %3 to i64
; Masked 16 x i16 equality compare of two 256-bit register operands. The compare
; result is ANDed with the 16 bits of %__u and widened to <64 x i1>; every shuffle
; index of 16 or more selects a lane of the zeroinitializer operand, so only the
; low 16 bits of the returned i64 can be set.
; The avx512bw/vl configuration expects a write-masked vpcmpeqw and kmovq; the
; avx512f-only configuration expects the mask to be applied with a scalar andl.
851 define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
852 ; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
853 ; VLX: # %bb.0: # %entry
854 ; VLX-NEXT: kmovd %edi, %k1
855 ; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
856 ; VLX-NEXT: kmovq %k0, %rax
857 ; VLX-NEXT: vzeroupper
860 ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
861 ; NoVLX: # %bb.0: # %entry
862 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
863 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
864 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
865 ; NoVLX-NEXT: kmovw %k0, %eax
866 ; NoVLX-NEXT: andl %edi, %eax
867 ; NoVLX-NEXT: vzeroupper
870 %0 = bitcast <4 x i64> %__a to <16 x i16>
871 %1 = bitcast <4 x i64> %__b to <16 x i16>
872 %2 = icmp eq <16 x i16> %0, %1
873 %3 = bitcast i16 %__u to <16 x i1>
874 %4 = and <16 x i1> %2, %3
875 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
876 %6 = bitcast <64 x i1> %5 to i64
; Masked 16 x i16 equality compare of %__a against a 256-bit value loaded from
; %__b. The compare result is ANDed with the 16 bits of %__u and widened to
; <64 x i1>; every shuffle index of 16 or more selects a lane of the zeroinitializer
; operand, so only the low 16 bits of the returned i64 can be set.
; The avx512bw/vl configuration expects a write-masked vpcmpeqw with a folded
; (%rsi) load; the avx512f-only configuration expects the folded load plus a scalar
; andl with %edi.
880 define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
881 ; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
882 ; VLX: # %bb.0: # %entry
883 ; VLX-NEXT: kmovd %edi, %k1
884 ; VLX-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
885 ; VLX-NEXT: kmovq %k0, %rax
886 ; VLX-NEXT: vzeroupper
889 ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
890 ; NoVLX: # %bb.0: # %entry
891 ; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
892 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
893 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
894 ; NoVLX-NEXT: kmovw %k0, %eax
895 ; NoVLX-NEXT: andl %edi, %eax
896 ; NoVLX-NEXT: vzeroupper
899 %0 = bitcast <4 x i64> %__a to <16 x i16>
900 %load = load <4 x i64>, ptr %__b
901 %1 = bitcast <4 x i64> %load to <16 x i16>
902 %2 = icmp eq <16 x i16> %0, %1
903 %3 = bitcast i16 %__u to <16 x i1>
904 %4 = and <16 x i1> %2, %3
905 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
906 %6 = bitcast <64 x i1> %5 to i64
; Unmasked 32 x i16 equality compare of two 512-bit register operands, widened to
; a 64-bit mask. Shuffle indices 32-63 select lanes of the zeroinitializer operand,
; so the upper 32 bits of the returned i64 are zero.
; The avx512bw/vl configuration expects a single zmm vpcmpeqw into a k-register.
; The avx512f-only configuration expects the compare to be split into two 256-bit
; halves (vextracti64x4 for the upper half), each converted to a 16-bit mask, with
; the halves recombined by shll $16 and orl.
911 define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
912 ; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
913 ; VLX: # %bb.0: # %entry
914 ; VLX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
915 ; VLX-NEXT: kmovq %k0, %rax
916 ; VLX-NEXT: vzeroupper
919 ; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
920 ; NoVLX: # %bb.0: # %entry
921 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm2
922 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
923 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
924 ; NoVLX-NEXT: kmovw %k0, %ecx
925 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
926 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
927 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
928 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
929 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
930 ; NoVLX-NEXT: kmovw %k0, %eax
931 ; NoVLX-NEXT: shll $16, %eax
932 ; NoVLX-NEXT: orl %ecx, %eax
933 ; NoVLX-NEXT: vzeroupper
936 %0 = bitcast <8 x i64> %__a to <32 x i16>
937 %1 = bitcast <8 x i64> %__b to <32 x i16>
938 %2 = icmp eq <32 x i16> %0, %1
939 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
940 %4 = bitcast <64 x i1> %3 to i64
; Unmasked 32 x i16 equality compare of %__a against a 512-bit value loaded from
; %__b, widened to a 64-bit mask. Shuffle indices 32-63 select lanes of the
; zeroinitializer operand, so the upper 32 bits of the returned i64 are zero.
; The avx512bw/vl configuration expects one zmm vpcmpeqw with a folded load. The
; avx512f-only configuration expects two 256-bit compares that read (%rdi) and
; 32(%rdi) directly, with the two 16-bit masks recombined by shll $16 and orl.
944 define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
945 ; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
946 ; VLX: # %bb.0: # %entry
947 ; VLX-NEXT: vpcmpeqw (%rdi), %zmm0, %k0
948 ; VLX-NEXT: kmovq %k0, %rax
949 ; VLX-NEXT: vzeroupper
952 ; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
953 ; NoVLX: # %bb.0: # %entry
954 ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm1
955 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
956 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
957 ; NoVLX-NEXT: kmovw %k0, %ecx
958 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
959 ; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm0, %ymm0
960 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
961 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
962 ; NoVLX-NEXT: kmovw %k0, %eax
963 ; NoVLX-NEXT: shll $16, %eax
964 ; NoVLX-NEXT: orl %ecx, %eax
965 ; NoVLX-NEXT: vzeroupper
968 %0 = bitcast <8 x i64> %__a to <32 x i16>
969 %load = load <8 x i64>, ptr %__b
970 %1 = bitcast <8 x i64> %load to <32 x i16>
971 %2 = icmp eq <32 x i16> %0, %1
972 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
973 %4 = bitcast <64 x i1> %3 to i64
; Masked 32 x i16 equality compare of two 512-bit register operands. The compare
; result is ANDed with the 32 bits of %__u and widened to <64 x i1>; shuffle indices
; 32-63 select lanes of the zeroinitializer operand, so the upper 32 bits of the
; returned i64 are zero.
; The avx512bw/vl configuration expects a single write-masked zmm vpcmpeqw. The
; avx512f-only configuration expects two 256-bit compares; the low mask half is
; ANDed with %edi, %edi is then shifted right by 16 and ANDed with the high half,
; and the halves are merged with shll $16, movzwl and orl.
977 define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
978 ; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
979 ; VLX: # %bb.0: # %entry
980 ; VLX-NEXT: kmovd %edi, %k1
981 ; VLX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
982 ; VLX-NEXT: kmovq %k0, %rax
983 ; VLX-NEXT: vzeroupper
986 ; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
987 ; NoVLX: # %bb.0: # %entry
988 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm2
989 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
990 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
991 ; NoVLX-NEXT: kmovw %k0, %eax
992 ; NoVLX-NEXT: andl %edi, %eax
993 ; NoVLX-NEXT: shrl $16, %edi
994 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
995 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
996 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
997 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
998 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
999 ; NoVLX-NEXT: kmovw %k0, %ecx
1000 ; NoVLX-NEXT: andl %edi, %ecx
1001 ; NoVLX-NEXT: shll $16, %ecx
1002 ; NoVLX-NEXT: movzwl %ax, %eax
1003 ; NoVLX-NEXT: orl %ecx, %eax
1004 ; NoVLX-NEXT: vzeroupper
1007 %0 = bitcast <8 x i64> %__a to <32 x i16>
1008 %1 = bitcast <8 x i64> %__b to <32 x i16>
1009 %2 = icmp eq <32 x i16> %0, %1
1010 %3 = bitcast i32 %__u to <32 x i1>
1011 %4 = and <32 x i1> %2, %3
1012 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
1013 %6 = bitcast <64 x i1> %5 to i64
; Masked 32 x i16 equality compare of %__a against a 512-bit value loaded from
; %__b. The compare result is ANDed with the 32 bits of %__u and widened to
; <64 x i1>; shuffle indices 32-63 select lanes of the zeroinitializer operand, so
; the upper 32 bits of the returned i64 are zero.
; The avx512bw/vl configuration expects a single write-masked zmm vpcmpeqw with a
; folded load. The avx512f-only configuration expects two 256-bit compares reading
; (%rsi) and 32(%rsi), each mask half ANDed with the matching half of %edi, and
; the halves merged with shll $16, movzwl and orl.
1017 define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
1018 ; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
1019 ; VLX: # %bb.0: # %entry
1020 ; VLX-NEXT: kmovd %edi, %k1
1021 ; VLX-NEXT: vpcmpeqw (%rsi), %zmm0, %k0 {%k1}
1022 ; VLX-NEXT: kmovq %k0, %rax
1023 ; VLX-NEXT: vzeroupper
1026 ; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
1027 ; NoVLX: # %bb.0: # %entry
1028 ; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm1
1029 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
1030 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
1031 ; NoVLX-NEXT: kmovw %k0, %eax
1032 ; NoVLX-NEXT: andl %edi, %eax
1033 ; NoVLX-NEXT: shrl $16, %edi
1034 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
1035 ; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm0, %ymm0
1036 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
1037 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1038 ; NoVLX-NEXT: kmovw %k0, %ecx
1039 ; NoVLX-NEXT: andl %edi, %ecx
1040 ; NoVLX-NEXT: shll $16, %ecx
1041 ; NoVLX-NEXT: movzwl %ax, %eax
1042 ; NoVLX-NEXT: orl %ecx, %eax
1043 ; NoVLX-NEXT: vzeroupper
1046 %0 = bitcast <8 x i64> %__a to <32 x i16>
1047 %load = load <8 x i64>, ptr %__b
1048 %1 = bitcast <8 x i64> %load to <32 x i16>
1049 %2 = icmp eq <32 x i16> %0, %1
1050 %3 = bitcast i32 %__u to <32 x i1>
1051 %4 = and <32 x i1> %2, %3
1052 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
1053 %6 = bitcast <64 x i1> %5 to i64
; Unmasked 4 x i32 equality compare of two 128-bit register operands, widened to
; an 8-bit mask. Shuffle indices 4-7 select lanes of the zeroinitializer operand,
; so the upper 4 bits of the returned i8 are zero.
; The avx512bw/vl configuration expects an xmm vpcmpeqd into a k-register. The
; avx512f-only configuration expects the compare to be done on zmm registers,
; followed by a kshiftlw/kshiftrw pair by 12 that leaves only the low four bits of
; the 16-bit mask.
1058 define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1059 ; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
1060 ; VLX: # %bb.0: # %entry
1061 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
1062 ; VLX-NEXT: kmovd %k0, %eax
1063 ; VLX-NEXT: # kill: def $al killed $al killed $eax
1066 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
1067 ; NoVLX: # %bb.0: # %entry
1068 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1069 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1070 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1071 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1072 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1073 ; NoVLX-NEXT: kmovw %k0, %eax
1074 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
1075 ; NoVLX-NEXT: vzeroupper
1078 %0 = bitcast <2 x i64> %__a to <4 x i32>
1079 %1 = bitcast <2 x i64> %__b to <4 x i32>
1080 %2 = icmp eq <4 x i32> %0, %1
1081 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1082 %4 = bitcast <8 x i1> %3 to i8
; Unmasked 4 x i32 equality compare of %__a against a 128-bit value loaded from
; %__b, widened to an 8-bit mask. Shuffle indices 4-7 select lanes of the
; zeroinitializer operand, so the upper 4 bits of the returned i8 are zero.
; The avx512bw/vl configuration expects the load to be folded into vpcmpeqd. The
; avx512f-only configuration expects a separate vmovdqa into %xmm1 before a zmm
; compare, then a kshiftlw/kshiftrw pair by 12.
1086 define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
1087 ; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
1088 ; VLX: # %bb.0: # %entry
1089 ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
1090 ; VLX-NEXT: kmovd %k0, %eax
1091 ; VLX-NEXT: # kill: def $al killed $al killed $eax
1094 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
1095 ; NoVLX: # %bb.0: # %entry
1096 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1097 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
1098 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1099 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1100 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1101 ; NoVLX-NEXT: kmovw %k0, %eax
1102 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
1103 ; NoVLX-NEXT: vzeroupper
1106 %0 = bitcast <2 x i64> %__a to <4 x i32>
1107 %load = load <2 x i64>, ptr %__b
1108 %1 = bitcast <2 x i64> %load to <4 x i32>
1109 %2 = icmp eq <4 x i32> %0, %1
1110 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1111 %4 = bitcast <8 x i1> %3 to i8
; Masked 4 x i32 equality compare of two 128-bit register operands. %__u is bitcast
; to <8 x i1> and its low four lanes (%extract.i) are ANDed with the compare
; result before it is widened to <8 x i1>; shuffle indices 4-7 select lanes of the
; zeroinitializer operand, so the upper 4 bits of the returned i8 are zero.
; Both configurations expect %__u to be moved into %k1 and used as a write-mask on
; vpcmpeqd; the avx512f-only configuration does so on zmm registers and adds a
; kshiftlw/kshiftrw pair by 12.
1115 define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1116 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
1117 ; VLX: # %bb.0: # %entry
1118 ; VLX-NEXT: kmovd %edi, %k1
1119 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1120 ; VLX-NEXT: kmovd %k0, %eax
1121 ; VLX-NEXT: # kill: def $al killed $al killed $eax
1124 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
1125 ; NoVLX: # %bb.0: # %entry
1126 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1127 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1128 ; NoVLX-NEXT: kmovw %edi, %k1
1129 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1130 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1131 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1132 ; NoVLX-NEXT: kmovw %k0, %eax
1133 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
1134 ; NoVLX-NEXT: vzeroupper
1137 %0 = bitcast <2 x i64> %__a to <4 x i32>
1138 %1 = bitcast <2 x i64> %__b to <4 x i32>
1139 %2 = icmp eq <4 x i32> %0, %1
1140 %3 = bitcast i8 %__u to <8 x i1>
1141 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1142 %4 = and <4 x i1> %2, %extract.i
1143 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1144 %6 = bitcast <8 x i1> %5 to i8
; Masked 4 x i32 equality compare of %__a against a 128-bit value loaded from %__b.
; The low four lanes of %__u (as <8 x i1>) are ANDed with the compare result, which
; is then widened to <8 x i1>; shuffle indices 4-7 select lanes of the
; zeroinitializer operand, so the upper 4 bits of the returned i8 are zero.
; The avx512bw/vl configuration expects a write-masked vpcmpeqd with a folded
; (%rsi) load. The avx512f-only configuration expects a separate vmovdqa, a
; write-masked zmm compare, and a kshiftlw/kshiftrw pair by 12.
1148 define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
1149 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem:
1150 ; VLX: # %bb.0: # %entry
1151 ; VLX-NEXT: kmovd %edi, %k1
1152 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
1153 ; VLX-NEXT: kmovd %k0, %eax
1154 ; VLX-NEXT: # kill: def $al killed $al killed $eax
1157 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem:
1158 ; NoVLX: # %bb.0: # %entry
1159 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1160 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
1161 ; NoVLX-NEXT: kmovw %edi, %k1
1162 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1163 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1164 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1165 ; NoVLX-NEXT: kmovw %k0, %eax
1166 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
1167 ; NoVLX-NEXT: vzeroupper
1170 %0 = bitcast <2 x i64> %__a to <4 x i32>
1171 %load = load <2 x i64>, ptr %__b
1172 %1 = bitcast <2 x i64> %load to <4 x i32>
1173 %2 = icmp eq <4 x i32> %0, %1
1174 %3 = bitcast i8 %__u to <8 x i1>
1175 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1176 %4 = and <4 x i1> %2, %extract.i
1177 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1178 %6 = bitcast <8 x i1> %5 to i8
; Unmasked 4 x i32 equality compare of %__a against a broadcast scalar: a single
; i32 is loaded from %__b, inserted into lane 0 and splatted to all four lanes by
; an all-zero shuffle mask. The result is widened to an 8-bit mask; shuffle
; indices 4-7 select lanes of the zeroinitializer operand, so the upper 4 bits of
; the returned i8 are zero.
; The avx512bw/vl configuration expects an embedded {1to4} broadcast operand; the
; avx512f-only configuration expects a {1to16} broadcast on zmm plus a
; kshiftlw/kshiftrw pair by 12.
1183 define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
1184 ; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b:
1185 ; VLX: # %bb.0: # %entry
1186 ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
1187 ; VLX-NEXT: kmovd %k0, %eax
1188 ; VLX-NEXT: # kill: def $al killed $al killed $eax
1191 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b:
1192 ; NoVLX: # %bb.0: # %entry
1193 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1194 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
1195 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1196 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1197 ; NoVLX-NEXT: kmovw %k0, %eax
1198 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
1199 ; NoVLX-NEXT: vzeroupper
1202 %0 = bitcast <2 x i64> %__a to <4 x i32>
1203 %load = load i32, ptr %__b
1204 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1205 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1206 %2 = icmp eq <4 x i32> %0, %1
1207 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1208 %4 = bitcast <8 x i1> %3 to i8
; Masked 4 x i32 equality compare of %__a against a broadcast scalar: a single i32
; is loaded from %__b and splatted to all four lanes. The low four lanes of %__u
; (as <8 x i1>) are ANDed with the compare result, with the mask as the first
; operand of the `and` here. The result is widened to <8 x i1>; shuffle indices
; 4-7 select lanes of the zeroinitializer operand, so the upper 4 bits of the
; returned i8 are zero.
; The avx512bw/vl configuration expects a write-masked compare with a {1to4}
; broadcast operand; the avx512f-only configuration expects a write-masked
; {1to16} compare on zmm plus a kshiftlw/kshiftrw pair by 12.
1212 define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
1213 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
1214 ; VLX: # %bb.0: # %entry
1215 ; VLX-NEXT: kmovd %edi, %k1
1216 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
1217 ; VLX-NEXT: kmovd %k0, %eax
1218 ; VLX-NEXT: # kill: def $al killed $al killed $eax
1221 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
1222 ; NoVLX: # %bb.0: # %entry
1223 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1224 ; NoVLX-NEXT: kmovw %edi, %k1
1225 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
1226 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1227 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1228 ; NoVLX-NEXT: kmovw %k0, %eax
1229 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
1230 ; NoVLX-NEXT: vzeroupper
1233 %0 = bitcast <2 x i64> %__a to <4 x i32>
1234 %load = load i32, ptr %__b
1235 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1236 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1237 %2 = icmp eq <4 x i32> %0, %1
1238 %3 = bitcast i8 %__u to <8 x i1>
1239 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1240 %4 = and <4 x i1> %extract.i, %2
1241 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1242 %6 = bitcast <8 x i1> %5 to i8
; Unmasked 4 x i32 equality compare of two 128-bit register operands, widened to
; a 16-bit mask. Every shuffle index of 4 or more selects a lane of the
; zeroinitializer operand, so only the low 4 bits of the returned i16 can be set.
; The avx512bw/vl configuration expects an xmm vpcmpeqd into a k-register; the
; avx512f-only configuration expects a zmm compare followed by a
; kshiftlw/kshiftrw pair by 12.
1247 define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1248 ; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask:
1249 ; VLX: # %bb.0: # %entry
1250 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
1251 ; VLX-NEXT: kmovd %k0, %eax
1252 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1255 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask:
1256 ; NoVLX: # %bb.0: # %entry
1257 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1258 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1259 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1260 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1261 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1262 ; NoVLX-NEXT: kmovw %k0, %eax
1263 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1264 ; NoVLX-NEXT: vzeroupper
1267 %0 = bitcast <2 x i64> %__a to <4 x i32>
1268 %1 = bitcast <2 x i64> %__b to <4 x i32>
1269 %2 = icmp eq <4 x i32> %0, %1
1270 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1271 %4 = bitcast <16 x i1> %3 to i16
; Unmasked 4 x i32 equality compare of %__a against a 128-bit value loaded from
; %__b, widened to a 16-bit mask. Every shuffle index of 4 or more selects a lane
; of the zeroinitializer operand, so only the low 4 bits of the returned i16 can
; be set.
; The avx512bw/vl configuration expects the load to be folded into vpcmpeqd; the
; avx512f-only configuration expects a separate vmovdqa, a zmm compare and a
; kshiftlw/kshiftrw pair by 12.
1275 define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
1276 ; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem:
1277 ; VLX: # %bb.0: # %entry
1278 ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
1279 ; VLX-NEXT: kmovd %k0, %eax
1280 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1283 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem:
1284 ; NoVLX: # %bb.0: # %entry
1285 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1286 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
1287 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1288 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1289 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1290 ; NoVLX-NEXT: kmovw %k0, %eax
1291 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1292 ; NoVLX-NEXT: vzeroupper
1295 %0 = bitcast <2 x i64> %__a to <4 x i32>
1296 %load = load <2 x i64>, ptr %__b
1297 %1 = bitcast <2 x i64> %load to <4 x i32>
1298 %2 = icmp eq <4 x i32> %0, %1
1299 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1300 %4 = bitcast <16 x i1> %3 to i16
; Masked 4 x i32 equality compare of two 128-bit register operands. The low four
; lanes of %__u (as <8 x i1>) are ANDed with the compare result, which is then
; widened to <16 x i1>; every shuffle index of 4 or more selects a lane of the
; zeroinitializer operand, so only the low 4 bits of the returned i16 can be set.
; Both configurations expect %__u in %k1 as a write-mask on vpcmpeqd; the
; avx512f-only configuration does so on zmm registers and adds a
; kshiftlw/kshiftrw pair by 12.
1304 define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1305 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
1306 ; VLX: # %bb.0: # %entry
1307 ; VLX-NEXT: kmovd %edi, %k1
1308 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1309 ; VLX-NEXT: kmovd %k0, %eax
1310 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1313 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
1314 ; NoVLX: # %bb.0: # %entry
1315 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1316 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1317 ; NoVLX-NEXT: kmovw %edi, %k1
1318 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1319 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1320 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1321 ; NoVLX-NEXT: kmovw %k0, %eax
1322 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1323 ; NoVLX-NEXT: vzeroupper
1326 %0 = bitcast <2 x i64> %__a to <4 x i32>
1327 %1 = bitcast <2 x i64> %__b to <4 x i32>
1328 %2 = icmp eq <4 x i32> %0, %1
1329 %3 = bitcast i8 %__u to <8 x i1>
1330 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1331 %4 = and <4 x i1> %2, %extract.i
1332 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1333 %6 = bitcast <16 x i1> %5 to i16
; Masked 4 x i32 equality compare of %__a against a 128-bit value loaded from %__b.
; The low four lanes of %__u (as <8 x i1>) are ANDed with the compare result, which
; is then widened to <16 x i1>; every shuffle index of 4 or more selects a lane of
; the zeroinitializer operand, so only the low 4 bits of the returned i16 can be
; set.
; The avx512bw/vl configuration expects a write-masked vpcmpeqd with a folded
; (%rsi) load; the avx512f-only configuration expects a separate vmovdqa, a
; write-masked zmm compare and a kshiftlw/kshiftrw pair by 12.
1337 define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
1338 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem:
1339 ; VLX: # %bb.0: # %entry
1340 ; VLX-NEXT: kmovd %edi, %k1
1341 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
1342 ; VLX-NEXT: kmovd %k0, %eax
1343 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1346 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem:
1347 ; NoVLX: # %bb.0: # %entry
1348 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1349 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
1350 ; NoVLX-NEXT: kmovw %edi, %k1
1351 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1352 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1353 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1354 ; NoVLX-NEXT: kmovw %k0, %eax
1355 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1356 ; NoVLX-NEXT: vzeroupper
1359 %0 = bitcast <2 x i64> %__a to <4 x i32>
1360 %load = load <2 x i64>, ptr %__b
1361 %1 = bitcast <2 x i64> %load to <4 x i32>
1362 %2 = icmp eq <4 x i32> %0, %1
1363 %3 = bitcast i8 %__u to <8 x i1>
1364 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1365 %4 = and <4 x i1> %2, %extract.i
1366 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1367 %6 = bitcast <16 x i1> %5 to i16
; Unmasked 4 x i32 equality compare of %__a against a broadcast scalar: a single
; i32 is loaded from %__b, inserted into lane 0 and splatted to all four lanes by
; an all-zero shuffle mask. The result is widened to a 16-bit mask; every shuffle
; index of 4 or more selects a lane of the zeroinitializer operand, so only the
; low 4 bits of the returned i16 can be set.
; The avx512bw/vl configuration expects an embedded {1to4} broadcast operand; the
; avx512f-only configuration expects a {1to16} broadcast on zmm plus a
; kshiftlw/kshiftrw pair by 12.
1372 define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
1373 ; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b:
1374 ; VLX: # %bb.0: # %entry
1375 ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
1376 ; VLX-NEXT: kmovd %k0, %eax
1377 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1380 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b:
1381 ; NoVLX: # %bb.0: # %entry
1382 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1383 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
1384 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1385 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1386 ; NoVLX-NEXT: kmovw %k0, %eax
1387 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1388 ; NoVLX-NEXT: vzeroupper
1391 %0 = bitcast <2 x i64> %__a to <4 x i32>
1392 %load = load i32, ptr %__b
1393 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1394 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1395 %2 = icmp eq <4 x i32> %0, %1
1396 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1397 %4 = bitcast <16 x i1> %3 to i16
; Masked 4 x i32 equality compare of %__a against a broadcast scalar: a single i32
; is loaded from %__b and splatted to all four lanes. The low four lanes of %__u
; (as <8 x i1>) are ANDed with the compare result, with the mask as the first
; operand of the `and` here. The result is widened to <16 x i1>; every shuffle
; index of 4 or more selects a lane of the zeroinitializer operand, so only the
; low 4 bits of the returned i16 can be set.
; The avx512bw/vl configuration expects a write-masked compare with a {1to4}
; broadcast operand; the avx512f-only configuration expects a write-masked
; {1to16} compare on zmm plus a kshiftlw/kshiftrw pair by 12.
1401 define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
1402 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
1403 ; VLX: # %bb.0: # %entry
1404 ; VLX-NEXT: kmovd %edi, %k1
1405 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
1406 ; VLX-NEXT: kmovd %k0, %eax
1407 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1410 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
1411 ; NoVLX: # %bb.0: # %entry
1412 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1413 ; NoVLX-NEXT: kmovw %edi, %k1
1414 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
1415 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1416 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1417 ; NoVLX-NEXT: kmovw %k0, %eax
1418 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1419 ; NoVLX-NEXT: vzeroupper
1422 %0 = bitcast <2 x i64> %__a to <4 x i32>
1423 %load = load i32, ptr %__b
1424 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1425 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1426 %2 = icmp eq <4 x i32> %0, %1
1427 %3 = bitcast i8 %__u to <8 x i1>
1428 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1429 %4 = and <4 x i1> %extract.i, %2
1430 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1431 %6 = bitcast <16 x i1> %5 to i16
; Register-register compare, 4 lanes widened to a 32-bit mask.
; %__a and %__b are viewed as <4 x i32> and compared for equality; the
; <4 x i1> result is padded with lanes from a zeroinitializer vector up to
; <32 x i1> and bitcast to i32.
; Expected codegen: the AVX512VL run needs only an xmm vpcmpeqd into %k0 plus
; kmovd; the AVX512F-only run widens both operands to zmm and keeps only the
; low four mask bits via a kshiftlw/kshiftrw $12 pair.
1436 define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1437 ; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
1438 ; VLX: # %bb.0: # %entry
1439 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
1440 ; VLX-NEXT: kmovd %k0, %eax
1443 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
1444 ; NoVLX: # %bb.0: # %entry
1445 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1446 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1447 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1448 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1449 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1450 ; NoVLX-NEXT: kmovw %k0, %eax
1451 ; NoVLX-NEXT: vzeroupper
1454 %0 = bitcast <2 x i64> %__a to <4 x i32>
1455 %1 = bitcast <2 x i64> %__b to <4 x i32>
1456 %2 = icmp eq <4 x i32> %0, %1
1457 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1458 %4 = bitcast <32 x i1> %3 to i32
; Register-memory compare, 4 lanes widened to a 32-bit mask.
; The second operand is loaded from %__b as <2 x i64>; both operands are viewed
; as <4 x i32> and compared for equality. The <4 x i1> result is padded with
; lanes from a zeroinitializer vector up to <32 x i1> and bitcast to i32.
; Expected codegen: the AVX512VL run folds the load into the xmm vpcmpeqd; the
; AVX512F-only run loads into %xmm1 with vmovdqa, compares at zmm width, and
; keeps only the low four mask bits via a kshiftlw/kshiftrw $12 pair.
1462 define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
1463 ; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
1464 ; VLX: # %bb.0: # %entry
1465 ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
1466 ; VLX-NEXT: kmovd %k0, %eax
1469 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
1470 ; NoVLX: # %bb.0: # %entry
1471 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1472 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
1473 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1474 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1475 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1476 ; NoVLX-NEXT: kmovw %k0, %eax
1477 ; NoVLX-NEXT: vzeroupper
1480 %0 = bitcast <2 x i64> %__a to <4 x i32>
1481 %load = load <2 x i64>, ptr %__b
1482 %1 = bitcast <2 x i64> %load to <4 x i32>
1483 %2 = icmp eq <4 x i32> %0, %1
1484 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1485 %4 = bitcast <32 x i1> %3 to i32
; Masked register-register compare, 4 lanes widened to a 32-bit mask.
; %__a and %__b are viewed as <4 x i32> and compared for equality; the result
; is ANDed with lanes 0-3 of %__u (bitcast to <8 x i1>), padded with lanes from
; a zeroinitializer vector up to <32 x i1>, and bitcast to i32.
; Expected codegen: the AVX512VL run moves %edi into %k1 and issues a masked
; xmm vpcmpeqd; the AVX512F-only run does the masked compare at zmm width and
; keeps only the low four mask bits via a kshiftlw/kshiftrw $12 pair.
1489 define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1490 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
1491 ; VLX: # %bb.0: # %entry
1492 ; VLX-NEXT: kmovd %edi, %k1
1493 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1494 ; VLX-NEXT: kmovd %k0, %eax
1497 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
1498 ; NoVLX: # %bb.0: # %entry
1499 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1500 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1501 ; NoVLX-NEXT: kmovw %edi, %k1
1502 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1503 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1504 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1505 ; NoVLX-NEXT: kmovw %k0, %eax
1506 ; NoVLX-NEXT: vzeroupper
1509 %0 = bitcast <2 x i64> %__a to <4 x i32>
1510 %1 = bitcast <2 x i64> %__b to <4 x i32>
1511 %2 = icmp eq <4 x i32> %0, %1
1512 %3 = bitcast i8 %__u to <8 x i1>
1513 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1514 %4 = and <4 x i1> %2, %extract.i
1515 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1516 %6 = bitcast <32 x i1> %5 to i32
; Masked register-memory compare, 4 lanes widened to a 32-bit mask.
; The second operand is loaded from %__b as <2 x i64>; both operands are viewed
; as <4 x i32> and compared for equality. The result is ANDed with lanes 0-3 of
; %__u (bitcast to <8 x i1>), padded with lanes from a zeroinitializer vector
; up to <32 x i1>, and bitcast to i32.
; Expected codegen: the AVX512VL run folds the load into a masked xmm
; vpcmpeqd; the AVX512F-only run loads into %xmm1 with vmovdqa, does the masked
; compare at zmm width, and keeps only the low four mask bits via a
; kshiftlw/kshiftrw $12 pair.
1520 define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
1521 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
1522 ; VLX: # %bb.0: # %entry
1523 ; VLX-NEXT: kmovd %edi, %k1
1524 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
1525 ; VLX-NEXT: kmovd %k0, %eax
1528 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
1529 ; NoVLX: # %bb.0: # %entry
1530 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1531 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
1532 ; NoVLX-NEXT: kmovw %edi, %k1
1533 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1534 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1535 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1536 ; NoVLX-NEXT: kmovw %k0, %eax
1537 ; NoVLX-NEXT: vzeroupper
1540 %0 = bitcast <2 x i64> %__a to <4 x i32>
1541 %load = load <2 x i64>, ptr %__b
1542 %1 = bitcast <2 x i64> %load to <4 x i32>
1543 %2 = icmp eq <4 x i32> %0, %1
1544 %3 = bitcast i8 %__u to <8 x i1>
1545 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1546 %4 = and <4 x i1> %2, %extract.i
1547 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1548 %6 = bitcast <32 x i1> %5 to i32
; Broadcast compare, 4 lanes widened to a 32-bit mask.
; The i32 loaded from %__b is splatted to <4 x i32> and compared for equality
; with %__a viewed as <4 x i32>. The <4 x i1> result is padded with lanes from
; a zeroinitializer vector up to <32 x i1> and bitcast to i32.
; Expected codegen: the AVX512VL run folds the load as a {1to4} broadcast into
; an xmm vpcmpeqd; the AVX512F-only run uses a zmm compare with a {1to16}
; broadcast and keeps only the low four mask bits via a kshiftlw/kshiftrw $12
; pair.
1553 define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
1554 ; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
1555 ; VLX: # %bb.0: # %entry
1556 ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
1557 ; VLX-NEXT: kmovd %k0, %eax
1560 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
1561 ; NoVLX: # %bb.0: # %entry
1562 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1563 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
1564 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1565 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1566 ; NoVLX-NEXT: kmovw %k0, %eax
1567 ; NoVLX-NEXT: vzeroupper
1570 %0 = bitcast <2 x i64> %__a to <4 x i32>
1571 %load = load i32, ptr %__b
1572 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1573 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1574 %2 = icmp eq <4 x i32> %0, %1
1575 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1576 %4 = bitcast <32 x i1> %3 to i32
; Masked broadcast compare, 4 lanes widened to a 32-bit mask.
; The i32 loaded from %__b is splatted to <4 x i32> and compared for equality
; with %__a viewed as <4 x i32>. The result is ANDed with lanes 0-3 of %__u
; (bitcast to <8 x i1>), padded with lanes from a zeroinitializer vector up to
; <32 x i1>, and bitcast to i32.
; Expected codegen: the AVX512VL run folds the load as a {1to4} broadcast into
; a masked xmm vpcmpeqd; the AVX512F-only run uses a masked zmm compare with a
; {1to16} broadcast and keeps only the low four mask bits via a
; kshiftlw/kshiftrw $12 pair.
1580 define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
1581 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
1582 ; VLX: # %bb.0: # %entry
1583 ; VLX-NEXT: kmovd %edi, %k1
1584 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
1585 ; VLX-NEXT: kmovd %k0, %eax
1588 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
1589 ; NoVLX: # %bb.0: # %entry
1590 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1591 ; NoVLX-NEXT: kmovw %edi, %k1
1592 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
1593 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1594 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1595 ; NoVLX-NEXT: kmovw %k0, %eax
1596 ; NoVLX-NEXT: vzeroupper
1599 %0 = bitcast <2 x i64> %__a to <4 x i32>
1600 %load = load i32, ptr %__b
1601 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1602 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1603 %2 = icmp eq <4 x i32> %0, %1
1604 %3 = bitcast i8 %__u to <8 x i1>
1605 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1606 %4 = and <4 x i1> %extract.i, %2
1607 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1608 %6 = bitcast <32 x i1> %5 to i32
; Masked register-register compare whose mask arrives as an i32 and whose
; result is widened through the scalar domain rather than by a vector shuffle.
; %__a and %__b are viewed as <4 x i32> and compared for equality; the result
; is ANDed with lanes 0-3 of %__u (bitcast to <32 x i1>), bitcast to i4, and
; zero-extended to i32.
; Unlike the neighbouring tests, neither the return value nor %__u carries a
; zeroext attribute here.
; Expected codegen: the AVX512VL run reads the mask result back with kmovb; the
; AVX512F-only run reads it with kmovw and isolates the low four bits with
; andl $15 instead of a kshiftlw/kshiftrw pair.
1613 define i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_i32(i32 %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1614 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_i32:
1615 ; VLX: # %bb.0: # %entry
1616 ; VLX-NEXT: kmovd %edi, %k1
1617 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1618 ; VLX-NEXT: kmovb %k0, %eax
1621 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_i32:
1622 ; NoVLX: # %bb.0: # %entry
1623 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1624 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1625 ; NoVLX-NEXT: kmovw %edi, %k1
1626 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1627 ; NoVLX-NEXT: kmovw %k0, %eax
1628 ; NoVLX-NEXT: andl $15, %eax
1629 ; NoVLX-NEXT: vzeroupper
1632 %0 = bitcast <2 x i64> %__a to <4 x i32>
1633 %1 = bitcast <2 x i64> %__b to <4 x i32>
1634 %2 = icmp eq <4 x i32> %0, %1
1635 %3 = bitcast i32 %__u to <32 x i1>
1636 %extract.i = shufflevector <32 x i1> %3, <32 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1637 %4 = and <4 x i1> %2, %extract.i
1638 %5 = bitcast <4 x i1> %4 to i4
1639 %6 = zext i4 %5 to i32
; Register-register compare, 4 lanes widened to a 64-bit mask.
; %__a and %__b are viewed as <4 x i32> and compared for equality; the
; <4 x i1> result is padded with lanes from a zeroinitializer vector up to
; <64 x i1> and bitcast to i64.
; Expected codegen: the AVX512VL run needs only an xmm vpcmpeqd into %k0 plus
; kmovq; the AVX512F-only run widens both operands to zmm, keeps only the low
; four mask bits via a kshiftlw/kshiftrw $12 pair, and reads the mask with
; kmovw into %eax.
1644 define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1645 ; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
1646 ; VLX: # %bb.0: # %entry
1647 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
1648 ; VLX-NEXT: kmovq %k0, %rax
1651 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
1652 ; NoVLX: # %bb.0: # %entry
1653 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1654 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1655 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1656 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1657 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1658 ; NoVLX-NEXT: kmovw %k0, %eax
1659 ; NoVLX-NEXT: vzeroupper
1662 %0 = bitcast <2 x i64> %__a to <4 x i32>
1663 %1 = bitcast <2 x i64> %__b to <4 x i32>
1664 %2 = icmp eq <4 x i32> %0, %1
1665 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1666 %4 = bitcast <64 x i1> %3 to i64
; Register-memory compare, 4 lanes widened to a 64-bit mask.
; The second operand is loaded from %__b as <2 x i64>; both operands are viewed
; as <4 x i32> and compared for equality. The <4 x i1> result is padded with
; lanes from a zeroinitializer vector up to <64 x i1> and bitcast to i64.
; Expected codegen: the AVX512VL run folds the load into the xmm vpcmpeqd and
; reads the mask with kmovq; the AVX512F-only run loads into %xmm1 with
; vmovdqa, compares at zmm width, and keeps only the low four mask bits via a
; kshiftlw/kshiftrw $12 pair.
1670 define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
1671 ; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
1672 ; VLX: # %bb.0: # %entry
1673 ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
1674 ; VLX-NEXT: kmovq %k0, %rax
1677 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
1678 ; NoVLX: # %bb.0: # %entry
1679 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1680 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
1681 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1682 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1683 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1684 ; NoVLX-NEXT: kmovw %k0, %eax
1685 ; NoVLX-NEXT: vzeroupper
1688 %0 = bitcast <2 x i64> %__a to <4 x i32>
1689 %load = load <2 x i64>, ptr %__b
1690 %1 = bitcast <2 x i64> %load to <4 x i32>
1691 %2 = icmp eq <4 x i32> %0, %1
1692 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1693 %4 = bitcast <64 x i1> %3 to i64
; Masked register-register compare, 4 lanes widened to a 64-bit mask.
; %__a and %__b are viewed as <4 x i32> and compared for equality; the result
; is ANDed with lanes 0-3 of %__u (bitcast to <8 x i1>), padded with lanes from
; a zeroinitializer vector up to <64 x i1>, and bitcast to i64.
; Expected codegen: the AVX512VL run moves %edi into %k1, issues a masked xmm
; vpcmpeqd, and reads the mask with kmovq; the AVX512F-only run does the masked
; compare at zmm width and keeps only the low four mask bits via a
; kshiftlw/kshiftrw $12 pair.
1697 define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1698 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
1699 ; VLX: # %bb.0: # %entry
1700 ; VLX-NEXT: kmovd %edi, %k1
1701 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1702 ; VLX-NEXT: kmovq %k0, %rax
1705 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
1706 ; NoVLX: # %bb.0: # %entry
1707 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1708 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1709 ; NoVLX-NEXT: kmovw %edi, %k1
1710 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1711 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1712 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1713 ; NoVLX-NEXT: kmovw %k0, %eax
1714 ; NoVLX-NEXT: vzeroupper
1717 %0 = bitcast <2 x i64> %__a to <4 x i32>
1718 %1 = bitcast <2 x i64> %__b to <4 x i32>
1719 %2 = icmp eq <4 x i32> %0, %1
1720 %3 = bitcast i8 %__u to <8 x i1>
1721 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1722 %4 = and <4 x i1> %2, %extract.i
1723 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1724 %6 = bitcast <64 x i1> %5 to i64
; Masked register-memory compare, 4 lanes widened to a 64-bit mask.
; The second operand is loaded from %__b as <2 x i64>; both operands are viewed
; as <4 x i32> and compared for equality. The result is ANDed with lanes 0-3 of
; %__u (bitcast to <8 x i1>), padded with lanes from a zeroinitializer vector
; up to <64 x i1>, and bitcast to i64.
; Expected codegen: the AVX512VL run folds the load into a masked xmm vpcmpeqd
; and reads the mask with kmovq; the AVX512F-only run loads into %xmm1 with
; vmovdqa, does the masked compare at zmm width, and keeps only the low four
; mask bits via a kshiftlw/kshiftrw $12 pair.
1728 define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
1729 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem:
1730 ; VLX: # %bb.0: # %entry
1731 ; VLX-NEXT: kmovd %edi, %k1
1732 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
1733 ; VLX-NEXT: kmovq %k0, %rax
1736 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem:
1737 ; NoVLX: # %bb.0: # %entry
1738 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1739 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
1740 ; NoVLX-NEXT: kmovw %edi, %k1
1741 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1742 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1743 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1744 ; NoVLX-NEXT: kmovw %k0, %eax
1745 ; NoVLX-NEXT: vzeroupper
1748 %0 = bitcast <2 x i64> %__a to <4 x i32>
1749 %load = load <2 x i64>, ptr %__b
1750 %1 = bitcast <2 x i64> %load to <4 x i32>
1751 %2 = icmp eq <4 x i32> %0, %1
1752 %3 = bitcast i8 %__u to <8 x i1>
1753 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1754 %4 = and <4 x i1> %2, %extract.i
1755 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1756 %6 = bitcast <64 x i1> %5 to i64
; Broadcast compare, 4 lanes widened to a 64-bit mask.
; The i32 loaded from %__b is splatted to <4 x i32> and compared for equality
; with %__a viewed as <4 x i32>. The <4 x i1> result is padded with lanes from
; a zeroinitializer vector up to <64 x i1> and bitcast to i64.
; Expected codegen: the AVX512VL run folds the load as a {1to4} broadcast into
; an xmm vpcmpeqd and reads the mask with kmovq; the AVX512F-only run uses a
; zmm compare with a {1to16} broadcast and keeps only the low four mask bits
; via a kshiftlw/kshiftrw $12 pair.
1761 define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
1762 ; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b:
1763 ; VLX: # %bb.0: # %entry
1764 ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
1765 ; VLX-NEXT: kmovq %k0, %rax
1768 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b:
1769 ; NoVLX: # %bb.0: # %entry
1770 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1771 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
1772 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1773 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1774 ; NoVLX-NEXT: kmovw %k0, %eax
1775 ; NoVLX-NEXT: vzeroupper
1778 %0 = bitcast <2 x i64> %__a to <4 x i32>
1779 %load = load i32, ptr %__b
1780 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1781 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1782 %2 = icmp eq <4 x i32> %0, %1
1783 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1784 %4 = bitcast <64 x i1> %3 to i64
; Masked broadcast compare, 4 lanes widened to a 64-bit mask.
; The i32 loaded from %__b is splatted to <4 x i32> and compared for equality
; with %__a viewed as <4 x i32>. The result is ANDed with lanes 0-3 of %__u
; (bitcast to <8 x i1>), padded with lanes from a zeroinitializer vector up to
; <64 x i1>, and bitcast to i64.
; Expected codegen: the AVX512VL run folds the load as a {1to4} broadcast into
; a masked xmm vpcmpeqd and reads the mask with kmovq; the AVX512F-only run
; uses a masked zmm compare with a {1to16} broadcast and keeps only the low
; four mask bits via a kshiftlw/kshiftrw $12 pair.
1788 define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
1789 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b:
1790 ; VLX: # %bb.0: # %entry
1791 ; VLX-NEXT: kmovd %edi, %k1
1792 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
1793 ; VLX-NEXT: kmovq %k0, %rax
1796 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b:
1797 ; NoVLX: # %bb.0: # %entry
1798 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1799 ; NoVLX-NEXT: kmovw %edi, %k1
1800 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
1801 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1802 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1803 ; NoVLX-NEXT: kmovw %k0, %eax
1804 ; NoVLX-NEXT: vzeroupper
1807 %0 = bitcast <2 x i64> %__a to <4 x i32>
1808 %load = load i32, ptr %__b
1809 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1810 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1811 %2 = icmp eq <4 x i32> %0, %1
1812 %3 = bitcast i8 %__u to <8 x i1>
1813 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1814 %4 = and <4 x i1> %extract.i, %2
1815 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1816 %6 = bitcast <64 x i1> %5 to i64
; Register-register compare, 8 lanes widened to a 16-bit mask.
; %__a and %__b are viewed as <8 x i32> and compared for equality; the
; <8 x i1> result is padded with lanes from a zeroinitializer vector up to
; <16 x i1> and bitcast to i16.
; Expected codegen: the AVX512VL run needs only a ymm vpcmpeqd into %k0 plus
; kmovd; the AVX512F-only run widens both operands to zmm and keeps only the
; low eight mask bits via a kshiftlw/kshiftrw $8 pair.
1821 define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
1822 ; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask:
1823 ; VLX: # %bb.0: # %entry
1824 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
1825 ; VLX-NEXT: kmovd %k0, %eax
1826 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1827 ; VLX-NEXT: vzeroupper
1830 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask:
1831 ; NoVLX: # %bb.0: # %entry
1832 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
1833 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1834 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1835 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
1836 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
1837 ; NoVLX-NEXT: kmovw %k0, %eax
1838 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1839 ; NoVLX-NEXT: vzeroupper
1842 %0 = bitcast <4 x i64> %__a to <8 x i32>
1843 %1 = bitcast <4 x i64> %__b to <8 x i32>
1844 %2 = icmp eq <8 x i32> %0, %1
1845 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1846 %4 = bitcast <16 x i1> %3 to i16
; Register-memory compare, 8 lanes widened to a 16-bit mask.
; The second operand is loaded from %__b as <4 x i64>; both operands are viewed
; as <8 x i32> and compared for equality. The <8 x i1> result is padded with
; lanes from a zeroinitializer vector up to <16 x i1> and bitcast to i16.
; Expected codegen: the AVX512VL run folds the load into the ymm vpcmpeqd; the
; AVX512F-only run loads into %ymm1 with vmovdqa, compares at zmm width, and
; keeps only the low eight mask bits via a kshiftlw/kshiftrw $8 pair.
1850 define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
1851 ; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem:
1852 ; VLX: # %bb.0: # %entry
1853 ; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
1854 ; VLX-NEXT: kmovd %k0, %eax
1855 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1856 ; VLX-NEXT: vzeroupper
1859 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem:
1860 ; NoVLX: # %bb.0: # %entry
1861 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1862 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
1863 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1864 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
1865 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
1866 ; NoVLX-NEXT: kmovw %k0, %eax
1867 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1868 ; NoVLX-NEXT: vzeroupper
1871 %0 = bitcast <4 x i64> %__a to <8 x i32>
1872 %load = load <4 x i64>, ptr %__b
1873 %1 = bitcast <4 x i64> %load to <8 x i32>
1874 %2 = icmp eq <8 x i32> %0, %1
1875 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1876 %4 = bitcast <16 x i1> %3 to i16
; Masked register-register compare, 8 lanes widened to a 16-bit mask.
; %__a and %__b are viewed as <8 x i32> and compared for equality; the result
; is ANDed with %__u bitcast to <8 x i1>, padded with lanes from a
; zeroinitializer vector up to <16 x i1>, and bitcast to i16.
; Expected codegen: the AVX512VL run moves %edi into %k1 and issues a masked
; ymm vpcmpeqd; the AVX512F-only run does the masked compare at zmm width and
; keeps only the low eight mask bits via a kshiftlw/kshiftrw $8 pair.
1880 define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
1881 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask:
1882 ; VLX: # %bb.0: # %entry
1883 ; VLX-NEXT: kmovd %edi, %k1
1884 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
1885 ; VLX-NEXT: kmovd %k0, %eax
1886 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1887 ; VLX-NEXT: vzeroupper
1890 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask:
1891 ; NoVLX: # %bb.0: # %entry
1892 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
1893 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1894 ; NoVLX-NEXT: kmovw %edi, %k1
1895 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1896 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
1897 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
1898 ; NoVLX-NEXT: kmovw %k0, %eax
1899 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1900 ; NoVLX-NEXT: vzeroupper
1903 %0 = bitcast <4 x i64> %__a to <8 x i32>
1904 %1 = bitcast <4 x i64> %__b to <8 x i32>
1905 %2 = icmp eq <8 x i32> %0, %1
1906 %3 = bitcast i8 %__u to <8 x i1>
1907 %4 = and <8 x i1> %2, %3
1908 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1909 %6 = bitcast <16 x i1> %5 to i16
; Masked register-memory compare, 8 lanes widened to a 16-bit mask.
; The second operand is loaded from %__b as <4 x i64>; both operands are viewed
; as <8 x i32> and compared for equality. The result is ANDed with %__u bitcast
; to <8 x i1>, padded with lanes from a zeroinitializer vector up to
; <16 x i1>, and bitcast to i16.
; Expected codegen: the AVX512VL run folds the load into a masked ymm
; vpcmpeqd; the AVX512F-only run loads into %ymm1 with vmovdqa, does the masked
; compare at zmm width, and keeps only the low eight mask bits via a
; kshiftlw/kshiftrw $8 pair.
1913 define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
1914 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem:
1915 ; VLX: # %bb.0: # %entry
1916 ; VLX-NEXT: kmovd %edi, %k1
1917 ; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
1918 ; VLX-NEXT: kmovd %k0, %eax
1919 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1920 ; VLX-NEXT: vzeroupper
1923 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem:
1924 ; NoVLX: # %bb.0: # %entry
1925 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1926 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
1927 ; NoVLX-NEXT: kmovw %edi, %k1
1928 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1929 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
1930 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
1931 ; NoVLX-NEXT: kmovw %k0, %eax
1932 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1933 ; NoVLX-NEXT: vzeroupper
1936 %0 = bitcast <4 x i64> %__a to <8 x i32>
1937 %load = load <4 x i64>, ptr %__b
1938 %1 = bitcast <4 x i64> %load to <8 x i32>
1939 %2 = icmp eq <8 x i32> %0, %1
1940 %3 = bitcast i8 %__u to <8 x i1>
1941 %4 = and <8 x i1> %2, %3
1942 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1943 %6 = bitcast <16 x i1> %5 to i16
; Broadcast compare, 8 lanes widened to a 16-bit mask.
; The i32 loaded from %__b is splatted to <8 x i32> and compared for equality
; with %__a viewed as <8 x i32>. The <8 x i1> result is padded with lanes from
; a zeroinitializer vector up to <16 x i1> and bitcast to i16.
; Expected codegen: the AVX512VL run folds the load as a {1to8} broadcast into
; a ymm vpcmpeqd; the AVX512F-only run uses a zmm compare with a {1to16}
; broadcast and keeps only the low eight mask bits via a kshiftlw/kshiftrw $8
; pair.
1948 define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
1949 ; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b:
1950 ; VLX: # %bb.0: # %entry
1951 ; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
1952 ; VLX-NEXT: kmovd %k0, %eax
1953 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1954 ; VLX-NEXT: vzeroupper
1957 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b:
1958 ; NoVLX: # %bb.0: # %entry
1959 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1960 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
1961 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
1962 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
1963 ; NoVLX-NEXT: kmovw %k0, %eax
1964 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1965 ; NoVLX-NEXT: vzeroupper
1968 %0 = bitcast <4 x i64> %__a to <8 x i32>
1969 %load = load i32, ptr %__b
1970 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
1971 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1972 %2 = icmp eq <8 x i32> %0, %1
1973 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1974 %4 = bitcast <16 x i1> %3 to i16
; Masked broadcast compare, 8 lanes widened to a 16-bit mask.
; The i32 loaded from %__b is splatted to <8 x i32> and compared for equality
; with %__a viewed as <8 x i32>. The result is ANDed with %__u bitcast to
; <8 x i1>, padded with lanes from a zeroinitializer vector up to <16 x i1>,
; and bitcast to i16.
; Expected codegen: the AVX512VL run folds the load as a {1to8} broadcast into
; a masked ymm vpcmpeqd; the AVX512F-only run uses a masked zmm compare with a
; {1to16} broadcast and keeps only the low eight mask bits via a
; kshiftlw/kshiftrw $8 pair.
1978 define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
1979 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b:
1980 ; VLX: # %bb.0: # %entry
1981 ; VLX-NEXT: kmovd %edi, %k1
1982 ; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
1983 ; VLX-NEXT: kmovd %k0, %eax
1984 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1985 ; VLX-NEXT: vzeroupper
1988 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b:
1989 ; NoVLX: # %bb.0: # %entry
1990 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1991 ; NoVLX-NEXT: kmovw %edi, %k1
1992 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
1993 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
1994 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
1995 ; NoVLX-NEXT: kmovw %k0, %eax
1996 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1997 ; NoVLX-NEXT: vzeroupper
2000 %0 = bitcast <4 x i64> %__a to <8 x i32>
2001 %load = load i32, ptr %__b
2002 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2003 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2004 %2 = icmp eq <8 x i32> %0, %1
2005 %3 = bitcast i8 %__u to <8 x i1>
2006 %4 = and <8 x i1> %3, %2
2007 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2008 %6 = bitcast <16 x i1> %5 to i16
; Register-register compare, 8 lanes widened to a 32-bit mask.
; %__a and %__b are viewed as <8 x i32> and compared for equality; the
; <8 x i1> result is padded with lanes from a zeroinitializer vector up to
; <32 x i1> and bitcast to i32.
; Expected codegen: the AVX512VL run needs only a ymm vpcmpeqd into %k0 plus
; kmovd; the AVX512F-only run widens both operands to zmm and keeps only the
; low eight mask bits via a kshiftlw/kshiftrw $8 pair.
2013 define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
2014 ; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask:
2015 ; VLX: # %bb.0: # %entry
2016 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
2017 ; VLX-NEXT: kmovd %k0, %eax
2018 ; VLX-NEXT: vzeroupper
2021 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask:
2022 ; NoVLX: # %bb.0: # %entry
2023 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
2024 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2025 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2026 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2027 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2028 ; NoVLX-NEXT: kmovw %k0, %eax
2029 ; NoVLX-NEXT: vzeroupper
2032 %0 = bitcast <4 x i64> %__a to <8 x i32>
2033 %1 = bitcast <4 x i64> %__b to <8 x i32>
2034 %2 = icmp eq <8 x i32> %0, %1
2035 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2036 %4 = bitcast <32 x i1> %3 to i32
; Register-memory compare, 8 lanes widened to a 32-bit mask.
; The second operand is loaded from %__b as <4 x i64>; both operands are viewed
; as <8 x i32> and compared for equality. The <8 x i1> result is padded with
; lanes from a zeroinitializer vector up to <32 x i1> and bitcast to i32.
; Expected codegen: the AVX512VL run folds the load into the ymm vpcmpeqd; the
; AVX512F-only run loads into %ymm1 with vmovdqa, compares at zmm width, and
; keeps only the low eight mask bits via a kshiftlw/kshiftrw $8 pair.
2040 define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
2041 ; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem:
2042 ; VLX: # %bb.0: # %entry
2043 ; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
2044 ; VLX-NEXT: kmovd %k0, %eax
2045 ; VLX-NEXT: vzeroupper
2048 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem:
2049 ; NoVLX: # %bb.0: # %entry
2050 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2051 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
2052 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2053 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2054 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2055 ; NoVLX-NEXT: kmovw %k0, %eax
2056 ; NoVLX-NEXT: vzeroupper
2059 %0 = bitcast <4 x i64> %__a to <8 x i32>
2060 %load = load <4 x i64>, ptr %__b
2061 %1 = bitcast <4 x i64> %load to <8 x i32>
2062 %2 = icmp eq <8 x i32> %0, %1
2063 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2064 %4 = bitcast <32 x i1> %3 to i32
; Masked register-register compare, 8 lanes widened to a 32-bit mask.
; %__a and %__b are viewed as <8 x i32> and compared for equality; the result
; is ANDed with %__u bitcast to <8 x i1>, padded with lanes from a
; zeroinitializer vector up to <32 x i1>, and bitcast to i32.
; Expected codegen: the AVX512VL run moves %edi into %k1 and issues a masked
; ymm vpcmpeqd; the AVX512F-only run does the masked compare at zmm width and
; keeps only the low eight mask bits via a kshiftlw/kshiftrw $8 pair.
2068 define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
2069 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask:
2070 ; VLX: # %bb.0: # %entry
2071 ; VLX-NEXT: kmovd %edi, %k1
2072 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
2073 ; VLX-NEXT: kmovd %k0, %eax
2074 ; VLX-NEXT: vzeroupper
2077 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask:
2078 ; NoVLX: # %bb.0: # %entry
2079 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
2080 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2081 ; NoVLX-NEXT: kmovw %edi, %k1
2082 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2083 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2084 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2085 ; NoVLX-NEXT: kmovw %k0, %eax
2086 ; NoVLX-NEXT: vzeroupper
2089 %0 = bitcast <4 x i64> %__a to <8 x i32>
2090 %1 = bitcast <4 x i64> %__b to <8 x i32>
2091 %2 = icmp eq <8 x i32> %0, %1
2092 %3 = bitcast i8 %__u to <8 x i1>
2093 %4 = and <8 x i1> %2, %3
2094 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2095 %6 = bitcast <32 x i1> %5 to i32
; Masked register-memory compare, 8 lanes widened to a 32-bit mask.
; The second operand is loaded from %__b as <4 x i64>; both operands are viewed
; as <8 x i32> and compared for equality. The result is ANDed with %__u bitcast
; to <8 x i1>, padded with lanes from a zeroinitializer vector up to
; <32 x i1>, and bitcast to i32.
; Expected codegen: the AVX512VL run folds the load into a masked ymm
; vpcmpeqd; the AVX512F-only run loads into %ymm1 with vmovdqa, does the masked
; compare at zmm width, and keeps only the low eight mask bits via a
; kshiftlw/kshiftrw $8 pair.
2099 define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
2100 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem:
2101 ; VLX: # %bb.0: # %entry
2102 ; VLX-NEXT: kmovd %edi, %k1
2103 ; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
2104 ; VLX-NEXT: kmovd %k0, %eax
2105 ; VLX-NEXT: vzeroupper
2108 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem:
2109 ; NoVLX: # %bb.0: # %entry
2110 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2111 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
2112 ; NoVLX-NEXT: kmovw %edi, %k1
2113 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2114 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2115 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2116 ; NoVLX-NEXT: kmovw %k0, %eax
2117 ; NoVLX-NEXT: vzeroupper
2120 %0 = bitcast <4 x i64> %__a to <8 x i32>
2121 %load = load <4 x i64>, ptr %__b
2122 %1 = bitcast <4 x i64> %load to <8 x i32>
2123 %2 = icmp eq <8 x i32> %0, %1
2124 %3 = bitcast i8 %__u to <8 x i1>
2125 %4 = and <8 x i1> %2, %3
2126 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2127 %6 = bitcast <32 x i1> %5 to i32
; Broadcast compare, 8 lanes widened to a 32-bit mask.
; The i32 loaded from %__b is splatted to <8 x i32> and compared for equality
; with %__a viewed as <8 x i32>. The <8 x i1> result is padded with lanes from
; a zeroinitializer vector up to <32 x i1> and bitcast to i32.
; Expected codegen: the AVX512VL run folds the load as a {1to8} broadcast into
; a ymm vpcmpeqd; the AVX512F-only run uses a zmm compare with a {1to16}
; broadcast and keeps only the low eight mask bits via a kshiftlw/kshiftrw $8
; pair.
2132 define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
2133 ; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b:
2134 ; VLX: # %bb.0: # %entry
2135 ; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
2136 ; VLX-NEXT: kmovd %k0, %eax
2137 ; VLX-NEXT: vzeroupper
2140 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b:
2141 ; NoVLX: # %bb.0: # %entry
2142 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2143 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2144 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2145 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2146 ; NoVLX-NEXT: kmovw %k0, %eax
2147 ; NoVLX-NEXT: vzeroupper
2150 %0 = bitcast <4 x i64> %__a to <8 x i32>
2151 %load = load i32, ptr %__b
2152 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2153 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2154 %2 = icmp eq <8 x i32> %0, %1
2155 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2156 %4 = bitcast <32 x i1> %3 to i32
; Masked equality compare of eight i32 lanes of %__a against a single i32
; loaded from %__b and splatted to all lanes. The <8 x i1> result is ANDed
; with the bits of %__u (mask is the first `and` operand here), widened to 32
; lanes with zeros from the zeroinitializer operand and bitcast to i32.
; Per the assertions below, both runs fold the broadcast load and the {%k1}
; write-mask into the compare; the AVX512F-only run works on zmm with {1to16}
; and emits a kshiftlw/kshiftrw $8 pair afterwards.
2160 define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
2161 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b:
2162 ; VLX: # %bb.0: # %entry
2163 ; VLX-NEXT: kmovd %edi, %k1
2164 ; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
2165 ; VLX-NEXT: kmovd %k0, %eax
2166 ; VLX-NEXT: vzeroupper
2169 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b:
2170 ; NoVLX: # %bb.0: # %entry
2171 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2172 ; NoVLX-NEXT: kmovw %edi, %k1
2173 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
2174 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2175 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2176 ; NoVLX-NEXT: kmovw %k0, %eax
2177 ; NoVLX-NEXT: vzeroupper
2180 %0 = bitcast <4 x i64> %__a to <8 x i32>
2181 %load = load i32, ptr %__b
2182 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2183 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2184 %2 = icmp eq <8 x i32> %0, %1
2185 %3 = bitcast i8 %__u to <8 x i1>
2186 %4 = and <8 x i1> %3, %2
2187 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2188 %6 = bitcast <32 x i1> %5 to i32
; Unmasked register-register equality compare of eight i32 lanes. The
; <8 x i1> result is widened to 64 lanes (shuffle indices 8-15 select from the
; zeroinitializer operand, so every added lane is zero) and bitcast to i64.
; Per the assertions below, the AVX512VL run compares on ymm and reads the
; mask with kmovq; the AVX512F-only run compares on zmm, emits a
; kshiftlw/kshiftrw $8 pair and reads the mask with kmovw into %eax.
2193 define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
2194 ; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask:
2195 ; VLX: # %bb.0: # %entry
2196 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
2197 ; VLX-NEXT: kmovq %k0, %rax
2198 ; VLX-NEXT: vzeroupper
2201 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask:
2202 ; NoVLX: # %bb.0: # %entry
2203 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
2204 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2205 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2206 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2207 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2208 ; NoVLX-NEXT: kmovw %k0, %eax
2209 ; NoVLX-NEXT: vzeroupper
2212 %0 = bitcast <4 x i64> %__a to <8 x i32>
2213 %1 = bitcast <4 x i64> %__b to <8 x i32>
2214 %2 = icmp eq <8 x i32> %0, %1
2215 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2216 %4 = bitcast <64 x i1> %3 to i64
; Unmasked equality compare of eight i32 lanes of %__a against a full 256-bit
; vector loaded from %__b. The <8 x i1> result is widened to 64 lanes with
; zeros from the zeroinitializer operand and bitcast to i64.
; Per the assertions below, the AVX512VL run folds the load into the ymm
; compare; the AVX512F-only run loads into ymm1 first, compares on zmm and
; emits a kshiftlw/kshiftrw $8 pair before reading the mask.
2220 define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
2221 ; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem:
2222 ; VLX: # %bb.0: # %entry
2223 ; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
2224 ; VLX-NEXT: kmovq %k0, %rax
2225 ; VLX-NEXT: vzeroupper
2228 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem:
2229 ; NoVLX: # %bb.0: # %entry
2230 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2231 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
2232 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2233 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2234 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2235 ; NoVLX-NEXT: kmovw %k0, %eax
2236 ; NoVLX-NEXT: vzeroupper
2239 %0 = bitcast <4 x i64> %__a to <8 x i32>
2240 %load = load <4 x i64>, ptr %__b
2241 %1 = bitcast <4 x i64> %load to <8 x i32>
2242 %2 = icmp eq <8 x i32> %0, %1
2243 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2244 %4 = bitcast <64 x i1> %3 to i64
; Masked register-register equality compare of eight i32 lanes. The <8 x i1>
; result is ANDed with the bits of %__u, widened to 64 lanes with zeros from
; the zeroinitializer operand and bitcast to i64.
; Per the assertions below, both runs move %edi into %k1 and use it as the
; compare's write-mask; the AVX512F-only run compares on zmm and emits a
; kshiftlw/kshiftrw $8 pair before reading the mask with kmovw.
2248 define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
2249 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask:
2250 ; VLX: # %bb.0: # %entry
2251 ; VLX-NEXT: kmovd %edi, %k1
2252 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
2253 ; VLX-NEXT: kmovq %k0, %rax
2254 ; VLX-NEXT: vzeroupper
2257 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask:
2258 ; NoVLX: # %bb.0: # %entry
2259 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
2260 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2261 ; NoVLX-NEXT: kmovw %edi, %k1
2262 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2263 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2264 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2265 ; NoVLX-NEXT: kmovw %k0, %eax
2266 ; NoVLX-NEXT: vzeroupper
2269 %0 = bitcast <4 x i64> %__a to <8 x i32>
2270 %1 = bitcast <4 x i64> %__b to <8 x i32>
2271 %2 = icmp eq <8 x i32> %0, %1
2272 %3 = bitcast i8 %__u to <8 x i1>
2273 %4 = and <8 x i1> %2, %3
2274 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2275 %6 = bitcast <64 x i1> %5 to i64
; Masked equality compare of eight i32 lanes of %__a against a full 256-bit
; vector loaded from %__b. The <8 x i1> result is ANDed with the bits of %__u,
; widened to 64 lanes with zeros from the zeroinitializer operand and bitcast
; to i64.
; Per the assertions below, the AVX512VL run folds the load and the write-mask
; into one ymm compare; the AVX512F-only run loads into ymm1, compares on zmm
; under {%k1} and emits a kshiftlw/kshiftrw $8 pair afterwards.
2279 define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
2280 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem:
2281 ; VLX: # %bb.0: # %entry
2282 ; VLX-NEXT: kmovd %edi, %k1
2283 ; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
2284 ; VLX-NEXT: kmovq %k0, %rax
2285 ; VLX-NEXT: vzeroupper
2288 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem:
2289 ; NoVLX: # %bb.0: # %entry
2290 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2291 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
2292 ; NoVLX-NEXT: kmovw %edi, %k1
2293 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2294 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2295 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2296 ; NoVLX-NEXT: kmovw %k0, %eax
2297 ; NoVLX-NEXT: vzeroupper
2300 %0 = bitcast <4 x i64> %__a to <8 x i32>
2301 %load = load <4 x i64>, ptr %__b
2302 %1 = bitcast <4 x i64> %load to <8 x i32>
2303 %2 = icmp eq <8 x i32> %0, %1
2304 %3 = bitcast i8 %__u to <8 x i1>
2305 %4 = and <8 x i1> %2, %3
2306 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2307 %6 = bitcast <64 x i1> %5 to i64
; Unmasked equality compare of eight i32 lanes of %__a against a single i32
; loaded from %__b and splatted to all eight lanes. The <8 x i1> result is
; widened to 64 lanes with zeros from the zeroinitializer operand and bitcast
; to i64.
; Per the assertions below, the load is folded as an embedded broadcast,
; {1to8} on ymm in the AVX512VL run and {1to16} on zmm in the AVX512F-only
; run; the latter also emits a kshiftlw/kshiftrw $8 pair after the compare.
2312 define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
2313 ; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b:
2314 ; VLX: # %bb.0: # %entry
2315 ; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
2316 ; VLX-NEXT: kmovq %k0, %rax
2317 ; VLX-NEXT: vzeroupper
2320 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b:
2321 ; NoVLX: # %bb.0: # %entry
2322 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2323 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2324 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2325 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2326 ; NoVLX-NEXT: kmovw %k0, %eax
2327 ; NoVLX-NEXT: vzeroupper
2330 %0 = bitcast <4 x i64> %__a to <8 x i32>
2331 %load = load i32, ptr %__b
2332 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2333 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2334 %2 = icmp eq <8 x i32> %0, %1
2335 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2336 %4 = bitcast <64 x i1> %3 to i64
; Masked equality compare of eight i32 lanes of %__a against a single i32
; loaded from %__b and splatted to all lanes. The <8 x i1> result is ANDed
; with the bits of %__u (mask is the first `and` operand here), widened to 64
; lanes with zeros from the zeroinitializer operand and bitcast to i64.
; Per the assertions below, both runs fold the broadcast load and the {%k1}
; write-mask into the compare; the AVX512F-only run works on zmm with {1to16}
; and emits a kshiftlw/kshiftrw $8 pair afterwards.
2340 define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
2341 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b:
2342 ; VLX: # %bb.0: # %entry
2343 ; VLX-NEXT: kmovd %edi, %k1
2344 ; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
2345 ; VLX-NEXT: kmovq %k0, %rax
2346 ; VLX-NEXT: vzeroupper
2349 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b:
2350 ; NoVLX: # %bb.0: # %entry
2351 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2352 ; NoVLX-NEXT: kmovw %edi, %k1
2353 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
2354 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2355 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2356 ; NoVLX-NEXT: kmovw %k0, %eax
2357 ; NoVLX-NEXT: vzeroupper
2360 %0 = bitcast <4 x i64> %__a to <8 x i32>
2361 %load = load i32, ptr %__b
2362 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2363 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2364 %2 = icmp eq <8 x i32> %0, %1
2365 %3 = bitcast i8 %__u to <8 x i1>
2366 %4 = and <8 x i1> %3, %2
2367 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2368 %6 = bitcast <64 x i1> %5 to i64
; Unmasked register-register equality compare of sixteen i32 lanes. The
; <16 x i1> result is widened to 32 lanes (shuffle indices 16-31 select from
; the zeroinitializer operand, so the added lanes are zero) and bitcast to i32.
; Per the assertions below, both runs emit the same zmm vpcmpeqd and differ
; only in the mask read, kmovd with AVX512VL/BW/DQ and kmovw with AVX512F only.
2373 define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
2374 ; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask:
2375 ; VLX: # %bb.0: # %entry
2376 ; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2377 ; VLX-NEXT: kmovd %k0, %eax
2378 ; VLX-NEXT: vzeroupper
2381 ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask:
2382 ; NoVLX: # %bb.0: # %entry
2383 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2384 ; NoVLX-NEXT: kmovw %k0, %eax
2385 ; NoVLX-NEXT: vzeroupper
2388 %0 = bitcast <8 x i64> %__a to <16 x i32>
2389 %1 = bitcast <8 x i64> %__b to <16 x i32>
2390 %2 = icmp eq <16 x i32> %0, %1
2391 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2392 %4 = bitcast <32 x i1> %3 to i32
; Unmasked equality compare of sixteen i32 lanes of %__a against a full
; 512-bit vector loaded from %__b. The <16 x i1> result is widened to 32 lanes
; with zeros from the zeroinitializer operand and bitcast to i32.
; Per the assertions below, both runs fold the load into the zmm vpcmpeqd and
; differ only in the mask read (kmovd versus kmovw).
2396 define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
2397 ; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem:
2398 ; VLX: # %bb.0: # %entry
2399 ; VLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
2400 ; VLX-NEXT: kmovd %k0, %eax
2401 ; VLX-NEXT: vzeroupper
2404 ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem:
2405 ; NoVLX: # %bb.0: # %entry
2406 ; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
2407 ; NoVLX-NEXT: kmovw %k0, %eax
2408 ; NoVLX-NEXT: vzeroupper
2411 %0 = bitcast <8 x i64> %__a to <16 x i32>
2412 %load = load <8 x i64>, ptr %__b
2413 %1 = bitcast <8 x i64> %load to <16 x i32>
2414 %2 = icmp eq <16 x i32> %0, %1
2415 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2416 %4 = bitcast <32 x i1> %3 to i32
; Masked register-register equality compare of sixteen i32 lanes. The
; <16 x i1> result is ANDed with the bits of the i16 %__u, widened to 32 lanes
; with zeros from the zeroinitializer operand and bitcast to i32.
; Per the assertions below, the AVX512VL run applies the mask as a {%k1}
; write-mask on the compare, whereas the AVX512F-only run compares unmasked
; and applies %__u afterwards with `andl %edi, %eax` in a general register.
2420 define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
2421 ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
2422 ; VLX: # %bb.0: # %entry
2423 ; VLX-NEXT: kmovd %edi, %k1
2424 ; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2425 ; VLX-NEXT: kmovd %k0, %eax
2426 ; VLX-NEXT: vzeroupper
2429 ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
2430 ; NoVLX: # %bb.0: # %entry
2431 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2432 ; NoVLX-NEXT: kmovw %k0, %eax
2433 ; NoVLX-NEXT: andl %edi, %eax
2434 ; NoVLX-NEXT: vzeroupper
2437 %0 = bitcast <8 x i64> %__a to <16 x i32>
2438 %1 = bitcast <8 x i64> %__b to <16 x i32>
2439 %2 = icmp eq <16 x i32> %0, %1
2440 %3 = bitcast i16 %__u to <16 x i1>
2441 %4 = and <16 x i1> %2, %3
2442 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2443 %6 = bitcast <32 x i1> %5 to i32
; Masked equality compare of sixteen i32 lanes of %__a against a full 512-bit
; vector loaded from %__b. The <16 x i1> result is ANDed with the bits of the
; i16 %__u, widened to 32 lanes with zeros from the zeroinitializer operand
; and bitcast to i32.
; Per the assertions below, both runs fold the load into the compare; the
; AVX512VL run uses a {%k1} write-mask, while the AVX512F-only run applies
; %__u afterwards with `andl %edi, %eax`.
2447 define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
2448 ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
2449 ; VLX: # %bb.0: # %entry
2450 ; VLX-NEXT: kmovd %edi, %k1
2451 ; VLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
2452 ; VLX-NEXT: kmovd %k0, %eax
2453 ; VLX-NEXT: vzeroupper
2456 ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
2457 ; NoVLX: # %bb.0: # %entry
2458 ; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0
2459 ; NoVLX-NEXT: kmovw %k0, %eax
2460 ; NoVLX-NEXT: andl %edi, %eax
2461 ; NoVLX-NEXT: vzeroupper
2464 %0 = bitcast <8 x i64> %__a to <16 x i32>
2465 %load = load <8 x i64>, ptr %__b
2466 %1 = bitcast <8 x i64> %load to <16 x i32>
2467 %2 = icmp eq <16 x i32> %0, %1
2468 %3 = bitcast i16 %__u to <16 x i1>
2469 %4 = and <16 x i1> %2, %3
2470 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2471 %6 = bitcast <32 x i1> %5 to i32
; Unmasked equality compare of sixteen i32 lanes of %__a against a single i32
; loaded from %__b and splatted to all sixteen lanes. The <16 x i1> result is
; widened to 32 lanes with zeros from the zeroinitializer operand and bitcast
; to i32.
; Per the assertions below, both runs fold the load as a {1to16} embedded
; broadcast on zmm and differ only in the mask read (kmovd versus kmovw).
2476 define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
2477 ; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b:
2478 ; VLX: # %bb.0: # %entry
2479 ; VLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2480 ; VLX-NEXT: kmovd %k0, %eax
2481 ; VLX-NEXT: vzeroupper
2484 ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b:
2485 ; NoVLX: # %bb.0: # %entry
2486 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2487 ; NoVLX-NEXT: kmovw %k0, %eax
2488 ; NoVLX-NEXT: vzeroupper
2491 %0 = bitcast <8 x i64> %__a to <16 x i32>
2492 %load = load i32, ptr %__b
2493 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
2494 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2495 %2 = icmp eq <16 x i32> %0, %1
2496 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2497 %4 = bitcast <32 x i1> %3 to i32
; Masked equality compare of sixteen i32 lanes of %__a against a single i32
; loaded from %__b and splatted to all lanes. The <16 x i1> result is ANDed
; with the bits of the i16 %__u (mask is the first `and` operand here),
; widened to 32 lanes with zeros from the zeroinitializer operand and bitcast
; to i32.
; Per the assertions below, both runs fold the {1to16} broadcast; the AVX512VL
; run uses a {%k1} write-mask, while the AVX512F-only run applies %__u
; afterwards with `andl %edi, %eax`.
2501 define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
2502 ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
2503 ; VLX: # %bb.0: # %entry
2504 ; VLX-NEXT: kmovd %edi, %k1
2505 ; VLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
2506 ; VLX-NEXT: kmovd %k0, %eax
2507 ; VLX-NEXT: vzeroupper
2510 ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
2511 ; NoVLX: # %bb.0: # %entry
2512 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0
2513 ; NoVLX-NEXT: kmovw %k0, %eax
2514 ; NoVLX-NEXT: andl %edi, %eax
2515 ; NoVLX-NEXT: vzeroupper
2518 %0 = bitcast <8 x i64> %__a to <16 x i32>
2519 %load = load i32, ptr %__b
2520 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
2521 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2522 %2 = icmp eq <16 x i32> %0, %1
2523 %3 = bitcast i16 %__u to <16 x i1>
2524 %4 = and <16 x i1> %3, %2
2525 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2526 %6 = bitcast <32 x i1> %5 to i32
; Unmasked register-register equality compare of sixteen i32 lanes. The
; <16 x i1> result is widened to 64 lanes (shuffle indices 16-31 select from
; the zeroinitializer operand, so the added lanes are zero) and bitcast to i64.
; Per the assertions below, both runs emit the same zmm vpcmpeqd; the
; AVX512VL/BW/DQ run reads the mask with kmovq into %rax and the AVX512F-only
; run reads it with kmovw into %eax.
2531 define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
2532 ; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask:
2533 ; VLX: # %bb.0: # %entry
2534 ; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2535 ; VLX-NEXT: kmovq %k0, %rax
2536 ; VLX-NEXT: vzeroupper
2539 ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask:
2540 ; NoVLX: # %bb.0: # %entry
2541 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2542 ; NoVLX-NEXT: kmovw %k0, %eax
2543 ; NoVLX-NEXT: vzeroupper
2546 %0 = bitcast <8 x i64> %__a to <16 x i32>
2547 %1 = bitcast <8 x i64> %__b to <16 x i32>
2548 %2 = icmp eq <16 x i32> %0, %1
2549 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2550 %4 = bitcast <64 x i1> %3 to i64
; Unmasked equality compare of sixteen i32 lanes of %__a against a full
; 512-bit vector loaded from %__b. The <16 x i1> result is widened to 64 lanes
; with zeros from the zeroinitializer operand and bitcast to i64.
; Per the assertions below, both runs fold the load into the zmm vpcmpeqd and
; differ only in the mask read (kmovq into %rax versus kmovw into %eax).
2554 define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
2555 ; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem:
2556 ; VLX: # %bb.0: # %entry
2557 ; VLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
2558 ; VLX-NEXT: kmovq %k0, %rax
2559 ; VLX-NEXT: vzeroupper
2562 ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem:
2563 ; NoVLX: # %bb.0: # %entry
2564 ; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
2565 ; NoVLX-NEXT: kmovw %k0, %eax
2566 ; NoVLX-NEXT: vzeroupper
2569 %0 = bitcast <8 x i64> %__a to <16 x i32>
2570 %load = load <8 x i64>, ptr %__b
2571 %1 = bitcast <8 x i64> %load to <16 x i32>
2572 %2 = icmp eq <16 x i32> %0, %1
2573 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2574 %4 = bitcast <64 x i1> %3 to i64
; Masked register-register equality compare of sixteen i32 lanes. The
; <16 x i1> result is ANDed with the bits of the i16 %__u, widened to 64 lanes
; with zeros from the zeroinitializer operand and bitcast to i64.
; Per the assertions below, the AVX512VL run applies the mask as a {%k1}
; write-mask and reads the result with kmovq; the AVX512F-only run compares
; unmasked, reads with kmovw and applies %__u with `andl %edi, %eax`.
2578 define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
2579 ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
2580 ; VLX: # %bb.0: # %entry
2581 ; VLX-NEXT: kmovd %edi, %k1
2582 ; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2583 ; VLX-NEXT: kmovq %k0, %rax
2584 ; VLX-NEXT: vzeroupper
2587 ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
2588 ; NoVLX: # %bb.0: # %entry
2589 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2590 ; NoVLX-NEXT: kmovw %k0, %eax
2591 ; NoVLX-NEXT: andl %edi, %eax
2592 ; NoVLX-NEXT: vzeroupper
2595 %0 = bitcast <8 x i64> %__a to <16 x i32>
2596 %1 = bitcast <8 x i64> %__b to <16 x i32>
2597 %2 = icmp eq <16 x i32> %0, %1
2598 %3 = bitcast i16 %__u to <16 x i1>
2599 %4 = and <16 x i1> %2, %3
2600 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2601 %6 = bitcast <64 x i1> %5 to i64
; Masked equality compare of sixteen i32 lanes of %__a against a full 512-bit
; vector loaded from %__b. The <16 x i1> result is ANDed with the bits of the
; i16 %__u, widened to 64 lanes with zeros from the zeroinitializer operand
; and bitcast to i64.
; Per the assertions below, both runs fold the load into the compare; the
; AVX512VL run uses a {%k1} write-mask and kmovq, while the AVX512F-only run
; reads with kmovw and applies %__u with `andl %edi, %eax`.
2605 define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
2606 ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
2607 ; VLX: # %bb.0: # %entry
2608 ; VLX-NEXT: kmovd %edi, %k1
2609 ; VLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
2610 ; VLX-NEXT: kmovq %k0, %rax
2611 ; VLX-NEXT: vzeroupper
2614 ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
2615 ; NoVLX: # %bb.0: # %entry
2616 ; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0
2617 ; NoVLX-NEXT: kmovw %k0, %eax
2618 ; NoVLX-NEXT: andl %edi, %eax
2619 ; NoVLX-NEXT: vzeroupper
2622 %0 = bitcast <8 x i64> %__a to <16 x i32>
2623 %load = load <8 x i64>, ptr %__b
2624 %1 = bitcast <8 x i64> %load to <16 x i32>
2625 %2 = icmp eq <16 x i32> %0, %1
2626 %3 = bitcast i16 %__u to <16 x i1>
2627 %4 = and <16 x i1> %2, %3
2628 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2629 %6 = bitcast <64 x i1> %5 to i64
; Unmasked equality compare of sixteen i32 lanes of %__a against a single i32
; loaded from %__b and splatted to all sixteen lanes. The <16 x i1> result is
; widened to 64 lanes with zeros from the zeroinitializer operand and bitcast
; to i64.
; Per the assertions below, both runs fold the load as a {1to16} embedded
; broadcast on zmm and differ only in the mask read (kmovq versus kmovw).
2634 define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
2635 ; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b:
2636 ; VLX: # %bb.0: # %entry
2637 ; VLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2638 ; VLX-NEXT: kmovq %k0, %rax
2639 ; VLX-NEXT: vzeroupper
2642 ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b:
2643 ; NoVLX: # %bb.0: # %entry
2644 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2645 ; NoVLX-NEXT: kmovw %k0, %eax
2646 ; NoVLX-NEXT: vzeroupper
2649 %0 = bitcast <8 x i64> %__a to <16 x i32>
2650 %load = load i32, ptr %__b
2651 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
2652 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2653 %2 = icmp eq <16 x i32> %0, %1
2654 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2655 %4 = bitcast <64 x i1> %3 to i64
; Masked equality compare of sixteen i32 lanes of %__a against a single i32
; loaded from %__b and splatted to all lanes. The <16 x i1> result is ANDed
; with the bits of the i16 %__u (mask is the first `and` operand here),
; widened to 64 lanes with zeros from the zeroinitializer operand and bitcast
; to i64.
; Per the assertions below, both runs fold the {1to16} broadcast; the AVX512VL
; run uses a {%k1} write-mask and kmovq, while the AVX512F-only run reads with
; kmovw and applies %__u with `andl %edi, %eax`.
2659 define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
2660 ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
2661 ; VLX: # %bb.0: # %entry
2662 ; VLX-NEXT: kmovd %edi, %k1
2663 ; VLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
2664 ; VLX-NEXT: kmovq %k0, %rax
2665 ; VLX-NEXT: vzeroupper
2668 ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
2669 ; NoVLX: # %bb.0: # %entry
2670 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0
2671 ; NoVLX-NEXT: kmovw %k0, %eax
2672 ; NoVLX-NEXT: andl %edi, %eax
2673 ; NoVLX-NEXT: vzeroupper
2676 %0 = bitcast <8 x i64> %__a to <16 x i32>
2677 %load = load i32, ptr %__b
2678 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
2679 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2680 %2 = icmp eq <16 x i32> %0, %1
2681 %3 = bitcast i16 %__u to <16 x i1>
2682 %4 = and <16 x i1> %3, %2
2683 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2684 %6 = bitcast <64 x i1> %5 to i64
; Unmasked register-register equality compare of two i64 lanes (the bitcasts
; from <2 x i64> to <2 x i64> are identity casts). The <2 x i1> result is
; widened to four lanes (shuffle indices 2-3 select from the zeroinitializer
; operand, so the added lanes are zero) and bitcast to i4.
; Per the assertions below, the AVX512VL/DQ run compares on xmm and reads the
; mask with kmovb; the AVX512F-only run compares on zmm, reads the mask with
; kmovw and keeps only the two low bits with `andl $3, %eax`.
2689 define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2690 ; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask:
2691 ; VLX: # %bb.0: # %entry
2692 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
2693 ; VLX-NEXT: kmovb %k0, %eax
2696 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask:
2697 ; NoVLX: # %bb.0: # %entry
2698 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
2699 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2700 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2701 ; NoVLX-NEXT: kmovw %k0, %eax
2702 ; NoVLX-NEXT: andl $3, %eax
2703 ; NoVLX-NEXT: vzeroupper
2706 %0 = bitcast <2 x i64> %__a to <2 x i64>
2707 %1 = bitcast <2 x i64> %__b to <2 x i64>
2708 %2 = icmp eq <2 x i64> %0, %1
2709 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2710 %4 = bitcast <4 x i1> %3 to i4
; Unmasked equality compare of two i64 lanes of %__a against a full 128-bit
; vector loaded from %__b. The <2 x i1> result is widened to four lanes with
; zeros from the zeroinitializer operand and bitcast to i4.
; Per the assertions below, the AVX512VL/DQ run folds the load into the xmm
; compare and reads the mask with kmovb; the AVX512F-only run loads into xmm1,
; compares on zmm and keeps only the two low bits with `andl $3, %eax`.
2714 define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
2715 ; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem:
2716 ; VLX: # %bb.0: # %entry
2717 ; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
2718 ; VLX-NEXT: kmovb %k0, %eax
2721 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem:
2722 ; NoVLX: # %bb.0: # %entry
2723 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2724 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
2725 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2726 ; NoVLX-NEXT: kmovw %k0, %eax
2727 ; NoVLX-NEXT: andl $3, %eax
2728 ; NoVLX-NEXT: vzeroupper
2731 %0 = bitcast <2 x i64> %__a to <2 x i64>
2732 %load = load <2 x i64>, ptr %__b
2733 %1 = bitcast <2 x i64> %load to <2 x i64>
2734 %2 = icmp eq <2 x i64> %0, %1
2735 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2736 %4 = bitcast <4 x i1> %3 to i4
; Masked register-register equality compare of two i64 lanes. The i8 %__u is
; bitcast to <8 x i1> and its lanes 0-1 are extracted (%extract.i) and ANDed
; with the <2 x i1> compare result; the result is widened to four lanes with
; zeros from the zeroinitializer operand and bitcast to i4.
; Per the assertions below, both runs move %edi into %k1 and use it as the
; compare's write-mask; the AVX512F-only run compares on zmm and keeps only
; the two low bits with `andl $3, %eax`.
2740 define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2741 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask:
2742 ; VLX: # %bb.0: # %entry
2743 ; VLX-NEXT: kmovd %edi, %k1
2744 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
2745 ; VLX-NEXT: kmovb %k0, %eax
2748 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask:
2749 ; NoVLX: # %bb.0: # %entry
2750 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
2751 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2752 ; NoVLX-NEXT: kmovw %edi, %k1
2753 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2754 ; NoVLX-NEXT: kmovw %k0, %eax
2755 ; NoVLX-NEXT: andl $3, %eax
2756 ; NoVLX-NEXT: vzeroupper
2759 %0 = bitcast <2 x i64> %__a to <2 x i64>
2760 %1 = bitcast <2 x i64> %__b to <2 x i64>
2761 %2 = icmp eq <2 x i64> %0, %1
2762 %3 = bitcast i8 %__u to <8 x i1>
2763 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2764 %4 = and <2 x i1> %2, %extract.i
2765 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2766 %6 = bitcast <4 x i1> %5 to i4
; Masked equality compare of two i64 lanes of %__a against a full 128-bit
; vector loaded from %__b. Lanes 0-1 of %__u (bitcast to <8 x i1>) are
; extracted and ANDed with the <2 x i1> compare result, which is then widened
; to four lanes with zeros from the zeroinitializer operand and bitcast to i4.
; Per the assertions below, the AVX512VL/DQ run folds the load and the
; write-mask into one xmm compare; the AVX512F-only run loads into xmm1,
; compares on zmm under {%k1} and keeps the two low bits with `andl $3, %eax`.
2770 define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
2771 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem:
2772 ; VLX: # %bb.0: # %entry
2773 ; VLX-NEXT: kmovd %edi, %k1
2774 ; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
2775 ; VLX-NEXT: kmovb %k0, %eax
2778 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem:
2779 ; NoVLX: # %bb.0: # %entry
2780 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2781 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
2782 ; NoVLX-NEXT: kmovw %edi, %k1
2783 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2784 ; NoVLX-NEXT: kmovw %k0, %eax
2785 ; NoVLX-NEXT: andl $3, %eax
2786 ; NoVLX-NEXT: vzeroupper
2789 %0 = bitcast <2 x i64> %__a to <2 x i64>
2790 %load = load <2 x i64>, ptr %__b
2791 %1 = bitcast <2 x i64> %load to <2 x i64>
2792 %2 = icmp eq <2 x i64> %0, %1
2793 %3 = bitcast i8 %__u to <8 x i1>
2794 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2795 %4 = and <2 x i1> %2, %extract.i
2796 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2797 %6 = bitcast <4 x i1> %5 to i4
; Unmasked equality compare of two i64 lanes of %__a against a single i64
; loaded from %__b and splatted to both lanes (insertelement followed by an
; all-zero-index shuffle). The <2 x i1> result is widened to four lanes with
; zeros from the zeroinitializer operand and bitcast to i4.
; Per the assertions below, the load is folded as an embedded broadcast,
; {1to2} on xmm in the AVX512VL/DQ run and {1to8} on zmm in the AVX512F-only
; run; the latter keeps only the two low mask bits with `andl $3, %eax`.
2802 define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
2803 ; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b:
2804 ; VLX: # %bb.0: # %entry
2805 ; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
2806 ; VLX-NEXT: kmovb %k0, %eax
2809 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b:
2810 ; NoVLX: # %bb.0: # %entry
2811 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2812 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
2813 ; NoVLX-NEXT: kmovw %k0, %eax
2814 ; NoVLX-NEXT: andl $3, %eax
2815 ; NoVLX-NEXT: vzeroupper
2818 %0 = bitcast <2 x i64> %__a to <2 x i64>
2819 %load = load i64, ptr %__b
2820 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
2821 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
2822 %2 = icmp eq <2 x i64> %0, %1
2823 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2824 %4 = bitcast <4 x i1> %3 to i4
; Masked equality compare of %__a against a single i64 loaded from %__b and
; splatted to both lanes (insertelement at index 0, then shuffle mask <0, 0>).
; The <2 x i1> result is ANDed with lanes 0 and 1 of %__u (viewed as <8 x i1>),
; padded to <4 x i1> with lanes of the zeroinitializer operand, and bitcast
; to i4.
; Expected codegen: the +avx512vl run uses a write-masked {1to2} broadcast
; compare on xmm; the avx512f-only run uses a write-masked {1to8} broadcast
; compare on zmm followed by `andl $3`.
2828 define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
2829 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b:
2830 ; VLX: # %bb.0: # %entry
2831 ; VLX-NEXT: kmovd %edi, %k1
2832 ; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
2833 ; VLX-NEXT: kmovb %k0, %eax
2836 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b:
2837 ; NoVLX: # %bb.0: # %entry
2838 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2839 ; NoVLX-NEXT: kmovw %edi, %k1
2840 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
2841 ; NoVLX-NEXT: kmovw %k0, %eax
2842 ; NoVLX-NEXT: andl $3, %eax
2843 ; NoVLX-NEXT: vzeroupper
2846 %0 = bitcast <2 x i64> %__a to <2 x i64>
2847 %load = load i64, ptr %__b
2848 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
2849 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
2850 %2 = icmp eq <2 x i64> %0, %1
2851 %3 = bitcast i8 %__u to <8 x i1>
2852 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2853 %4 = and <2 x i1> %extract.i, %2
2854 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2855 %6 = bitcast <4 x i1> %5 to i4
; Unmasked equality compare of the two <2 x i64> register arguments.
; The <2 x i1> result is padded to <8 x i1> (shuffle indices 2 and 3 select
; lanes of the zeroinitializer operand) and bitcast to i8.
; Expected codegen: the +avx512vl run uses an xmm vpcmpeqq read back with
; kmovd; the avx512f-only run compares on zmm and clears all but the low two
; mask bits with the kshiftlw/kshiftrw $14 pair.
2860 define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2861 ; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask:
2862 ; VLX: # %bb.0: # %entry
2863 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
2864 ; VLX-NEXT: kmovd %k0, %eax
2865 ; VLX-NEXT: # kill: def $al killed $al killed $eax
2868 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask:
2869 ; NoVLX: # %bb.0: # %entry
2870 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
2871 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2872 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2873 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2874 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2875 ; NoVLX-NEXT: kmovw %k0, %eax
2876 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
2877 ; NoVLX-NEXT: vzeroupper
2880 %0 = bitcast <2 x i64> %__a to <2 x i64>
2881 %1 = bitcast <2 x i64> %__b to <2 x i64>
2882 %2 = icmp eq <2 x i64> %0, %1
2883 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
2884 %4 = bitcast <8 x i1> %3 to i8
; Unmasked equality compare of %__a against a <2 x i64> loaded from %__b.
; The <2 x i1> result is padded to <8 x i1> (shuffle indices 2 and 3 select
; lanes of the zeroinitializer operand) and bitcast to i8.
; Expected codegen: the +avx512vl run folds the load into an xmm vpcmpeqq; the
; avx512f-only run loads into %xmm1, compares on zmm, and clears all but the
; low two mask bits with the kshiftlw/kshiftrw $14 pair.
2888 define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
2889 ; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem:
2890 ; VLX: # %bb.0: # %entry
2891 ; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
2892 ; VLX-NEXT: kmovd %k0, %eax
2893 ; VLX-NEXT: # kill: def $al killed $al killed $eax
2896 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem:
2897 ; NoVLX: # %bb.0: # %entry
2898 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2899 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
2900 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2901 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2902 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2903 ; NoVLX-NEXT: kmovw %k0, %eax
2904 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
2905 ; NoVLX-NEXT: vzeroupper
2908 %0 = bitcast <2 x i64> %__a to <2 x i64>
2909 %load = load <2 x i64>, ptr %__b
2910 %1 = bitcast <2 x i64> %load to <2 x i64>
2911 %2 = icmp eq <2 x i64> %0, %1
2912 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
2913 %4 = bitcast <8 x i1> %3 to i8
; Masked equality compare of the two <2 x i64> register arguments.
; The <2 x i1> result is ANDed with lanes 0 and 1 of %__u (viewed as <8 x i1>),
; padded to <8 x i1> with lanes of the zeroinitializer operand (shuffle indices
; 2 and 3), and bitcast to i8.
; Expected codegen: the +avx512vl run moves %edi into %k1 and uses a
; write-masked xmm vpcmpeqq; the avx512f-only run does the write-masked compare
; on zmm and clears all but the low two mask bits with kshiftlw/kshiftrw $14.
2917 define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2918 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask:
2919 ; VLX: # %bb.0: # %entry
2920 ; VLX-NEXT: kmovd %edi, %k1
2921 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
2922 ; VLX-NEXT: kmovd %k0, %eax
2923 ; VLX-NEXT: # kill: def $al killed $al killed $eax
2926 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask:
2927 ; NoVLX: # %bb.0: # %entry
2928 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
2929 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2930 ; NoVLX-NEXT: kmovw %edi, %k1
2931 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2932 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2933 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2934 ; NoVLX-NEXT: kmovw %k0, %eax
2935 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
2936 ; NoVLX-NEXT: vzeroupper
2939 %0 = bitcast <2 x i64> %__a to <2 x i64>
2940 %1 = bitcast <2 x i64> %__b to <2 x i64>
2941 %2 = icmp eq <2 x i64> %0, %1
2942 %3 = bitcast i8 %__u to <8 x i1>
2943 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2944 %4 = and <2 x i1> %2, %extract.i
2945 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
2946 %6 = bitcast <8 x i1> %5 to i8
; Masked equality compare of %__a against a <2 x i64> loaded from %__b.
; The <2 x i1> result is ANDed with lanes 0 and 1 of %__u (viewed as <8 x i1>),
; padded to <8 x i1> with lanes of the zeroinitializer operand (shuffle indices
; 2 and 3), and bitcast to i8.
; Expected codegen: the +avx512vl run folds the load into a write-masked xmm
; vpcmpeqq; the avx512f-only run loads into %xmm1, does the write-masked compare
; on zmm, and clears all but the low two mask bits with kshiftlw/kshiftrw $14.
2950 define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
2951 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem:
2952 ; VLX: # %bb.0: # %entry
2953 ; VLX-NEXT: kmovd %edi, %k1
2954 ; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
2955 ; VLX-NEXT: kmovd %k0, %eax
2956 ; VLX-NEXT: # kill: def $al killed $al killed $eax
2959 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem:
2960 ; NoVLX: # %bb.0: # %entry
2961 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2962 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
2963 ; NoVLX-NEXT: kmovw %edi, %k1
2964 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2965 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2966 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2967 ; NoVLX-NEXT: kmovw %k0, %eax
2968 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
2969 ; NoVLX-NEXT: vzeroupper
2972 %0 = bitcast <2 x i64> %__a to <2 x i64>
2973 %load = load <2 x i64>, ptr %__b
2974 %1 = bitcast <2 x i64> %load to <2 x i64>
2975 %2 = icmp eq <2 x i64> %0, %1
2976 %3 = bitcast i8 %__u to <8 x i1>
2977 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2978 %4 = and <2 x i1> %2, %extract.i
2979 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
2980 %6 = bitcast <8 x i1> %5 to i8
; Unmasked equality compare of %__a against a single i64 loaded from %__b and
; splatted to both lanes (insertelement at index 0, then shuffle mask <0, 0>).
; The <2 x i1> result is padded to <8 x i1> (shuffle indices 2 and 3 select
; lanes of the zeroinitializer operand) and bitcast to i8.
; Expected codegen: the +avx512vl run uses a {1to2} broadcast memory operand on
; xmm; the avx512f-only run uses a {1to8} broadcast on zmm and clears all but
; the low two mask bits with the kshiftlw/kshiftrw $14 pair.
2985 define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
2986 ; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b:
2987 ; VLX: # %bb.0: # %entry
2988 ; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
2989 ; VLX-NEXT: kmovd %k0, %eax
2990 ; VLX-NEXT: # kill: def $al killed $al killed $eax
2993 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b:
2994 ; NoVLX: # %bb.0: # %entry
2995 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2996 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
2997 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2998 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2999 ; NoVLX-NEXT: kmovw %k0, %eax
3000 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3001 ; NoVLX-NEXT: vzeroupper
3004 %0 = bitcast <2 x i64> %__a to <2 x i64>
3005 %load = load i64, ptr %__b
3006 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3007 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3008 %2 = icmp eq <2 x i64> %0, %1
3009 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3010 %4 = bitcast <8 x i1> %3 to i8
; Masked equality compare of %__a against a single i64 loaded from %__b and
; splatted to both lanes (insertelement at index 0, then shuffle mask <0, 0>).
; The <2 x i1> result is ANDed with lanes 0 and 1 of %__u (viewed as <8 x i1>),
; padded to <8 x i1> with lanes of the zeroinitializer operand, and bitcast
; to i8.
; Expected codegen: the +avx512vl run uses a write-masked {1to2} broadcast
; compare on xmm; the avx512f-only run uses a write-masked {1to8} broadcast
; compare on zmm and clears all but the low two mask bits with
; kshiftlw/kshiftrw $14.
3014 define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
3015 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b:
3016 ; VLX: # %bb.0: # %entry
3017 ; VLX-NEXT: kmovd %edi, %k1
3018 ; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
3019 ; VLX-NEXT: kmovd %k0, %eax
3020 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3023 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b:
3024 ; NoVLX: # %bb.0: # %entry
3025 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3026 ; NoVLX-NEXT: kmovw %edi, %k1
3027 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
3028 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3029 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3030 ; NoVLX-NEXT: kmovw %k0, %eax
3031 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3032 ; NoVLX-NEXT: vzeroupper
3035 %0 = bitcast <2 x i64> %__a to <2 x i64>
3036 %load = load i64, ptr %__b
3037 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3038 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3039 %2 = icmp eq <2 x i64> %0, %1
3040 %3 = bitcast i8 %__u to <8 x i1>
3041 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3042 %4 = and <2 x i1> %extract.i, %2
3043 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3044 %6 = bitcast <8 x i1> %5 to i8
; Unmasked equality compare of the two <2 x i64> register arguments.
; The <2 x i1> result is padded to <16 x i1> (shuffle indices 2 and 3 select
; lanes of the zeroinitializer operand) and bitcast to i16.
; Expected codegen: the +avx512vl run uses an xmm vpcmpeqq read back with
; kmovd; the avx512f-only run compares on zmm and clears all but the low two
; mask bits with the kshiftlw/kshiftrw $14 pair.
3049 define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3050 ; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask:
3051 ; VLX: # %bb.0: # %entry
3052 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
3053 ; VLX-NEXT: kmovd %k0, %eax
3054 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3057 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask:
3058 ; NoVLX: # %bb.0: # %entry
3059 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3060 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3061 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3062 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3063 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3064 ; NoVLX-NEXT: kmovw %k0, %eax
3065 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3066 ; NoVLX-NEXT: vzeroupper
3069 %0 = bitcast <2 x i64> %__a to <2 x i64>
3070 %1 = bitcast <2 x i64> %__b to <2 x i64>
3071 %2 = icmp eq <2 x i64> %0, %1
3072 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3073 %4 = bitcast <16 x i1> %3 to i16
; Unmasked equality compare of %__a against a <2 x i64> loaded from %__b.
; The <2 x i1> result is padded to <16 x i1> (shuffle indices 2 and 3 select
; lanes of the zeroinitializer operand) and bitcast to i16.
; Expected codegen: the +avx512vl run folds the load into an xmm vpcmpeqq; the
; avx512f-only run loads into %xmm1, compares on zmm, and clears all but the
; low two mask bits with the kshiftlw/kshiftrw $14 pair.
3077 define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
3078 ; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem:
3079 ; VLX: # %bb.0: # %entry
3080 ; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
3081 ; VLX-NEXT: kmovd %k0, %eax
3082 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3085 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem:
3086 ; NoVLX: # %bb.0: # %entry
3087 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3088 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
3089 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3090 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3091 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3092 ; NoVLX-NEXT: kmovw %k0, %eax
3093 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3094 ; NoVLX-NEXT: vzeroupper
3097 %0 = bitcast <2 x i64> %__a to <2 x i64>
3098 %load = load <2 x i64>, ptr %__b
3099 %1 = bitcast <2 x i64> %load to <2 x i64>
3100 %2 = icmp eq <2 x i64> %0, %1
3101 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3102 %4 = bitcast <16 x i1> %3 to i16
; Masked equality compare of the two <2 x i64> register arguments.
; The <2 x i1> result is ANDed with lanes 0 and 1 of %__u (viewed as <8 x i1>),
; padded to <16 x i1> with lanes of the zeroinitializer operand (shuffle
; indices 2 and 3), and bitcast to i16.
; Expected codegen: the +avx512vl run moves %edi into %k1 and uses a
; write-masked xmm vpcmpeqq; the avx512f-only run does the write-masked compare
; on zmm and clears all but the low two mask bits with kshiftlw/kshiftrw $14.
3106 define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3107 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask:
3108 ; VLX: # %bb.0: # %entry
3109 ; VLX-NEXT: kmovd %edi, %k1
3110 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
3111 ; VLX-NEXT: kmovd %k0, %eax
3112 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3115 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask:
3116 ; NoVLX: # %bb.0: # %entry
3117 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3118 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3119 ; NoVLX-NEXT: kmovw %edi, %k1
3120 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3121 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3122 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3123 ; NoVLX-NEXT: kmovw %k0, %eax
3124 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3125 ; NoVLX-NEXT: vzeroupper
3128 %0 = bitcast <2 x i64> %__a to <2 x i64>
3129 %1 = bitcast <2 x i64> %__b to <2 x i64>
3130 %2 = icmp eq <2 x i64> %0, %1
3131 %3 = bitcast i8 %__u to <8 x i1>
3132 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3133 %4 = and <2 x i1> %2, %extract.i
3134 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3135 %6 = bitcast <16 x i1> %5 to i16
; Masked equality compare of %__a against a <2 x i64> loaded from %__b.
; The <2 x i1> result is ANDed with lanes 0 and 1 of %__u (viewed as <8 x i1>),
; padded to <16 x i1> with lanes of the zeroinitializer operand (shuffle
; indices 2 and 3), and bitcast to i16.
; Expected codegen: the +avx512vl run folds the load into a write-masked xmm
; vpcmpeqq; the avx512f-only run loads into %xmm1, does the write-masked compare
; on zmm, and clears all but the low two mask bits with kshiftlw/kshiftrw $14.
3139 define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
3140 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem:
3141 ; VLX: # %bb.0: # %entry
3142 ; VLX-NEXT: kmovd %edi, %k1
3143 ; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
3144 ; VLX-NEXT: kmovd %k0, %eax
3145 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3148 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem:
3149 ; NoVLX: # %bb.0: # %entry
3150 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3151 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
3152 ; NoVLX-NEXT: kmovw %edi, %k1
3153 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3154 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3155 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3156 ; NoVLX-NEXT: kmovw %k0, %eax
3157 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3158 ; NoVLX-NEXT: vzeroupper
3161 %0 = bitcast <2 x i64> %__a to <2 x i64>
3162 %load = load <2 x i64>, ptr %__b
3163 %1 = bitcast <2 x i64> %load to <2 x i64>
3164 %2 = icmp eq <2 x i64> %0, %1
3165 %3 = bitcast i8 %__u to <8 x i1>
3166 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3167 %4 = and <2 x i1> %2, %extract.i
3168 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3169 %6 = bitcast <16 x i1> %5 to i16
; Unmasked equality compare of %__a against a single i64 loaded from %__b and
; splatted to both lanes (insertelement at index 0, then shuffle mask <0, 0>).
; The <2 x i1> result is padded to <16 x i1> (shuffle indices 2 and 3 select
; lanes of the zeroinitializer operand) and bitcast to i16.
; Expected codegen: the +avx512vl run uses a {1to2} broadcast memory operand on
; xmm; the avx512f-only run uses a {1to8} broadcast on zmm and clears all but
; the low two mask bits with the kshiftlw/kshiftrw $14 pair.
3174 define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
3175 ; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b:
3176 ; VLX: # %bb.0: # %entry
3177 ; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
3178 ; VLX-NEXT: kmovd %k0, %eax
3179 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3182 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b:
3183 ; NoVLX: # %bb.0: # %entry
3184 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3185 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
3186 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3187 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3188 ; NoVLX-NEXT: kmovw %k0, %eax
3189 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3190 ; NoVLX-NEXT: vzeroupper
3193 %0 = bitcast <2 x i64> %__a to <2 x i64>
3194 %load = load i64, ptr %__b
3195 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3196 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3197 %2 = icmp eq <2 x i64> %0, %1
3198 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3199 %4 = bitcast <16 x i1> %3 to i16
; Masked equality compare of %__a against a single i64 loaded from %__b and
; splatted to both lanes (insertelement at index 0, then shuffle mask <0, 0>).
; The <2 x i1> result is ANDed with lanes 0 and 1 of %__u (viewed as <8 x i1>),
; padded to <16 x i1> with lanes of the zeroinitializer operand, and bitcast
; to i16.
; Expected codegen: the +avx512vl run uses a write-masked {1to2} broadcast
; compare on xmm; the avx512f-only run uses a write-masked {1to8} broadcast
; compare on zmm and clears all but the low two mask bits with
; kshiftlw/kshiftrw $14.
3203 define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
3204 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b:
3205 ; VLX: # %bb.0: # %entry
3206 ; VLX-NEXT: kmovd %edi, %k1
3207 ; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
3208 ; VLX-NEXT: kmovd %k0, %eax
3209 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3212 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b:
3213 ; NoVLX: # %bb.0: # %entry
3214 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3215 ; NoVLX-NEXT: kmovw %edi, %k1
3216 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
3217 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3218 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3219 ; NoVLX-NEXT: kmovw %k0, %eax
3220 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3221 ; NoVLX-NEXT: vzeroupper
3224 %0 = bitcast <2 x i64> %__a to <2 x i64>
3225 %load = load i64, ptr %__b
3226 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3227 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3228 %2 = icmp eq <2 x i64> %0, %1
3229 %3 = bitcast i8 %__u to <8 x i1>
3230 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3231 %4 = and <2 x i1> %extract.i, %2
3232 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3233 %6 = bitcast <16 x i1> %5 to i16
; Unmasked equality compare of the two <2 x i64> register arguments.
; The <2 x i1> result is padded to <32 x i1> (shuffle indices 2 and 3 select
; lanes of the zeroinitializer operand) and bitcast to i32.
; Expected codegen: the +avx512vl run uses an xmm vpcmpeqq read back with
; kmovd; the avx512f-only run compares on zmm and clears all but the low two
; mask bits with the kshiftlw/kshiftrw $14 pair.
3238 define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3239 ; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
3240 ; VLX: # %bb.0: # %entry
3241 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
3242 ; VLX-NEXT: kmovd %k0, %eax
3245 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
3246 ; NoVLX: # %bb.0: # %entry
3247 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3248 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3249 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3250 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3251 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3252 ; NoVLX-NEXT: kmovw %k0, %eax
3253 ; NoVLX-NEXT: vzeroupper
3256 %0 = bitcast <2 x i64> %__a to <2 x i64>
3257 %1 = bitcast <2 x i64> %__b to <2 x i64>
3258 %2 = icmp eq <2 x i64> %0, %1
3259 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3260 %4 = bitcast <32 x i1> %3 to i32
; Unmasked equality compare of %__a against a <2 x i64> loaded from %__b.
; The <2 x i1> result is padded to <32 x i1> (shuffle indices 2 and 3 select
; lanes of the zeroinitializer operand) and bitcast to i32.
; Expected codegen: the +avx512vl run folds the load into an xmm vpcmpeqq; the
; avx512f-only run loads into %xmm1, compares on zmm, and clears all but the
; low two mask bits with the kshiftlw/kshiftrw $14 pair.
3264 define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
3265 ; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
3266 ; VLX: # %bb.0: # %entry
3267 ; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
3268 ; VLX-NEXT: kmovd %k0, %eax
3271 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
3272 ; NoVLX: # %bb.0: # %entry
3273 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3274 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
3275 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3276 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3277 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3278 ; NoVLX-NEXT: kmovw %k0, %eax
3279 ; NoVLX-NEXT: vzeroupper
3282 %0 = bitcast <2 x i64> %__a to <2 x i64>
3283 %load = load <2 x i64>, ptr %__b
3284 %1 = bitcast <2 x i64> %load to <2 x i64>
3285 %2 = icmp eq <2 x i64> %0, %1
3286 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3287 %4 = bitcast <32 x i1> %3 to i32
; Masked equality compare of the two <2 x i64> register arguments.
; The <2 x i1> result is ANDed with lanes 0 and 1 of %__u (viewed as <8 x i1>),
; padded to <32 x i1> with lanes of the zeroinitializer operand (shuffle
; indices 2 and 3), and bitcast to i32.
; Expected codegen: the +avx512vl run moves %edi into %k1 and uses a
; write-masked xmm vpcmpeqq; the avx512f-only run does the write-masked compare
; on zmm and clears all but the low two mask bits with kshiftlw/kshiftrw $14.
3291 define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3292 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
3293 ; VLX: # %bb.0: # %entry
3294 ; VLX-NEXT: kmovd %edi, %k1
3295 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
3296 ; VLX-NEXT: kmovd %k0, %eax
3299 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
3300 ; NoVLX: # %bb.0: # %entry
3301 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3302 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3303 ; NoVLX-NEXT: kmovw %edi, %k1
3304 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3305 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3306 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3307 ; NoVLX-NEXT: kmovw %k0, %eax
3308 ; NoVLX-NEXT: vzeroupper
3311 %0 = bitcast <2 x i64> %__a to <2 x i64>
3312 %1 = bitcast <2 x i64> %__b to <2 x i64>
3313 %2 = icmp eq <2 x i64> %0, %1
3314 %3 = bitcast i8 %__u to <8 x i1>
3315 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3316 %4 = and <2 x i1> %2, %extract.i
3317 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3318 %6 = bitcast <32 x i1> %5 to i32
; Masked equality compare of %__a against a <2 x i64> loaded from %__b.
; The <2 x i1> result is ANDed with lanes 0 and 1 of %__u (viewed as <8 x i1>),
; padded to <32 x i1> with lanes of the zeroinitializer operand (shuffle
; indices 2 and 3), and bitcast to i32.
; Expected codegen: the +avx512vl run folds the load into a write-masked xmm
; vpcmpeqq; the avx512f-only run loads into %xmm1, does the write-masked compare
; on zmm, and clears all but the low two mask bits with kshiftlw/kshiftrw $14.
3322 define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
3323 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
3324 ; VLX: # %bb.0: # %entry
3325 ; VLX-NEXT: kmovd %edi, %k1
3326 ; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
3327 ; VLX-NEXT: kmovd %k0, %eax
3330 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
3331 ; NoVLX: # %bb.0: # %entry
3332 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3333 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
3334 ; NoVLX-NEXT: kmovw %edi, %k1
3335 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3336 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3337 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3338 ; NoVLX-NEXT: kmovw %k0, %eax
3339 ; NoVLX-NEXT: vzeroupper
3342 %0 = bitcast <2 x i64> %__a to <2 x i64>
3343 %load = load <2 x i64>, ptr %__b
3344 %1 = bitcast <2 x i64> %load to <2 x i64>
3345 %2 = icmp eq <2 x i64> %0, %1
3346 %3 = bitcast i8 %__u to <8 x i1>
3347 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3348 %4 = and <2 x i1> %2, %extract.i
3349 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3350 %6 = bitcast <32 x i1> %5 to i32
; Unmasked equality compare of %__a against a single i64 loaded from %__b and
; splatted to both lanes (insertelement at index 0, then shuffle mask <0, 0>).
; The <2 x i1> result is padded to <32 x i1> (shuffle indices 2 and 3 select
; lanes of the zeroinitializer operand) and bitcast to i32.
; Expected codegen: the +avx512vl run uses a {1to2} broadcast memory operand on
; xmm; the avx512f-only run uses a {1to8} broadcast on zmm and clears all but
; the low two mask bits with the kshiftlw/kshiftrw $14 pair.
3355 define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
3356 ; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
3357 ; VLX: # %bb.0: # %entry
3358 ; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
3359 ; VLX-NEXT: kmovd %k0, %eax
3362 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
3363 ; NoVLX: # %bb.0: # %entry
3364 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3365 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
3366 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3367 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3368 ; NoVLX-NEXT: kmovw %k0, %eax
3369 ; NoVLX-NEXT: vzeroupper
3372 %0 = bitcast <2 x i64> %__a to <2 x i64>
3373 %load = load i64, ptr %__b
3374 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3375 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3376 %2 = icmp eq <2 x i64> %0, %1
3377 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3378 %4 = bitcast <32 x i1> %3 to i32
; Masked equality compare of %__a against a single i64 loaded from %__b and
; splatted to both lanes (insertelement at index 0, then shuffle mask <0, 0>).
; The <2 x i1> result is ANDed with lanes 0 and 1 of %__u (viewed as <8 x i1>),
; padded to <32 x i1> with lanes of the zeroinitializer operand, and bitcast
; to i32.
; Expected codegen: the +avx512vl run uses a write-masked {1to2} broadcast
; compare on xmm; the avx512f-only run uses a write-masked {1to8} broadcast
; compare on zmm and clears all but the low two mask bits with
; kshiftlw/kshiftrw $14.
3382 define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
3383 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
3384 ; VLX: # %bb.0: # %entry
3385 ; VLX-NEXT: kmovd %edi, %k1
3386 ; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
3387 ; VLX-NEXT: kmovd %k0, %eax
3390 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
3391 ; NoVLX: # %bb.0: # %entry
3392 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3393 ; NoVLX-NEXT: kmovw %edi, %k1
3394 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
3395 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3396 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3397 ; NoVLX-NEXT: kmovw %k0, %eax
3398 ; NoVLX-NEXT: vzeroupper
3401 %0 = bitcast <2 x i64> %__a to <2 x i64>
3402 %load = load i64, ptr %__b
3403 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3404 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3405 %2 = icmp eq <2 x i64> %0, %1
3406 %3 = bitcast i8 %__u to <8 x i1>
3407 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3408 %4 = and <2 x i1> %extract.i, %2
3409 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3410 %6 = bitcast <32 x i1> %5 to i32
3415 define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3416 ; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
3417 ; VLX: # %bb.0: # %entry
3418 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
3419 ; VLX-NEXT: kmovq %k0, %rax
3422 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
3423 ; NoVLX: # %bb.0: # %entry
3424 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3425 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3426 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3427 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3428 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3429 ; NoVLX-NEXT: kmovw %k0, %eax
3430 ; NoVLX-NEXT: vzeroupper
3433 %0 = bitcast <2 x i64> %__a to <2 x i64>
3434 %1 = bitcast <2 x i64> %__b to <2 x i64>
3435 %2 = icmp eq <2 x i64> %0, %1
3436 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3437 %4 = bitcast <64 x i1> %3 to i64
3441 define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
3442 ; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
3443 ; VLX: # %bb.0: # %entry
3444 ; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
3445 ; VLX-NEXT: kmovq %k0, %rax
3448 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
3449 ; NoVLX: # %bb.0: # %entry
3450 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3451 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
3452 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3453 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3454 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3455 ; NoVLX-NEXT: kmovw %k0, %eax
3456 ; NoVLX-NEXT: vzeroupper
3459 %0 = bitcast <2 x i64> %__a to <2 x i64>
3460 %load = load <2 x i64>, ptr %__b
3461 %1 = bitcast <2 x i64> %load to <2 x i64>
3462 %2 = icmp eq <2 x i64> %0, %1
3463 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3464 %4 = bitcast <64 x i1> %3 to i64
3468 define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3469 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
3470 ; VLX: # %bb.0: # %entry
3471 ; VLX-NEXT: kmovd %edi, %k1
3472 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
3473 ; VLX-NEXT: kmovq %k0, %rax
3476 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
3477 ; NoVLX: # %bb.0: # %entry
3478 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3479 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3480 ; NoVLX-NEXT: kmovw %edi, %k1
3481 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3482 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3483 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3484 ; NoVLX-NEXT: kmovw %k0, %eax
3485 ; NoVLX-NEXT: vzeroupper
3488 %0 = bitcast <2 x i64> %__a to <2 x i64>
3489 %1 = bitcast <2 x i64> %__b to <2 x i64>
3490 %2 = icmp eq <2 x i64> %0, %1
3491 %3 = bitcast i8 %__u to <8 x i1>
3492 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3493 %4 = and <2 x i1> %2, %extract.i
3494 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3495 %6 = bitcast <64 x i1> %5 to i64
3499 define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
3500 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
3501 ; VLX: # %bb.0: # %entry
3502 ; VLX-NEXT: kmovd %edi, %k1
3503 ; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
3504 ; VLX-NEXT: kmovq %k0, %rax
3507 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
3508 ; NoVLX: # %bb.0: # %entry
3509 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3510 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
3511 ; NoVLX-NEXT: kmovw %edi, %k1
3512 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3513 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3514 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3515 ; NoVLX-NEXT: kmovw %k0, %eax
3516 ; NoVLX-NEXT: vzeroupper
3519 %0 = bitcast <2 x i64> %__a to <2 x i64>
3520 %load = load <2 x i64>, ptr %__b
3521 %1 = bitcast <2 x i64> %load to <2 x i64>
3522 %2 = icmp eq <2 x i64> %0, %1
3523 %3 = bitcast i8 %__u to <8 x i1>
3524 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3525 %4 = and <2 x i1> %2, %extract.i
3526 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3527 %6 = bitcast <64 x i1> %5 to i64
3532 define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
3533 ; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
3534 ; VLX: # %bb.0: # %entry
3535 ; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
3536 ; VLX-NEXT: kmovq %k0, %rax
3539 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
3540 ; NoVLX: # %bb.0: # %entry
3541 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3542 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
3543 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3544 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3545 ; NoVLX-NEXT: kmovw %k0, %eax
3546 ; NoVLX-NEXT: vzeroupper
3549 %0 = bitcast <2 x i64> %__a to <2 x i64>
3550 %load = load i64, ptr %__b
3551 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3552 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3553 %2 = icmp eq <2 x i64> %0, %1
3554 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3555 %4 = bitcast <64 x i1> %3 to i64
3559 define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
3560 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
3561 ; VLX: # %bb.0: # %entry
3562 ; VLX-NEXT: kmovd %edi, %k1
3563 ; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
3564 ; VLX-NEXT: kmovq %k0, %rax
3567 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
3568 ; NoVLX: # %bb.0: # %entry
3569 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3570 ; NoVLX-NEXT: kmovw %edi, %k1
3571 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
3572 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3573 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3574 ; NoVLX-NEXT: kmovw %k0, %eax
3575 ; NoVLX-NEXT: vzeroupper
3578 %0 = bitcast <2 x i64> %__a to <2 x i64>
3579 %load = load i64, ptr %__b
3580 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3581 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3582 %2 = icmp eq <2 x i64> %0, %1
3583 %3 = bitcast i8 %__u to <8 x i1>
3584 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3585 %4 = and <2 x i1> %extract.i, %2
3586 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3587 %6 = bitcast <64 x i1> %5 to i64
; Equality compare of two <4 x i64> arguments; the <4 x i1> result is
; zero-padded to 8 lanes and bitcast to i8.
; Expected codegen: with +avx512vl a single ymm vpcmpeqq into %k0; with only
; +avx512f the operands are treated as zmm and a kshiftlw/kshiftrw $12 pair
; clears mask bits 4-15.
; NOTE(review): the ret is not visible in this excerpt; presumably the final
; bitcast value is what gets returned.
3592 define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3593 ; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
3594 ; VLX: # %bb.0: # %entry
3595 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
3596 ; VLX-NEXT: kmovd %k0, %eax
3597 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3598 ; VLX-NEXT: vzeroupper
3601 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
3602 ; NoVLX: # %bb.0: # %entry
3603 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
3604 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3605 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3606 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3607 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3608 ; NoVLX-NEXT: kmovw %k0, %eax
3609 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3610 ; NoVLX-NEXT: vzeroupper
3613 %0 = bitcast <4 x i64> %__a to <4 x i64>
3614 %1 = bitcast <4 x i64> %__b to <4 x i64>
3615 %2 = icmp eq <4 x i64> %0, %1
; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the zeroinitializer operand.
3616 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3617 %4 = bitcast <8 x i1> %3 to i8
; Equality compare of a <4 x i64> argument against a <4 x i64> loaded from
; %__b; the <4 x i1> result is zero-padded to 8 lanes and bitcast to i8.
; Expected codegen: with +avx512vl the load is folded into a ymm vpcmpeqq; with
; only +avx512f the vector is first loaded with vmovdqa into ymm1, compared as
; zmm, and a kshiftlw/kshiftrw $12 pair clears mask bits 4-15.
; NOTE(review): the ret is not visible in this excerpt; presumably the final
; bitcast value is what gets returned.
3621 define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
3622 ; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
3623 ; VLX: # %bb.0: # %entry
3624 ; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
3625 ; VLX-NEXT: kmovd %k0, %eax
3626 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3627 ; VLX-NEXT: vzeroupper
3630 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
3631 ; NoVLX: # %bb.0: # %entry
3632 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3633 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
3634 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3635 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3636 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3637 ; NoVLX-NEXT: kmovw %k0, %eax
3638 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3639 ; NoVLX-NEXT: vzeroupper
3642 %0 = bitcast <4 x i64> %__a to <4 x i64>
3643 %load = load <4 x i64>, ptr %__b
3644 %1 = bitcast <4 x i64> %load to <4 x i64>
3645 %2 = icmp eq <4 x i64> %0, %1
; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the zeroinitializer operand.
3646 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3647 %4 = bitcast <8 x i1> %3 to i8
; Masked equality compare of two <4 x i64> arguments. The <4 x i1> result is
; ANDed with the low four bits of %__u, zero-padded to 8 lanes and bitcast to i8.
; Expected codegen: %edi is moved into %k1 and used as the {%k1} write mask on
; the compare. With +avx512vl the compare is a ymm vpcmpeqq; with only +avx512f
; it is a zmm compare followed by a kshiftlw/kshiftrw $12 pair that clears mask
; bits 4-15.
; NOTE(review): the ret is not visible in this excerpt; presumably the final
; bitcast value is what gets returned.
3651 define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3652 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
3653 ; VLX: # %bb.0: # %entry
3654 ; VLX-NEXT: kmovd %edi, %k1
3655 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
3656 ; VLX-NEXT: kmovd %k0, %eax
3657 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3658 ; VLX-NEXT: vzeroupper
3661 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
3662 ; NoVLX: # %bb.0: # %entry
3663 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
3664 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3665 ; NoVLX-NEXT: kmovw %edi, %k1
3666 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3667 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3668 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3669 ; NoVLX-NEXT: kmovw %k0, %eax
3670 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3671 ; NoVLX-NEXT: vzeroupper
3674 %0 = bitcast <4 x i64> %__a to <4 x i64>
3675 %1 = bitcast <4 x i64> %__b to <4 x i64>
3676 %2 = icmp eq <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep lanes 0-3 to match the compare width.
3677 %3 = bitcast i8 %__u to <8 x i1>
3678 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3679 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the zeroinitializer operand.
3680 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3681 %6 = bitcast <8 x i1> %5 to i8
; Masked equality compare of a <4 x i64> argument against a <4 x i64> loaded
; from %__b. The <4 x i1> result is ANDed with the low four bits of %__u,
; zero-padded to 8 lanes and bitcast to i8.
; Expected codegen: %edi is moved into %k1 and used as the {%k1} write mask.
; With +avx512vl the load is folded into a ymm vpcmpeqq; with only +avx512f the
; vector is loaded with vmovdqa into ymm1, compared as zmm, and a
; kshiftlw/kshiftrw $12 pair clears mask bits 4-15.
; NOTE(review): the ret is not visible in this excerpt; presumably the final
; bitcast value is what gets returned.
3685 define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
3686 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
3687 ; VLX: # %bb.0: # %entry
3688 ; VLX-NEXT: kmovd %edi, %k1
3689 ; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
3690 ; VLX-NEXT: kmovd %k0, %eax
3691 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3692 ; VLX-NEXT: vzeroupper
3695 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
3696 ; NoVLX: # %bb.0: # %entry
3697 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3698 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
3699 ; NoVLX-NEXT: kmovw %edi, %k1
3700 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3701 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3702 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3703 ; NoVLX-NEXT: kmovw %k0, %eax
3704 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3705 ; NoVLX-NEXT: vzeroupper
3708 %0 = bitcast <4 x i64> %__a to <4 x i64>
3709 %load = load <4 x i64>, ptr %__b
3710 %1 = bitcast <4 x i64> %load to <4 x i64>
3711 %2 = icmp eq <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep lanes 0-3 to match the compare width.
3712 %3 = bitcast i8 %__u to <8 x i1>
3713 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3714 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the zeroinitializer operand.
3715 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3716 %6 = bitcast <8 x i1> %5 to i8
; Equality compare of a <4 x i64> argument against a 64-bit scalar loaded from
; %__b and splatted to all four lanes; the <4 x i1> result is zero-padded to 8
; lanes and bitcast to i8.
; Expected codegen: with +avx512vl a single ymm vpcmpeqq using a {1to4} embedded
; broadcast; with only +avx512f a zmm compare using {1to8}, followed by a
; kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
; NOTE(review): the ret is not visible in this excerpt; presumably the final
; bitcast value is what gets returned.
3721 define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
3722 ; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
3723 ; VLX: # %bb.0: # %entry
3724 ; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
3725 ; VLX-NEXT: kmovd %k0, %eax
3726 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3727 ; VLX-NEXT: vzeroupper
3730 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
3731 ; NoVLX: # %bb.0: # %entry
3732 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3733 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
3734 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3735 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3736 ; NoVLX-NEXT: kmovw %k0, %eax
3737 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3738 ; NoVLX-NEXT: vzeroupper
3741 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with an all-zero index mask.
3742 %load = load i64, ptr %__b
3743 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
3744 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
3745 %2 = icmp eq <4 x i64> %0, %1
; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the zeroinitializer operand.
3746 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3747 %4 = bitcast <8 x i1> %3 to i8
; Masked equality compare of a <4 x i64> argument against a 64-bit scalar
; loaded from %__b and splatted to all four lanes. The <4 x i1> result is ANDed
; with the low four bits of %__u, zero-padded to 8 lanes and bitcast to i8.
; Expected codegen: %edi is moved into %k1 and used as the {%k1} write mask.
; With +avx512vl the compare is a ymm vpcmpeqq with {1to4}; with only +avx512f
; it is a zmm compare with {1to8} followed by a kshiftlw/kshiftrw $12 pair that
; clears mask bits 4-15.
; NOTE(review): the ret is not visible in this excerpt; presumably the final
; bitcast value is what gets returned.
3751 define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
3752 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
3753 ; VLX: # %bb.0: # %entry
3754 ; VLX-NEXT: kmovd %edi, %k1
3755 ; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
3756 ; VLX-NEXT: kmovd %k0, %eax
3757 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3758 ; VLX-NEXT: vzeroupper
3761 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
3762 ; NoVLX: # %bb.0: # %entry
3763 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3764 ; NoVLX-NEXT: kmovw %edi, %k1
3765 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
3766 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3767 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3768 ; NoVLX-NEXT: kmovw %k0, %eax
3769 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3770 ; NoVLX-NEXT: vzeroupper
3773 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with an all-zero index mask.
3774 %load = load i64, ptr %__b
3775 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
3776 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
3777 %2 = icmp eq <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep lanes 0-3 to match the compare width.
3778 %3 = bitcast i8 %__u to <8 x i1>
3779 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3780 %4 = and <4 x i1> %extract.i, %2
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the zeroinitializer operand.
3781 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3782 %6 = bitcast <8 x i1> %5 to i8
; Equality compare of two <4 x i64> arguments; the <4 x i1> result is
; zero-padded to 16 lanes and bitcast to i16.
; Expected codegen: with +avx512vl a single ymm vpcmpeqq into %k0; with only
; +avx512f the operands are treated as zmm and a kshiftlw/kshiftrw $12 pair
; clears mask bits 4-15.
; NOTE(review): the ret is not visible in this excerpt; presumably the final
; bitcast value is what gets returned.
3787 define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3788 ; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
3789 ; VLX: # %bb.0: # %entry
3790 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
3791 ; VLX-NEXT: kmovd %k0, %eax
3792 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3793 ; VLX-NEXT: vzeroupper
3796 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
3797 ; NoVLX: # %bb.0: # %entry
3798 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
3799 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3800 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3801 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3802 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3803 ; NoVLX-NEXT: kmovw %k0, %eax
3804 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3805 ; NoVLX-NEXT: vzeroupper
3808 %0 = bitcast <4 x i64> %__a to <4 x i64>
3809 %1 = bitcast <4 x i64> %__b to <4 x i64>
3810 %2 = icmp eq <4 x i64> %0, %1
; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the zeroinitializer operand.
3811 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3812 %4 = bitcast <16 x i1> %3 to i16
; Equality compare of a <4 x i64> argument against a <4 x i64> loaded from
; %__b; the <4 x i1> result is zero-padded to 16 lanes and bitcast to i16.
; Expected codegen: with +avx512vl the load is folded into a ymm vpcmpeqq; with
; only +avx512f the vector is first loaded with vmovdqa into ymm1, compared as
; zmm, and a kshiftlw/kshiftrw $12 pair clears mask bits 4-15.
; NOTE(review): the ret is not visible in this excerpt; presumably the final
; bitcast value is what gets returned.
3816 define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
3817 ; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem:
3818 ; VLX: # %bb.0: # %entry
3819 ; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
3820 ; VLX-NEXT: kmovd %k0, %eax
3821 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3822 ; VLX-NEXT: vzeroupper
3825 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem:
3826 ; NoVLX: # %bb.0: # %entry
3827 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3828 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
3829 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3830 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3831 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3832 ; NoVLX-NEXT: kmovw %k0, %eax
3833 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3834 ; NoVLX-NEXT: vzeroupper
3837 %0 = bitcast <4 x i64> %__a to <4 x i64>
3838 %load = load <4 x i64>, ptr %__b
3839 %1 = bitcast <4 x i64> %load to <4 x i64>
3840 %2 = icmp eq <4 x i64> %0, %1
; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the zeroinitializer operand.
3841 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3842 %4 = bitcast <16 x i1> %3 to i16
; Masked equality compare of two <4 x i64> arguments. The <4 x i1> result is
; ANDed with the low four bits of %__u, zero-padded to 16 lanes and bitcast to
; i16.
; Expected codegen: %edi is moved into %k1 and used as the {%k1} write mask on
; the compare. With +avx512vl the compare is a ymm vpcmpeqq; with only +avx512f
; it is a zmm compare followed by a kshiftlw/kshiftrw $12 pair that clears mask
; bits 4-15.
; NOTE(review): the ret is not visible in this excerpt; presumably the final
; bitcast value is what gets returned.
3846 define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3847 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
3848 ; VLX: # %bb.0: # %entry
3849 ; VLX-NEXT: kmovd %edi, %k1
3850 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
3851 ; VLX-NEXT: kmovd %k0, %eax
3852 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3853 ; VLX-NEXT: vzeroupper
3856 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
3857 ; NoVLX: # %bb.0: # %entry
3858 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
3859 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3860 ; NoVLX-NEXT: kmovw %edi, %k1
3861 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3862 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3863 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3864 ; NoVLX-NEXT: kmovw %k0, %eax
3865 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3866 ; NoVLX-NEXT: vzeroupper
3869 %0 = bitcast <4 x i64> %__a to <4 x i64>
3870 %1 = bitcast <4 x i64> %__b to <4 x i64>
3871 %2 = icmp eq <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep lanes 0-3 to match the compare width.
3872 %3 = bitcast i8 %__u to <8 x i1>
3873 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3874 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the zeroinitializer operand.
3875 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3876 %6 = bitcast <16 x i1> %5 to i16
; Masked equality compare of a <4 x i64> argument against a <4 x i64> loaded
; from %__b. The <4 x i1> result is ANDed with the low four bits of %__u,
; zero-padded to 16 lanes and bitcast to i16.
; Expected codegen: %edi is moved into %k1 and used as the {%k1} write mask.
; With +avx512vl the load is folded into a ymm vpcmpeqq; with only +avx512f the
; vector is loaded with vmovdqa into ymm1, compared as zmm, and a
; kshiftlw/kshiftrw $12 pair clears mask bits 4-15.
; NOTE(review): the ret is not visible in this excerpt; presumably the final
; bitcast value is what gets returned.
3880 define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
3881 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
3882 ; VLX: # %bb.0: # %entry
3883 ; VLX-NEXT: kmovd %edi, %k1
3884 ; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
3885 ; VLX-NEXT: kmovd %k0, %eax
3886 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3887 ; VLX-NEXT: vzeroupper
3890 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
3891 ; NoVLX: # %bb.0: # %entry
3892 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3893 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
3894 ; NoVLX-NEXT: kmovw %edi, %k1
3895 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3896 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3897 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3898 ; NoVLX-NEXT: kmovw %k0, %eax
3899 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3900 ; NoVLX-NEXT: vzeroupper
3903 %0 = bitcast <4 x i64> %__a to <4 x i64>
3904 %load = load <4 x i64>, ptr %__b
3905 %1 = bitcast <4 x i64> %load to <4 x i64>
3906 %2 = icmp eq <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep lanes 0-3 to match the compare width.
3907 %3 = bitcast i8 %__u to <8 x i1>
3908 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3909 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the zeroinitializer operand.
3910 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3911 %6 = bitcast <16 x i1> %5 to i16
; Equality compare of a <4 x i64> argument against a 64-bit scalar loaded from
; %__b and splatted to all four lanes; the <4 x i1> result is zero-padded to 16
; lanes and bitcast to i16.
; Expected codegen: with +avx512vl a single ymm vpcmpeqq using a {1to4} embedded
; broadcast; with only +avx512f a zmm compare using {1to8}, followed by a
; kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
; NOTE(review): the ret is not visible in this excerpt; presumably the final
; bitcast value is what gets returned.
3916 define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
3917 ; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b:
3918 ; VLX: # %bb.0: # %entry
3919 ; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
3920 ; VLX-NEXT: kmovd %k0, %eax
3921 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3922 ; VLX-NEXT: vzeroupper
3925 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b:
3926 ; NoVLX: # %bb.0: # %entry
3927 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3928 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
3929 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3930 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3931 ; NoVLX-NEXT: kmovw %k0, %eax
3932 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3933 ; NoVLX-NEXT: vzeroupper
3936 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with an all-zero index mask.
3937 %load = load i64, ptr %__b
3938 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
3939 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
3940 %2 = icmp eq <4 x i64> %0, %1
; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the zeroinitializer operand.
3941 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3942 %4 = bitcast <16 x i1> %3 to i16
; Masked equality compare of a <4 x i64> argument against a 64-bit scalar
; loaded from %__b and splatted to all four lanes. The <4 x i1> result is ANDed
; with the low four bits of %__u, zero-padded to 16 lanes and bitcast to i16.
; Expected codegen: %edi is moved into %k1 and used as the {%k1} write mask.
; With +avx512vl the compare is a ymm vpcmpeqq with {1to4}; with only +avx512f
; it is a zmm compare with {1to8} followed by a kshiftlw/kshiftrw $12 pair that
; clears mask bits 4-15.
; NOTE(review): the ret is not visible in this excerpt; presumably the final
; bitcast value is what gets returned.
3946 define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
3947 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b:
3948 ; VLX: # %bb.0: # %entry
3949 ; VLX-NEXT: kmovd %edi, %k1
3950 ; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
3951 ; VLX-NEXT: kmovd %k0, %eax
3952 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3953 ; VLX-NEXT: vzeroupper
3956 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b:
3957 ; NoVLX: # %bb.0: # %entry
3958 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3959 ; NoVLX-NEXT: kmovw %edi, %k1
3960 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
3961 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3962 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3963 ; NoVLX-NEXT: kmovw %k0, %eax
3964 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3965 ; NoVLX-NEXT: vzeroupper
3968 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with an all-zero index mask.
3969 %load = load i64, ptr %__b
3970 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
3971 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
3972 %2 = icmp eq <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep lanes 0-3 to match the compare width.
3973 %3 = bitcast i8 %__u to <8 x i1>
3974 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3975 %4 = and <4 x i1> %extract.i, %2
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the zeroinitializer operand.
3976 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3977 %6 = bitcast <16 x i1> %5 to i16
; Equality compare of two <4 x i64> arguments; the <4 x i1> result is
; zero-padded to 32 lanes and bitcast to i32.
; Expected codegen: with +avx512vl a single ymm vpcmpeqq into %k0; with only
; +avx512f the operands are treated as zmm and a kshiftlw/kshiftrw $12 pair
; clears mask bits 4-15.
; NOTE(review): the ret is not visible in this excerpt; presumably the final
; bitcast value is what gets returned.
3982 define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3983 ; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask:
3984 ; VLX: # %bb.0: # %entry
3985 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
3986 ; VLX-NEXT: kmovd %k0, %eax
3987 ; VLX-NEXT: vzeroupper
3990 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask:
3991 ; NoVLX: # %bb.0: # %entry
3992 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
3993 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3994 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3995 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3996 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3997 ; NoVLX-NEXT: kmovw %k0, %eax
3998 ; NoVLX-NEXT: vzeroupper
4001 %0 = bitcast <4 x i64> %__a to <4 x i64>
4002 %1 = bitcast <4 x i64> %__b to <4 x i64>
4003 %2 = icmp eq <4 x i64> %0, %1
; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the zeroinitializer operand.
4004 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4005 %4 = bitcast <32 x i1> %3 to i32
; Equality compare of a <4 x i64> argument against a <4 x i64> loaded from
; %__b; the <4 x i1> result is zero-padded to 32 lanes and bitcast to i32.
; Expected codegen: with +avx512vl the load is folded into a ymm vpcmpeqq; with
; only +avx512f the vector is first loaded with vmovdqa into ymm1, compared as
; zmm, and a kshiftlw/kshiftrw $12 pair clears mask bits 4-15.
; NOTE(review): the ret is not visible in this excerpt; presumably the final
; bitcast value is what gets returned.
4009 define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
4010 ; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem:
4011 ; VLX: # %bb.0: # %entry
4012 ; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
4013 ; VLX-NEXT: kmovd %k0, %eax
4014 ; VLX-NEXT: vzeroupper
4017 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem:
4018 ; NoVLX: # %bb.0: # %entry
4019 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4020 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
4021 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4022 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4023 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4024 ; NoVLX-NEXT: kmovw %k0, %eax
4025 ; NoVLX-NEXT: vzeroupper
4028 %0 = bitcast <4 x i64> %__a to <4 x i64>
4029 %load = load <4 x i64>, ptr %__b
4030 %1 = bitcast <4 x i64> %load to <4 x i64>
4031 %2 = icmp eq <4 x i64> %0, %1
; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the zeroinitializer operand.
4032 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4033 %4 = bitcast <32 x i1> %3 to i32
; Masked equality compare of two <4 x i64> arguments. The <4 x i1> result is
; ANDed with the low four bits of %__u, zero-padded to 32 lanes and bitcast to
; i32.
; Expected codegen: %edi is moved into %k1 and used as the {%k1} write mask on
; the compare. With +avx512vl the compare is a ymm vpcmpeqq; with only +avx512f
; it is a zmm compare followed by a kshiftlw/kshiftrw $12 pair that clears mask
; bits 4-15.
; NOTE(review): the ret is not visible in this excerpt; presumably the final
; bitcast value is what gets returned.
4037 define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
4038 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask:
4039 ; VLX: # %bb.0: # %entry
4040 ; VLX-NEXT: kmovd %edi, %k1
4041 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
4042 ; VLX-NEXT: kmovd %k0, %eax
4043 ; VLX-NEXT: vzeroupper
4046 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask:
4047 ; NoVLX: # %bb.0: # %entry
4048 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
4049 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4050 ; NoVLX-NEXT: kmovw %edi, %k1
4051 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4052 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4053 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4054 ; NoVLX-NEXT: kmovw %k0, %eax
4055 ; NoVLX-NEXT: vzeroupper
4058 %0 = bitcast <4 x i64> %__a to <4 x i64>
4059 %1 = bitcast <4 x i64> %__b to <4 x i64>
4060 %2 = icmp eq <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep lanes 0-3 to match the compare width.
4061 %3 = bitcast i8 %__u to <8 x i1>
4062 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4063 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the zeroinitializer operand.
4064 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4065 %6 = bitcast <32 x i1> %5 to i32
; Masked equality compare of a <4 x i64> argument against a <4 x i64> loaded
; from %__b. The <4 x i1> result is ANDed with the low four bits of %__u,
; zero-padded to 32 lanes and bitcast to i32.
; Expected codegen: %edi is moved into %k1 and used as the {%k1} write mask.
; With +avx512vl the load is folded into a ymm vpcmpeqq; with only +avx512f the
; vector is loaded with vmovdqa into ymm1, compared as zmm, and a
; kshiftlw/kshiftrw $12 pair clears mask bits 4-15.
; NOTE(review): the ret is not visible in this excerpt; presumably the final
; bitcast value is what gets returned.
4069 define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
4070 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem:
4071 ; VLX: # %bb.0: # %entry
4072 ; VLX-NEXT: kmovd %edi, %k1
4073 ; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
4074 ; VLX-NEXT: kmovd %k0, %eax
4075 ; VLX-NEXT: vzeroupper
4078 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem:
4079 ; NoVLX: # %bb.0: # %entry
4080 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4081 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
4082 ; NoVLX-NEXT: kmovw %edi, %k1
4083 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4084 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4085 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4086 ; NoVLX-NEXT: kmovw %k0, %eax
4087 ; NoVLX-NEXT: vzeroupper
4090 %0 = bitcast <4 x i64> %__a to <4 x i64>
4091 %load = load <4 x i64>, ptr %__b
4092 %1 = bitcast <4 x i64> %load to <4 x i64>
4093 %2 = icmp eq <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep lanes 0-3 to match the compare width.
4094 %3 = bitcast i8 %__u to <8 x i1>
4095 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4096 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the zeroinitializer operand.
4097 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4098 %6 = bitcast <32 x i1> %5 to i32
; Equality compare of a <4 x i64> argument against a 64-bit scalar loaded from
; %__b and splatted to all four lanes; the <4 x i1> result is zero-padded to 32
; lanes and bitcast to i32.
; Expected codegen: with +avx512vl a single ymm vpcmpeqq using a {1to4} embedded
; broadcast; with only +avx512f a zmm compare using {1to8}, followed by a
; kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
; NOTE(review): the ret is not visible in this excerpt; presumably the final
; bitcast value is what gets returned.
4103 define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
4104 ; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b:
4105 ; VLX: # %bb.0: # %entry
4106 ; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
4107 ; VLX-NEXT: kmovd %k0, %eax
4108 ; VLX-NEXT: vzeroupper
4111 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b:
4112 ; NoVLX: # %bb.0: # %entry
4113 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4114 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4115 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4116 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4117 ; NoVLX-NEXT: kmovw %k0, %eax
4118 ; NoVLX-NEXT: vzeroupper
4121 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with an all-zero index mask.
4122 %load = load i64, ptr %__b
4123 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4124 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4125 %2 = icmp eq <4 x i64> %0, %1
; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the zeroinitializer operand.
4126 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4127 %4 = bitcast <32 x i1> %3 to i32
; Masked equality compare of a <4 x i64> argument against a 64-bit scalar
; loaded from %__b and splatted to all four lanes. The <4 x i1> result is ANDed
; with the low four bits of %__u, zero-padded to 32 lanes and bitcast to i32.
; Expected codegen: %edi is moved into %k1 and used as the {%k1} write mask.
; With +avx512vl the compare is a ymm vpcmpeqq with {1to4}; with only +avx512f
; it is a zmm compare with {1to8} followed by a kshiftlw/kshiftrw $12 pair that
; clears mask bits 4-15.
; NOTE(review): the ret is not visible in this excerpt; presumably the final
; bitcast value is what gets returned.
4131 define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
4132 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b:
4133 ; VLX: # %bb.0: # %entry
4134 ; VLX-NEXT: kmovd %edi, %k1
4135 ; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
4136 ; VLX-NEXT: kmovd %k0, %eax
4137 ; VLX-NEXT: vzeroupper
4140 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b:
4141 ; NoVLX: # %bb.0: # %entry
4142 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4143 ; NoVLX-NEXT: kmovw %edi, %k1
4144 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4145 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4146 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4147 ; NoVLX-NEXT: kmovw %k0, %eax
4148 ; NoVLX-NEXT: vzeroupper
4151 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with an all-zero index mask.
4152 %load = load i64, ptr %__b
4153 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4154 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4155 %2 = icmp eq <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep lanes 0-3 to match the compare width.
4156 %3 = bitcast i8 %__u to <8 x i1>
4157 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4158 %4 = and <4 x i1> %extract.i, %2
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the zeroinitializer operand.
4159 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4160 %6 = bitcast <32 x i1> %5 to i32
; Equality compare of two <4 x i64> arguments; the <4 x i1> result is
; zero-padded to 64 lanes and bitcast to i64.
; Expected codegen: with +avx512vl a single ymm vpcmpeqq into %k0, read back
; with kmovq; with only +avx512f the operands are treated as zmm, a
; kshiftlw/kshiftrw $12 pair clears mask bits 4-15, and the mask is read back
; with kmovw into %eax.
; NOTE(review): the ret is not visible in this excerpt; presumably the final
; bitcast value is what gets returned.
4165 define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
4166 ; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
4167 ; VLX: # %bb.0: # %entry
4168 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
4169 ; VLX-NEXT: kmovq %k0, %rax
4170 ; VLX-NEXT: vzeroupper
4173 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
4174 ; NoVLX: # %bb.0: # %entry
4175 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
4176 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4177 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4178 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4179 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4180 ; NoVLX-NEXT: kmovw %k0, %eax
4181 ; NoVLX-NEXT: vzeroupper
4184 %0 = bitcast <4 x i64> %__a to <4 x i64>
4185 %1 = bitcast <4 x i64> %__b to <4 x i64>
4186 %2 = icmp eq <4 x i64> %0, %1
; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the zeroinitializer operand.
4187 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4188 %4 = bitcast <64 x i1> %3 to i64
; Equality compare of a <4 x i64> argument against a <4 x i64> loaded from
; %__b; the <4 x i1> result is zero-padded to 64 lanes and bitcast to i64.
; Expected codegen: with +avx512vl the load is folded into a ymm vpcmpeqq and
; the mask is read back with kmovq; with only +avx512f the vector is first
; loaded with vmovdqa into ymm1, compared as zmm, a kshiftlw/kshiftrw $12 pair
; clears mask bits 4-15, and the mask is read back with kmovw into %eax.
; NOTE(review): the ret is not visible in this excerpt; presumably the final
; bitcast value is what gets returned.
4192 define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
4193 ; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
4194 ; VLX: # %bb.0: # %entry
4195 ; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
4196 ; VLX-NEXT: kmovq %k0, %rax
4197 ; VLX-NEXT: vzeroupper
4200 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
4201 ; NoVLX: # %bb.0: # %entry
4202 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4203 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
4204 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4205 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4206 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4207 ; NoVLX-NEXT: kmovw %k0, %eax
4208 ; NoVLX-NEXT: vzeroupper
4211 %0 = bitcast <4 x i64> %__a to <4 x i64>
4212 %load = load <4 x i64>, ptr %__b
4213 %1 = bitcast <4 x i64> %load to <4 x i64>
4214 %2 = icmp eq <4 x i64> %0, %1
; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the zeroinitializer operand.
4215 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4216 %4 = bitcast <64 x i1> %3 to i64
; Masked equality compare of two <4 x i64> arguments. The <4 x i1> result is
; ANDed with the low four bits of %__u, zero-padded to 64 lanes and bitcast to
; i64.
; Expected codegen: %edi is moved into %k1 and used as the {%k1} write mask on
; the compare. With +avx512vl the compare is a ymm vpcmpeqq read back with
; kmovq; with only +avx512f it is a zmm compare followed by a kshiftlw/kshiftrw
; $12 pair that clears mask bits 4-15, read back with kmovw into %eax.
; NOTE(review): the ret is not visible in this excerpt; presumably the final
; bitcast value is what gets returned.
4220 define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
4221 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
4222 ; VLX: # %bb.0: # %entry
4223 ; VLX-NEXT: kmovd %edi, %k1
4224 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
4225 ; VLX-NEXT: kmovq %k0, %rax
4226 ; VLX-NEXT: vzeroupper
4229 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
4230 ; NoVLX: # %bb.0: # %entry
4231 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
4232 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4233 ; NoVLX-NEXT: kmovw %edi, %k1
4234 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4235 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4236 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4237 ; NoVLX-NEXT: kmovw %k0, %eax
4238 ; NoVLX-NEXT: vzeroupper
4241 %0 = bitcast <4 x i64> %__a to <4 x i64>
4242 %1 = bitcast <4 x i64> %__b to <4 x i64>
4243 %2 = icmp eq <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep lanes 0-3 to match the compare width.
4244 %3 = bitcast i8 %__u to <8 x i1>
4245 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4246 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the zeroinitializer operand.
4247 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4248 %6 = bitcast <64 x i1> %5 to i64
; Masked equality compare of a <4 x i64> argument against a <4 x i64> loaded
; from %__b. The <4 x i1> result is ANDed with the low four bits of %__u,
; zero-padded to 64 lanes and bitcast to i64.
; Expected codegen: %edi is moved into %k1 and used as the {%k1} write mask.
; With +avx512vl the load is folded into a ymm vpcmpeqq read back with kmovq;
; with only +avx512f the vector is loaded with vmovdqa into ymm1, compared as
; zmm, a kshiftlw/kshiftrw $12 pair clears mask bits 4-15, and the mask is read
; back with kmovw into %eax.
; NOTE(review): the ret is not visible in this excerpt; presumably the final
; bitcast value is what gets returned.
4252 define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
4253 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
4254 ; VLX: # %bb.0: # %entry
4255 ; VLX-NEXT: kmovd %edi, %k1
4256 ; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
4257 ; VLX-NEXT: kmovq %k0, %rax
4258 ; VLX-NEXT: vzeroupper
4261 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
4262 ; NoVLX: # %bb.0: # %entry
4263 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4264 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
4265 ; NoVLX-NEXT: kmovw %edi, %k1
4266 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4267 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4268 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4269 ; NoVLX-NEXT: kmovw %k0, %eax
4270 ; NoVLX-NEXT: vzeroupper
4273 %0 = bitcast <4 x i64> %__a to <4 x i64>
4274 %load = load <4 x i64>, ptr %__b
4275 %1 = bitcast <4 x i64> %load to <4 x i64>
4276 %2 = icmp eq <4 x i64> %0, %1
; View the i8 mask as eight i1 lanes and keep lanes 0-3 to match the compare width.
4277 %3 = bitcast i8 %__u to <8 x i1>
4278 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4279 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the zeroinitializer operand.
4280 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4281 %6 = bitcast <64 x i1> %5 to i64
; Equality compare of a <4 x i64> argument against a 64-bit scalar loaded from
; %__b and splatted to all four lanes; the <4 x i1> result is zero-padded to 64
; lanes and bitcast to i64.
; Expected codegen: with +avx512vl a single ymm vpcmpeqq using a {1to4} embedded
; broadcast, read back with kmovq; with only +avx512f a zmm compare using
; {1to8}, a kshiftlw/kshiftrw $12 pair that clears mask bits 4-15, and a kmovw
; into %eax.
; NOTE(review): the ret is not visible in this excerpt; presumably the final
; bitcast value is what gets returned.
4286 define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
4287 ; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
4288 ; VLX: # %bb.0: # %entry
4289 ; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
4290 ; VLX-NEXT: kmovq %k0, %rax
4291 ; VLX-NEXT: vzeroupper
4294 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
4295 ; NoVLX: # %bb.0: # %entry
4296 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4297 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4298 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4299 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4300 ; NoVLX-NEXT: kmovw %k0, %eax
4301 ; NoVLX-NEXT: vzeroupper
4304 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with an all-zero index mask.
4305 %load = load i64, ptr %__b
4306 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4307 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4308 %2 = icmp eq <4 x i64> %0, %1
; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the zeroinitializer operand.
4309 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4310 %4 = bitcast <64 x i1> %3 to i64
; Masked variant of the 4 x i64 broadcast compare: a scalar i64 loaded from
; %__b is splatted to four lanes, compared for equality with %__a, and the
; result is ANDed with the low four bits of %__u (here the mask is the first
; operand of the `and`). The result is then widened to <64 x i1> with zero
; lanes before the bitcast to i64.
; Expected code: a masked {1to4} ymm compare with AVX512VL; a masked {1to8}
; zmm compare plus the kshiftlw/kshiftrw $12 clean-up with only AVX512F.
4314 define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
4315 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
4316 ; VLX: # %bb.0: # %entry
4317 ; VLX-NEXT: kmovd %edi, %k1
4318 ; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
4319 ; VLX-NEXT: kmovq %k0, %rax
4320 ; VLX-NEXT: vzeroupper
4323 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
4324 ; NoVLX: # %bb.0: # %entry
4325 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4326 ; NoVLX-NEXT: kmovw %edi, %k1
4327 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4328 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4329 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4330 ; NoVLX-NEXT: kmovw %k0, %eax
4331 ; NoVLX-NEXT: vzeroupper
4334 %0 = bitcast <4 x i64> %__a to <4 x i64>
4335 %load = load i64, ptr %__b
4336 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4337 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4338 %2 = icmp eq <4 x i64> %0, %1
4339 %3 = bitcast i8 %__u to <8 x i1>
4340 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4341 %4 = and <4 x i1> %extract.i, %2
4342 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4343 %6 = bitcast <64 x i1> %5 to i64
; Unmasked 8 x i64 equality compare of two register operands. The <8 x i1>
; result is widened to <16 x i1> (shuffle indices 8-15 select zeroinitializer
; lanes, so the upper eight bits are zero) before the bitcast to i16.
; Both configurations are expected to emit a single zmm vpcmpeqq into a mask
; register; they differ only in the mask-to-GPR move (kmovd vs kmovw).
4348 define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4349 ; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
4350 ; VLX: # %bb.0: # %entry
4351 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4352 ; VLX-NEXT: kmovd %k0, %eax
4353 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
4354 ; VLX-NEXT: vzeroupper
4357 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
4358 ; NoVLX: # %bb.0: # %entry
4359 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4360 ; NoVLX-NEXT: kmovw %k0, %eax
4361 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
4362 ; NoVLX-NEXT: vzeroupper
4365 %0 = bitcast <8 x i64> %__a to <8 x i64>
4366 %1 = bitcast <8 x i64> %__b to <8 x i64>
4367 %2 = icmp eq <8 x i64> %0, %1
4368 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4369 %4 = bitcast <16 x i1> %3 to i16
; Unmasked 8 x i64 equality compare of %__a against a full <8 x i64> loaded
; from %__b. The <8 x i1> result is zero-extended to <16 x i1> via a shuffle
; with zeroinitializer and bitcast to i16.
; Both configurations are expected to fold the load into the zmm vpcmpeqq
; memory operand.
4373 define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
4374 ; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
4375 ; VLX: # %bb.0: # %entry
4376 ; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
4377 ; VLX-NEXT: kmovd %k0, %eax
4378 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
4379 ; VLX-NEXT: vzeroupper
4382 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
4383 ; NoVLX: # %bb.0: # %entry
4384 ; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
4385 ; NoVLX-NEXT: kmovw %k0, %eax
4386 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
4387 ; NoVLX-NEXT: vzeroupper
4390 %0 = bitcast <8 x i64> %__a to <8 x i64>
4391 %load = load <8 x i64>, ptr %__b
4392 %1 = bitcast <8 x i64> %load to <8 x i64>
4393 %2 = icmp eq <8 x i64> %0, %1
4394 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4395 %4 = bitcast <16 x i1> %3 to i16
; Masked 8 x i64 equality compare of two register operands. The compare
; result is ANDed with %__u reinterpreted as <8 x i1>, widened to <16 x i1>
; with zero upper lanes, and bitcast to i16.
; Both configurations are expected to move %__u into %k1 and use it as the
; write mask of the zmm vpcmpeqq instead of emitting a separate AND.
4399 define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4400 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
4401 ; VLX: # %bb.0: # %entry
4402 ; VLX-NEXT: kmovd %edi, %k1
4403 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4404 ; VLX-NEXT: kmovd %k0, %eax
4405 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
4406 ; VLX-NEXT: vzeroupper
4409 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
4410 ; NoVLX: # %bb.0: # %entry
4411 ; NoVLX-NEXT: kmovw %edi, %k1
4412 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4413 ; NoVLX-NEXT: kmovw %k0, %eax
4414 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
4415 ; NoVLX-NEXT: vzeroupper
4418 %0 = bitcast <8 x i64> %__a to <8 x i64>
4419 %1 = bitcast <8 x i64> %__b to <8 x i64>
4420 %2 = icmp eq <8 x i64> %0, %1
4421 %3 = bitcast i8 %__u to <8 x i1>
4422 %4 = and <8 x i1> %2, %3
4423 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4424 %6 = bitcast <16 x i1> %5 to i16
; Masked 8 x i64 equality compare of %__a against a full <8 x i64> loaded
; from %__b. The result is ANDed with %__u (as <8 x i1>), widened to
; <16 x i1> with zero upper lanes, and bitcast to i16.
; Both configurations are expected to fold the load and the mask into one
; zmm vpcmpeqq with a {%k1} write mask.
4428 define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
4429 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
4430 ; VLX: # %bb.0: # %entry
4431 ; VLX-NEXT: kmovd %edi, %k1
4432 ; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4433 ; VLX-NEXT: kmovd %k0, %eax
4434 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
4435 ; VLX-NEXT: vzeroupper
4438 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
4439 ; NoVLX: # %bb.0: # %entry
4440 ; NoVLX-NEXT: kmovw %edi, %k1
4441 ; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4442 ; NoVLX-NEXT: kmovw %k0, %eax
4443 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
4444 ; NoVLX-NEXT: vzeroupper
4447 %0 = bitcast <8 x i64> %__a to <8 x i64>
4448 %load = load <8 x i64>, ptr %__b
4449 %1 = bitcast <8 x i64> %load to <8 x i64>
4450 %2 = icmp eq <8 x i64> %0, %1
4451 %3 = bitcast i8 %__u to <8 x i1>
4452 %4 = and <8 x i1> %2, %3
4453 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4454 %6 = bitcast <16 x i1> %5 to i16
; Unmasked 8 x i64 equality compare of %__a against a scalar i64 loaded from
; %__b and splatted to all eight lanes (insertelement into lane 0, then an
; all-zero shuffle mask). The result is widened to <16 x i1> with zero upper
; lanes and bitcast to i16.
; Both configurations are expected to fold the splat into a {1to8} broadcast
; memory operand of the zmm vpcmpeqq.
4459 define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
4460 ; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
4461 ; VLX: # %bb.0: # %entry
4462 ; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4463 ; VLX-NEXT: kmovd %k0, %eax
4464 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
4465 ; VLX-NEXT: vzeroupper
4468 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
4469 ; NoVLX: # %bb.0: # %entry
4470 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4471 ; NoVLX-NEXT: kmovw %k0, %eax
4472 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
4473 ; NoVLX-NEXT: vzeroupper
4476 %0 = bitcast <8 x i64> %__a to <8 x i64>
4477 %load = load i64, ptr %__b
4478 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4479 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4480 %2 = icmp eq <8 x i64> %0, %1
4481 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4482 %4 = bitcast <16 x i1> %3 to i16
; Masked 8 x i64 equality compare of %__a against a scalar i64 loaded from
; %__b and splatted to eight lanes. The result is ANDed with %__u as
; <8 x i1> (mask is the first `and` operand here), widened to <16 x i1> with
; zero upper lanes, and bitcast to i16.
; Both configurations are expected to emit one {1to8} zmm vpcmpeqq with a
; {%k1} write mask.
4486 define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
4487 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
4488 ; VLX: # %bb.0: # %entry
4489 ; VLX-NEXT: kmovd %edi, %k1
4490 ; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4491 ; VLX-NEXT: kmovd %k0, %eax
4492 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
4493 ; VLX-NEXT: vzeroupper
4496 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
4497 ; NoVLX: # %bb.0: # %entry
4498 ; NoVLX-NEXT: kmovw %edi, %k1
4499 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4500 ; NoVLX-NEXT: kmovw %k0, %eax
4501 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
4502 ; NoVLX-NEXT: vzeroupper
4505 %0 = bitcast <8 x i64> %__a to <8 x i64>
4506 %load = load i64, ptr %__b
4507 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4508 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4509 %2 = icmp eq <8 x i64> %0, %1
4510 %3 = bitcast i8 %__u to <8 x i1>
4511 %4 = and <8 x i1> %3, %2
4512 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4513 %6 = bitcast <16 x i1> %5 to i16
; Unmasked 8 x i64 equality compare of two register operands, widened to a
; 32-bit mask. In the widening shuffle, indices 0-7 select the compare lanes
; and indices 8-15 (repeated) select zeroinitializer lanes, so bits 8-31 of
; the i32 bitcast result are zero.
; Both configurations are expected to need only the zmm vpcmpeqq and a
; mask-to-GPR move, with no extra zeroing instructions.
4518 define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4519 ; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
4520 ; VLX: # %bb.0: # %entry
4521 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4522 ; VLX-NEXT: kmovd %k0, %eax
4523 ; VLX-NEXT: vzeroupper
4526 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
4527 ; NoVLX: # %bb.0: # %entry
4528 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4529 ; NoVLX-NEXT: kmovw %k0, %eax
4530 ; NoVLX-NEXT: vzeroupper
4533 %0 = bitcast <8 x i64> %__a to <8 x i64>
4534 %1 = bitcast <8 x i64> %__b to <8 x i64>
4535 %2 = icmp eq <8 x i64> %0, %1
4536 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4537 %4 = bitcast <32 x i1> %3 to i32
; Unmasked 8 x i64 equality compare of %__a against a full <8 x i64> loaded
; from %__b, widened to <32 x i1> with zero lanes (shuffle indices 8-15
; select zeroinitializer) and bitcast to i32.
; Both configurations are expected to fold the load into the zmm vpcmpeqq.
4541 define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
4542 ; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
4543 ; VLX: # %bb.0: # %entry
4544 ; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
4545 ; VLX-NEXT: kmovd %k0, %eax
4546 ; VLX-NEXT: vzeroupper
4549 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
4550 ; NoVLX: # %bb.0: # %entry
4551 ; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
4552 ; NoVLX-NEXT: kmovw %k0, %eax
4553 ; NoVLX-NEXT: vzeroupper
4556 %0 = bitcast <8 x i64> %__a to <8 x i64>
4557 %load = load <8 x i64>, ptr %__b
4558 %1 = bitcast <8 x i64> %load to <8 x i64>
4559 %2 = icmp eq <8 x i64> %0, %1
4560 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4561 %4 = bitcast <32 x i1> %3 to i32
; Masked 8 x i64 equality compare of two register operands, widened to a
; 32-bit mask. The compare result is ANDed with %__u as <8 x i1>, then padded
; to <32 x i1> with zeroinitializer lanes and bitcast to i32.
; Both configurations are expected to apply %__u as the {%k1} write mask of
; the zmm vpcmpeqq.
4565 define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4566 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
4567 ; VLX: # %bb.0: # %entry
4568 ; VLX-NEXT: kmovd %edi, %k1
4569 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4570 ; VLX-NEXT: kmovd %k0, %eax
4571 ; VLX-NEXT: vzeroupper
4574 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
4575 ; NoVLX: # %bb.0: # %entry
4576 ; NoVLX-NEXT: kmovw %edi, %k1
4577 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4578 ; NoVLX-NEXT: kmovw %k0, %eax
4579 ; NoVLX-NEXT: vzeroupper
4582 %0 = bitcast <8 x i64> %__a to <8 x i64>
4583 %1 = bitcast <8 x i64> %__b to <8 x i64>
4584 %2 = icmp eq <8 x i64> %0, %1
4585 %3 = bitcast i8 %__u to <8 x i1>
4586 %4 = and <8 x i1> %2, %3
4587 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4588 %6 = bitcast <32 x i1> %5 to i32
; Masked 8 x i64 equality compare of %__a against a full <8 x i64> loaded
; from %__b, widened to a 32-bit mask. The result is ANDed with %__u as
; <8 x i1>, padded to <32 x i1> with zeroinitializer lanes, and bitcast to
; i32.
; Both configurations are expected to fold the load and the mask into one
; zmm vpcmpeqq.
4592 define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
4593 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
4594 ; VLX: # %bb.0: # %entry
4595 ; VLX-NEXT: kmovd %edi, %k1
4596 ; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4597 ; VLX-NEXT: kmovd %k0, %eax
4598 ; VLX-NEXT: vzeroupper
4601 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
4602 ; NoVLX: # %bb.0: # %entry
4603 ; NoVLX-NEXT: kmovw %edi, %k1
4604 ; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4605 ; NoVLX-NEXT: kmovw %k0, %eax
4606 ; NoVLX-NEXT: vzeroupper
4609 %0 = bitcast <8 x i64> %__a to <8 x i64>
4610 %load = load <8 x i64>, ptr %__b
4611 %1 = bitcast <8 x i64> %load to <8 x i64>
4612 %2 = icmp eq <8 x i64> %0, %1
4613 %3 = bitcast i8 %__u to <8 x i1>
4614 %4 = and <8 x i1> %2, %3
4615 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4616 %6 = bitcast <32 x i1> %5 to i32
; Unmasked 8 x i64 equality compare of %__a against a scalar i64 loaded from
; %__b and splatted to eight lanes, widened to a 32-bit mask. The <8 x i1>
; result is padded to <32 x i1> with zeroinitializer lanes and bitcast to
; i32.
; Both configurations are expected to fold the splat into a {1to8} broadcast
; memory operand.
4621 define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
4622 ; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
4623 ; VLX: # %bb.0: # %entry
4624 ; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4625 ; VLX-NEXT: kmovd %k0, %eax
4626 ; VLX-NEXT: vzeroupper
4629 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
4630 ; NoVLX: # %bb.0: # %entry
4631 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4632 ; NoVLX-NEXT: kmovw %k0, %eax
4633 ; NoVLX-NEXT: vzeroupper
4636 %0 = bitcast <8 x i64> %__a to <8 x i64>
4637 %load = load i64, ptr %__b
4638 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4639 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4640 %2 = icmp eq <8 x i64> %0, %1
4641 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4642 %4 = bitcast <32 x i1> %3 to i32
; Masked 8 x i64 equality compare of %__a against a scalar i64 loaded from
; %__b and splatted to eight lanes, widened to a 32-bit mask. The result is
; ANDed with %__u as <8 x i1> (mask is the first `and` operand), padded to
; <32 x i1> with zeroinitializer lanes, and bitcast to i32.
; Both configurations are expected to emit one masked {1to8} zmm vpcmpeqq.
4646 define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
4647 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
4648 ; VLX: # %bb.0: # %entry
4649 ; VLX-NEXT: kmovd %edi, %k1
4650 ; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4651 ; VLX-NEXT: kmovd %k0, %eax
4652 ; VLX-NEXT: vzeroupper
4655 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
4656 ; NoVLX: # %bb.0: # %entry
4657 ; NoVLX-NEXT: kmovw %edi, %k1
4658 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4659 ; NoVLX-NEXT: kmovw %k0, %eax
4660 ; NoVLX-NEXT: vzeroupper
4663 %0 = bitcast <8 x i64> %__a to <8 x i64>
4664 %load = load i64, ptr %__b
4665 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4666 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4667 %2 = icmp eq <8 x i64> %0, %1
4668 %3 = bitcast i8 %__u to <8 x i1>
4669 %4 = and <8 x i1> %3, %2
4670 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4671 %6 = bitcast <32 x i1> %5 to i32
; Unmasked 8 x i64 equality compare of two register operands, widened to a
; 64-bit mask. In the widening shuffle, indices 0-7 select the compare lanes
; and indices 8-15 (repeated) select zeroinitializer lanes, so bits 8-63 of
; the i64 bitcast result are zero.
; The AVX512BW-enabled run is expected to read the mask with kmovq; the
; AVX512F-only run uses kmovw into %eax.
4676 define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4677 ; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
4678 ; VLX: # %bb.0: # %entry
4679 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4680 ; VLX-NEXT: kmovq %k0, %rax
4681 ; VLX-NEXT: vzeroupper
4684 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
4685 ; NoVLX: # %bb.0: # %entry
4686 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4687 ; NoVLX-NEXT: kmovw %k0, %eax
4688 ; NoVLX-NEXT: vzeroupper
4691 %0 = bitcast <8 x i64> %__a to <8 x i64>
4692 %1 = bitcast <8 x i64> %__b to <8 x i64>
4693 %2 = icmp eq <8 x i64> %0, %1
4694 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4695 %4 = bitcast <64 x i1> %3 to i64
; Unmasked 8 x i64 equality compare of %__a against a full <8 x i64> loaded
; from %__b, widened to <64 x i1> with zeroinitializer lanes (shuffle indices
; 8-15) and bitcast to i64.
; Both configurations are expected to fold the load into the zmm vpcmpeqq;
; they differ only in the mask-to-GPR move (kmovq vs kmovw).
4699 define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
4700 ; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
4701 ; VLX: # %bb.0: # %entry
4702 ; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
4703 ; VLX-NEXT: kmovq %k0, %rax
4704 ; VLX-NEXT: vzeroupper
4707 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
4708 ; NoVLX: # %bb.0: # %entry
4709 ; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
4710 ; NoVLX-NEXT: kmovw %k0, %eax
4711 ; NoVLX-NEXT: vzeroupper
4714 %0 = bitcast <8 x i64> %__a to <8 x i64>
4715 %load = load <8 x i64>, ptr %__b
4716 %1 = bitcast <8 x i64> %load to <8 x i64>
4717 %2 = icmp eq <8 x i64> %0, %1
4718 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4719 %4 = bitcast <64 x i1> %3 to i64
; Masked 8 x i64 equality compare of two register operands, widened to a
; 64-bit mask. The compare result is ANDed with %__u as <8 x i1>, padded to
; <64 x i1> with zeroinitializer lanes, and bitcast to i64.
; Both configurations are expected to apply %__u as the {%k1} write mask of
; the zmm vpcmpeqq.
4723 define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4724 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
4725 ; VLX: # %bb.0: # %entry
4726 ; VLX-NEXT: kmovd %edi, %k1
4727 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4728 ; VLX-NEXT: kmovq %k0, %rax
4729 ; VLX-NEXT: vzeroupper
4732 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
4733 ; NoVLX: # %bb.0: # %entry
4734 ; NoVLX-NEXT: kmovw %edi, %k1
4735 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4736 ; NoVLX-NEXT: kmovw %k0, %eax
4737 ; NoVLX-NEXT: vzeroupper
4740 %0 = bitcast <8 x i64> %__a to <8 x i64>
4741 %1 = bitcast <8 x i64> %__b to <8 x i64>
4742 %2 = icmp eq <8 x i64> %0, %1
4743 %3 = bitcast i8 %__u to <8 x i1>
4744 %4 = and <8 x i1> %2, %3
4745 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4746 %6 = bitcast <64 x i1> %5 to i64
; Masked 8 x i64 equality compare of %__a against a full <8 x i64> loaded
; from %__b, widened to a 64-bit mask. The result is ANDed with %__u as
; <8 x i1>, padded to <64 x i1> with zeroinitializer lanes, and bitcast to
; i64.
; Both configurations are expected to fold the load and the mask into one
; zmm vpcmpeqq.
4750 define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
4751 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
4752 ; VLX: # %bb.0: # %entry
4753 ; VLX-NEXT: kmovd %edi, %k1
4754 ; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4755 ; VLX-NEXT: kmovq %k0, %rax
4756 ; VLX-NEXT: vzeroupper
4759 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
4760 ; NoVLX: # %bb.0: # %entry
4761 ; NoVLX-NEXT: kmovw %edi, %k1
4762 ; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4763 ; NoVLX-NEXT: kmovw %k0, %eax
4764 ; NoVLX-NEXT: vzeroupper
4767 %0 = bitcast <8 x i64> %__a to <8 x i64>
4768 %load = load <8 x i64>, ptr %__b
4769 %1 = bitcast <8 x i64> %load to <8 x i64>
4770 %2 = icmp eq <8 x i64> %0, %1
4771 %3 = bitcast i8 %__u to <8 x i1>
4772 %4 = and <8 x i1> %2, %3
4773 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4774 %6 = bitcast <64 x i1> %5 to i64
; Unmasked 8 x i64 equality compare of %__a against a scalar i64 loaded from
; %__b and splatted to eight lanes, widened to a 64-bit mask. The <8 x i1>
; result is padded to <64 x i1> with zeroinitializer lanes and bitcast to
; i64.
; Both configurations are expected to fold the splat into a {1to8} broadcast
; memory operand.
4779 define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
4780 ; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
4781 ; VLX: # %bb.0: # %entry
4782 ; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4783 ; VLX-NEXT: kmovq %k0, %rax
4784 ; VLX-NEXT: vzeroupper
4787 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
4788 ; NoVLX: # %bb.0: # %entry
4789 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4790 ; NoVLX-NEXT: kmovw %k0, %eax
4791 ; NoVLX-NEXT: vzeroupper
4794 %0 = bitcast <8 x i64> %__a to <8 x i64>
4795 %load = load i64, ptr %__b
4796 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4797 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4798 %2 = icmp eq <8 x i64> %0, %1
4799 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4800 %4 = bitcast <64 x i1> %3 to i64
; Masked 8 x i64 equality compare of %__a against a scalar i64 loaded from
; %__b and splatted to eight lanes, widened to a 64-bit mask. The result is
; ANDed with %__u as <8 x i1> (mask is the first `and` operand), padded to
; <64 x i1> with zeroinitializer lanes, and bitcast to i64.
; Both configurations are expected to emit one masked {1to8} zmm vpcmpeqq.
4804 define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
4805 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
4806 ; VLX: # %bb.0: # %entry
4807 ; VLX-NEXT: kmovd %edi, %k1
4808 ; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4809 ; VLX-NEXT: kmovq %k0, %rax
4810 ; VLX-NEXT: vzeroupper
4813 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
4814 ; NoVLX: # %bb.0: # %entry
4815 ; NoVLX-NEXT: kmovw %edi, %k1
4816 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4817 ; NoVLX-NEXT: kmovw %k0, %eax
4818 ; NoVLX-NEXT: vzeroupper
4821 %0 = bitcast <8 x i64> %__a to <8 x i64>
4822 %load = load i64, ptr %__b
4823 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4824 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4825 %2 = icmp eq <8 x i64> %0, %1
4826 %3 = bitcast i8 %__u to <8 x i1>
4827 %4 = and <8 x i1> %3, %2
4828 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4829 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than compare of sixteen i8 lanes (both <2 x i64>
; arguments are bitcast to <16 x i8>). The <16 x i1> result is widened to
; <32 x i1>: shuffle indices 0-15 select the compare lanes and 16-31 select
; zeroinitializer lanes, so the upper sixteen bits of the i32 are zero.
; With AVX512BW+VL the expected code compares straight into a mask register;
; with only AVX512F the compare yields a byte vector that is sign-extended to
; dwords (vpmovsxbd) and turned into a mask by vptestmd.
4834 define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
4835 ; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
4836 ; VLX: # %bb.0: # %entry
4837 ; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
4838 ; VLX-NEXT: kmovd %k0, %eax
4841 ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
4842 ; NoVLX: # %bb.0: # %entry
4843 ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
4844 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
4845 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
4846 ; NoVLX-NEXT: kmovw %k0, %eax
4847 ; NoVLX-NEXT: vzeroupper
4850 %0 = bitcast <2 x i64> %__a to <16 x i8>
4851 %1 = bitcast <2 x i64> %__b to <16 x i8>
4852 %2 = icmp sgt <16 x i8> %0, %1
4853 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4854 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed greater-than compare of sixteen i8 lanes of %__a against a
; <2 x i64> loaded from %__b (bitcast to <16 x i8>). The <16 x i1> result is
; padded to <32 x i1> with zeroinitializer lanes and bitcast to i32.
; Both configurations are expected to fold the load into vpcmpgtb; the
; AVX512F-only run then converts the byte-vector result to a mask through
; vpmovsxbd + vptestmd.
4858 define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
4859 ; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
4860 ; VLX: # %bb.0: # %entry
4861 ; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
4862 ; VLX-NEXT: kmovd %k0, %eax
4865 ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
4866 ; NoVLX: # %bb.0: # %entry
4867 ; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
4868 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
4869 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
4870 ; NoVLX-NEXT: kmovw %k0, %eax
4871 ; NoVLX-NEXT: vzeroupper
4874 %0 = bitcast <2 x i64> %__a to <16 x i8>
4875 %load = load <2 x i64>, ptr %__b
4876 %1 = bitcast <2 x i64> %load to <16 x i8>
4877 %2 = icmp sgt <16 x i8> %0, %1
4878 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4879 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of sixteen i8 lanes from two register
; operands. The compare result is ANDed with %__u reinterpreted as
; <16 x i1>, padded to <32 x i1> with zeroinitializer lanes, and bitcast to
; i32.
; With AVX512BW+VL %__u is expected to become the {%k1} write mask of
; vpcmpgtb; with only AVX512F the mask is applied afterwards in a general
; purpose register (andl %edi, %eax).
4883 define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
4884 ; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
4885 ; VLX: # %bb.0: # %entry
4886 ; VLX-NEXT: kmovd %edi, %k1
4887 ; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
4888 ; VLX-NEXT: kmovd %k0, %eax
4891 ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
4892 ; NoVLX: # %bb.0: # %entry
4893 ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
4894 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
4895 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
4896 ; NoVLX-NEXT: kmovw %k0, %eax
4897 ; NoVLX-NEXT: andl %edi, %eax
4898 ; NoVLX-NEXT: vzeroupper
4901 %0 = bitcast <2 x i64> %__a to <16 x i8>
4902 %1 = bitcast <2 x i64> %__b to <16 x i8>
4903 %2 = icmp sgt <16 x i8> %0, %1
4904 %3 = bitcast i16 %__u to <16 x i1>
4905 %4 = and <16 x i1> %2, %3
4906 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4907 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-than compare of sixteen i8 lanes of %__a against a
; <2 x i64> loaded from %__b (bitcast to <16 x i8>). The result is ANDed with
; %__u as <16 x i1>, padded to <32 x i1> with zeroinitializer lanes, and
; bitcast to i32.
; With AVX512BW+VL the load and the mask are expected to fold into one
; vpcmpgtb; with only AVX512F the mask is applied afterwards with
; andl %edi, %eax.
4911 define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
4912 ; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
4913 ; VLX: # %bb.0: # %entry
4914 ; VLX-NEXT: kmovd %edi, %k1
4915 ; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
4916 ; VLX-NEXT: kmovd %k0, %eax
4919 ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
4920 ; NoVLX: # %bb.0: # %entry
4921 ; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
4922 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
4923 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
4924 ; NoVLX-NEXT: kmovw %k0, %eax
4925 ; NoVLX-NEXT: andl %edi, %eax
4926 ; NoVLX-NEXT: vzeroupper
4929 %0 = bitcast <2 x i64> %__a to <16 x i8>
4930 %load = load <2 x i64>, ptr %__b
4931 %1 = bitcast <2 x i64> %load to <16 x i8>
4932 %2 = icmp sgt <16 x i8> %0, %1
4933 %3 = bitcast i16 %__u to <16 x i1>
4934 %4 = and <16 x i1> %2, %3
4935 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4936 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed greater-than compare of sixteen i8 lanes from two register
; operands, widened to a 64-bit mask. In the widening shuffle, indices 0-15
; select the compare lanes and indices 16-31 (repeated) select
; zeroinitializer lanes, so bits 16-63 of the i64 bitcast result are zero.
; With AVX512BW+VL the expected code is vpcmpgtb into a mask register read
; with kmovq; with only AVX512F the byte-vector compare result goes through
; vpmovsxbd + vptestmd and is read with kmovw.
4941 define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
4942 ; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
4943 ; VLX: # %bb.0: # %entry
4944 ; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
4945 ; VLX-NEXT: kmovq %k0, %rax
4948 ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
4949 ; NoVLX: # %bb.0: # %entry
4950 ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
4951 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
4952 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
4953 ; NoVLX-NEXT: kmovw %k0, %eax
4954 ; NoVLX-NEXT: vzeroupper
4957 %0 = bitcast <2 x i64> %__a to <16 x i8>
4958 %1 = bitcast <2 x i64> %__b to <16 x i8>
4959 %2 = icmp sgt <16 x i8> %0, %1
4960 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4961 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed greater-than compare of sixteen i8 lanes of %__a against a
; <2 x i64> loaded from %__b (bitcast to <16 x i8>), widened to a 64-bit
; mask. The <16 x i1> result is padded to <64 x i1> with zeroinitializer
; lanes and bitcast to i64.
; Both configurations are expected to fold the load into vpcmpgtb; the
; AVX512F-only run then derives the mask with vpmovsxbd + vptestmd.
4965 define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
4966 ; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
4967 ; VLX: # %bb.0: # %entry
4968 ; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
4969 ; VLX-NEXT: kmovq %k0, %rax
4972 ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
4973 ; NoVLX: # %bb.0: # %entry
4974 ; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
4975 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
4976 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
4977 ; NoVLX-NEXT: kmovw %k0, %eax
4978 ; NoVLX-NEXT: vzeroupper
4981 %0 = bitcast <2 x i64> %__a to <16 x i8>
4982 %load = load <2 x i64>, ptr %__b
4983 %1 = bitcast <2 x i64> %load to <16 x i8>
4984 %2 = icmp sgt <16 x i8> %0, %1
4985 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4986 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of %__a and %__b viewed as <16 x i8>.
; The <16 x i1> compare result is ANDed with %__u (i16 bitcast to <16 x i1>),
; widened to <64 x i1> with zero lanes taken from zeroinitializer, and
; returned as an i64 bitmask.
4990 define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
4991 ; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
4992 ; VLX: # %bb.0: # %entry
4993 ; VLX-NEXT: kmovd %edi, %k1
4994 ; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
4995 ; VLX-NEXT: kmovq %k0, %rax
4998 ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
4999 ; NoVLX: # %bb.0: # %entry
5000 ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
5001 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
5002 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5003 ; NoVLX-NEXT: kmovw %k0, %eax
5004 ; NoVLX-NEXT: andl %edi, %eax
5005 ; NoVLX-NEXT: vzeroupper
5008 %0 = bitcast <2 x i64> %__a to <16 x i8>
5009 %1 = bitcast <2 x i64> %__b to <16 x i8>
5010 %2 = icmp sgt <16 x i8> %0, %1
5011 %3 = bitcast i16 %__u to <16 x i1>
5012 %4 = and <16 x i1> %2, %3
5013 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5014 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-than compare of %__a against a vector loaded from
; %__b, both viewed as <16 x i8>. The <16 x i1> result is ANDed with %__u
; (i16 bitcast to <16 x i1>), widened to <64 x i1> with zero lanes taken from
; zeroinitializer, and returned as an i64 bitmask.
5018 define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
5019 ; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
5020 ; VLX: # %bb.0: # %entry
5021 ; VLX-NEXT: kmovd %edi, %k1
5022 ; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
5023 ; VLX-NEXT: kmovq %k0, %rax
5026 ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
5027 ; NoVLX: # %bb.0: # %entry
5028 ; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
5029 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
5030 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5031 ; NoVLX-NEXT: kmovw %k0, %eax
5032 ; NoVLX-NEXT: andl %edi, %eax
5033 ; NoVLX-NEXT: vzeroupper
5036 %0 = bitcast <2 x i64> %__a to <16 x i8>
5037 %load = load <2 x i64>, ptr %__b
5038 %1 = bitcast <2 x i64> %load to <16 x i8>
5039 %2 = icmp sgt <16 x i8> %0, %1
5040 %3 = bitcast i16 %__u to <16 x i1>
5041 %4 = and <16 x i1> %2, %3
5042 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5043 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of %__a and %__b viewed as <32 x i8>. The
; <32 x i1> result is widened to <64 x i1> by a shufflevector whose indices
; 32 and above select from zeroinitializer, then returned as an i64 bitmask.
5048 define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5049 ; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
5050 ; VLX: # %bb.0: # %entry
5051 ; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0
5052 ; VLX-NEXT: kmovq %k0, %rax
5053 ; VLX-NEXT: vzeroupper
5056 ; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
5057 ; NoVLX: # %bb.0: # %entry
5058 ; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
5059 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
5060 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
5061 ; NoVLX-NEXT: kmovw %k0, %ecx
5062 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
5063 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
5064 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5065 ; NoVLX-NEXT: kmovw %k0, %eax
5066 ; NoVLX-NEXT: shll $16, %eax
5067 ; NoVLX-NEXT: orl %ecx, %eax
5068 ; NoVLX-NEXT: vzeroupper
5071 %0 = bitcast <4 x i64> %__a to <32 x i8>
5072 %1 = bitcast <4 x i64> %__b to <32 x i8>
5073 %2 = icmp sgt <32 x i8> %0, %1
5074 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5075 %4 = bitcast <64 x i1> %3 to i64
; Signed greater-than compare of %__a against a vector loaded from %__b, both
; viewed as <32 x i8>. The <32 x i1> result is widened to <64 x i1> by a
; shufflevector whose indices 32 and above select from zeroinitializer, then
; returned as an i64 bitmask.
5079 define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
5080 ; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem:
5081 ; VLX: # %bb.0: # %entry
5082 ; VLX-NEXT: vpcmpgtb (%rdi), %ymm0, %k0
5083 ; VLX-NEXT: kmovq %k0, %rax
5084 ; VLX-NEXT: vzeroupper
5087 ; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem:
5088 ; NoVLX: # %bb.0: # %entry
5089 ; NoVLX-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0
5090 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
5091 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
5092 ; NoVLX-NEXT: kmovw %k0, %ecx
5093 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
5094 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
5095 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5096 ; NoVLX-NEXT: kmovw %k0, %eax
5097 ; NoVLX-NEXT: shll $16, %eax
5098 ; NoVLX-NEXT: orl %ecx, %eax
5099 ; NoVLX-NEXT: vzeroupper
5102 %0 = bitcast <4 x i64> %__a to <32 x i8>
5103 %load = load <4 x i64>, ptr %__b
5104 %1 = bitcast <4 x i64> %load to <32 x i8>
5105 %2 = icmp sgt <32 x i8> %0, %1
5106 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5107 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of %__a and %__b viewed as <32 x i8>.
; The <32 x i1> compare result is ANDed with %__u (i32 bitcast to <32 x i1>),
; widened to <64 x i1> with zero lanes taken from zeroinitializer, and
; returned as an i64 bitmask.
5111 define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5112 ; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
5113 ; VLX: # %bb.0: # %entry
5114 ; VLX-NEXT: kmovd %edi, %k1
5115 ; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1}
5116 ; VLX-NEXT: kmovq %k0, %rax
5117 ; VLX-NEXT: vzeroupper
5120 ; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
5121 ; NoVLX: # %bb.0: # %entry
5122 ; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
5123 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
5124 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
5125 ; NoVLX-NEXT: kmovw %k0, %eax
5126 ; NoVLX-NEXT: andl %edi, %eax
5127 ; NoVLX-NEXT: shrl $16, %edi
5128 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
5129 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
5130 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5131 ; NoVLX-NEXT: kmovw %k0, %ecx
5132 ; NoVLX-NEXT: andl %edi, %ecx
5133 ; NoVLX-NEXT: shll $16, %ecx
5134 ; NoVLX-NEXT: movzwl %ax, %eax
5135 ; NoVLX-NEXT: orl %ecx, %eax
5136 ; NoVLX-NEXT: vzeroupper
5139 %0 = bitcast <4 x i64> %__a to <32 x i8>
5140 %1 = bitcast <4 x i64> %__b to <32 x i8>
5141 %2 = icmp sgt <32 x i8> %0, %1
5142 %3 = bitcast i32 %__u to <32 x i1>
5143 %4 = and <32 x i1> %2, %3
5144 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5145 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-than compare of %__a against a vector loaded from
; %__b, both viewed as <32 x i8>. The <32 x i1> result is ANDed with %__u
; (i32 bitcast to <32 x i1>), widened to <64 x i1> with zero lanes taken from
; zeroinitializer, and returned as an i64 bitmask.
5149 define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
5150 ; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
5151 ; VLX: # %bb.0: # %entry
5152 ; VLX-NEXT: kmovd %edi, %k1
5153 ; VLX-NEXT: vpcmpgtb (%rsi), %ymm0, %k0 {%k1}
5154 ; VLX-NEXT: kmovq %k0, %rax
5155 ; VLX-NEXT: vzeroupper
5158 ; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
5159 ; NoVLX: # %bb.0: # %entry
5160 ; NoVLX-NEXT: vpcmpgtb (%rsi), %ymm0, %ymm0
5161 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
5162 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
5163 ; NoVLX-NEXT: kmovw %k0, %eax
5164 ; NoVLX-NEXT: andl %edi, %eax
5165 ; NoVLX-NEXT: shrl $16, %edi
5166 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
5167 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
5168 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5169 ; NoVLX-NEXT: kmovw %k0, %ecx
5170 ; NoVLX-NEXT: andl %edi, %ecx
5171 ; NoVLX-NEXT: shll $16, %ecx
5172 ; NoVLX-NEXT: movzwl %ax, %eax
5173 ; NoVLX-NEXT: orl %ecx, %eax
5174 ; NoVLX-NEXT: vzeroupper
5177 %0 = bitcast <4 x i64> %__a to <32 x i8>
5178 %load = load <4 x i64>, ptr %__b
5179 %1 = bitcast <4 x i64> %load to <32 x i8>
5180 %2 = icmp sgt <32 x i8> %0, %1
5181 %3 = bitcast i32 %__u to <32 x i1>
5182 %4 = and <32 x i1> %2, %3
5183 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5184 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of %__a and %__b viewed as <8 x i16>. The
; <8 x i1> result is widened to <16 x i1> by a shufflevector whose indices
; 8 and above select from zeroinitializer, then returned as an i16 bitmask.
5189 define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5190 ; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask:
5191 ; VLX: # %bb.0: # %entry
5192 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
5193 ; VLX-NEXT: kmovd %k0, %eax
5194 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
5197 ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask:
5198 ; NoVLX: # %bb.0: # %entry
5199 ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
5200 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5201 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
5202 ; NoVLX-NEXT: kmovw %k0, %eax
5203 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
5204 ; NoVLX-NEXT: vzeroupper
5207 %0 = bitcast <2 x i64> %__a to <8 x i16>
5208 %1 = bitcast <2 x i64> %__b to <8 x i16>
5209 %2 = icmp sgt <8 x i16> %0, %1
5210 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5211 %4 = bitcast <16 x i1> %3 to i16
; Signed greater-than compare of %__a against a vector loaded from %__b, both
; viewed as <8 x i16>. The <8 x i1> result is widened to <16 x i1> by a
; shufflevector whose indices 8 and above select from zeroinitializer, then
; returned as an i16 bitmask.
5215 define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
5216 ; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem:
5217 ; VLX: # %bb.0: # %entry
5218 ; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
5219 ; VLX-NEXT: kmovd %k0, %eax
5220 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
5223 ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem:
5224 ; NoVLX: # %bb.0: # %entry
5225 ; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
5226 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5227 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
5228 ; NoVLX-NEXT: kmovw %k0, %eax
5229 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
5230 ; NoVLX-NEXT: vzeroupper
5233 %0 = bitcast <2 x i64> %__a to <8 x i16>
5234 %load = load <2 x i64>, ptr %__b
5235 %1 = bitcast <2 x i64> %load to <8 x i16>
5236 %2 = icmp sgt <8 x i16> %0, %1
5237 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5238 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than compare of %__a and %__b viewed as <8 x i16>.
; The <8 x i1> compare result is ANDed with %__u (i8 bitcast to <8 x i1>),
; widened to <16 x i1> with zero lanes taken from zeroinitializer, and
; returned as an i16 bitmask.
5242 define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5243 ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask:
5244 ; VLX: # %bb.0: # %entry
5245 ; VLX-NEXT: kmovd %edi, %k1
5246 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
5247 ; VLX-NEXT: kmovd %k0, %eax
5248 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
5251 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask:
5252 ; NoVLX: # %bb.0: # %entry
5253 ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
5254 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5255 ; NoVLX-NEXT: kmovw %edi, %k1
5256 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
5257 ; NoVLX-NEXT: kmovw %k0, %eax
5258 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
5259 ; NoVLX-NEXT: vzeroupper
5262 %0 = bitcast <2 x i64> %__a to <8 x i16>
5263 %1 = bitcast <2 x i64> %__b to <8 x i16>
5264 %2 = icmp sgt <8 x i16> %0, %1
5265 %3 = bitcast i8 %__u to <8 x i1>
5266 %4 = and <8 x i1> %2, %3
5267 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5268 %6 = bitcast <16 x i1> %5 to i16
; Masked signed greater-than compare of %__a against a vector loaded from
; %__b, both viewed as <8 x i16>. The <8 x i1> result is ANDed with %__u
; (i8 bitcast to <8 x i1>), widened to <16 x i1> with zero lanes taken from
; zeroinitializer, and returned as an i16 bitmask.
5272 define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
5273 ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem:
5274 ; VLX: # %bb.0: # %entry
5275 ; VLX-NEXT: kmovd %edi, %k1
5276 ; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
5277 ; VLX-NEXT: kmovd %k0, %eax
5278 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
5281 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem:
5282 ; NoVLX: # %bb.0: # %entry
5283 ; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
5284 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5285 ; NoVLX-NEXT: kmovw %edi, %k1
5286 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
5287 ; NoVLX-NEXT: kmovw %k0, %eax
5288 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
5289 ; NoVLX-NEXT: vzeroupper
5292 %0 = bitcast <2 x i64> %__a to <8 x i16>
5293 %load = load <2 x i64>, ptr %__b
5294 %1 = bitcast <2 x i64> %load to <8 x i16>
5295 %2 = icmp sgt <8 x i16> %0, %1
5296 %3 = bitcast i8 %__u to <8 x i1>
5297 %4 = and <8 x i1> %2, %3
5298 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5299 %6 = bitcast <16 x i1> %5 to i16
; Signed greater-than compare of %__a and %__b viewed as <8 x i16>. The
; <8 x i1> result is widened to <32 x i1> by a shufflevector whose indices
; 8 and above select from zeroinitializer, then returned as an i32 bitmask.
5304 define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5305 ; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask:
5306 ; VLX: # %bb.0: # %entry
5307 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
5308 ; VLX-NEXT: kmovd %k0, %eax
5311 ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask:
5312 ; NoVLX: # %bb.0: # %entry
5313 ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
5314 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5315 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
5316 ; NoVLX-NEXT: kmovw %k0, %eax
5317 ; NoVLX-NEXT: vzeroupper
5320 %0 = bitcast <2 x i64> %__a to <8 x i16>
5321 %1 = bitcast <2 x i64> %__b to <8 x i16>
5322 %2 = icmp sgt <8 x i16> %0, %1
5323 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5324 %4 = bitcast <32 x i1> %3 to i32
; Signed greater-than compare of %__a against a vector loaded from %__b, both
; viewed as <8 x i16>. The <8 x i1> result is widened to <32 x i1> by a
; shufflevector whose indices 8 and above select from zeroinitializer, then
; returned as an i32 bitmask.
5328 define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
5329 ; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem:
5330 ; VLX: # %bb.0: # %entry
5331 ; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
5332 ; VLX-NEXT: kmovd %k0, %eax
5335 ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem:
5336 ; NoVLX: # %bb.0: # %entry
5337 ; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
5338 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5339 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
5340 ; NoVLX-NEXT: kmovw %k0, %eax
5341 ; NoVLX-NEXT: vzeroupper
5344 %0 = bitcast <2 x i64> %__a to <8 x i16>
5345 %load = load <2 x i64>, ptr %__b
5346 %1 = bitcast <2 x i64> %load to <8 x i16>
5347 %2 = icmp sgt <8 x i16> %0, %1
5348 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5349 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of %__a and %__b viewed as <8 x i16>.
; The <8 x i1> compare result is ANDed with %__u (i8 bitcast to <8 x i1>),
; widened to <32 x i1> with zero lanes taken from zeroinitializer, and
; returned as an i32 bitmask.
5353 define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5354 ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask:
5355 ; VLX: # %bb.0: # %entry
5356 ; VLX-NEXT: kmovd %edi, %k1
5357 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
5358 ; VLX-NEXT: kmovd %k0, %eax
5361 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask:
5362 ; NoVLX: # %bb.0: # %entry
5363 ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
5364 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5365 ; NoVLX-NEXT: kmovw %edi, %k1
5366 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
5367 ; NoVLX-NEXT: kmovw %k0, %eax
5368 ; NoVLX-NEXT: vzeroupper
5371 %0 = bitcast <2 x i64> %__a to <8 x i16>
5372 %1 = bitcast <2 x i64> %__b to <8 x i16>
5373 %2 = icmp sgt <8 x i16> %0, %1
5374 %3 = bitcast i8 %__u to <8 x i1>
5375 %4 = and <8 x i1> %2, %3
5376 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5377 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-than compare of %__a against a vector loaded from
; %__b, both viewed as <8 x i16>. The <8 x i1> result is ANDed with %__u
; (i8 bitcast to <8 x i1>), widened to <32 x i1> with zero lanes taken from
; zeroinitializer, and returned as an i32 bitmask.
5381 define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
5382 ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem:
5383 ; VLX: # %bb.0: # %entry
5384 ; VLX-NEXT: kmovd %edi, %k1
5385 ; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
5386 ; VLX-NEXT: kmovd %k0, %eax
5389 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem:
5390 ; NoVLX: # %bb.0: # %entry
5391 ; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
5392 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5393 ; NoVLX-NEXT: kmovw %edi, %k1
5394 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
5395 ; NoVLX-NEXT: kmovw %k0, %eax
5396 ; NoVLX-NEXT: vzeroupper
5399 %0 = bitcast <2 x i64> %__a to <8 x i16>
5400 %load = load <2 x i64>, ptr %__b
5401 %1 = bitcast <2 x i64> %load to <8 x i16>
5402 %2 = icmp sgt <8 x i16> %0, %1
5403 %3 = bitcast i8 %__u to <8 x i1>
5404 %4 = and <8 x i1> %2, %3
5405 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5406 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-than compare of %__a and %__b viewed as <8 x i16>. The
; <8 x i1> result is widened to <64 x i1> by a shufflevector whose indices
; 8 and above select from zeroinitializer, then returned as an i64 bitmask.
5411 define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5412 ; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask:
5413 ; VLX: # %bb.0: # %entry
5414 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
5415 ; VLX-NEXT: kmovq %k0, %rax
5418 ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask:
5419 ; NoVLX: # %bb.0: # %entry
5420 ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
5421 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5422 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
5423 ; NoVLX-NEXT: kmovw %k0, %eax
5424 ; NoVLX-NEXT: vzeroupper
5427 %0 = bitcast <2 x i64> %__a to <8 x i16>
5428 %1 = bitcast <2 x i64> %__b to <8 x i16>
5429 %2 = icmp sgt <8 x i16> %0, %1
5430 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5431 %4 = bitcast <64 x i1> %3 to i64
; Signed greater-than compare of %__a against a vector loaded from %__b, both
; viewed as <8 x i16>. The <8 x i1> result is widened to <64 x i1> by a
; shufflevector whose indices 8 and above select from zeroinitializer, then
; returned as an i64 bitmask.
5435 define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
5436 ; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem:
5437 ; VLX: # %bb.0: # %entry
5438 ; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
5439 ; VLX-NEXT: kmovq %k0, %rax
5442 ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem:
5443 ; NoVLX: # %bb.0: # %entry
5444 ; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
5445 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5446 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
5447 ; NoVLX-NEXT: kmovw %k0, %eax
5448 ; NoVLX-NEXT: vzeroupper
5451 %0 = bitcast <2 x i64> %__a to <8 x i16>
5452 %load = load <2 x i64>, ptr %__b
5453 %1 = bitcast <2 x i64> %load to <8 x i16>
5454 %2 = icmp sgt <8 x i16> %0, %1
5455 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5456 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of %__a and %__b viewed as <8 x i16>.
; The <8 x i1> compare result is ANDed with %__u (i8 bitcast to <8 x i1>),
; widened to <64 x i1> with zero lanes taken from zeroinitializer, and
; returned as an i64 bitmask.
5460 define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5461 ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask:
5462 ; VLX: # %bb.0: # %entry
5463 ; VLX-NEXT: kmovd %edi, %k1
5464 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
5465 ; VLX-NEXT: kmovq %k0, %rax
5468 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask:
5469 ; NoVLX: # %bb.0: # %entry
5470 ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
5471 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5472 ; NoVLX-NEXT: kmovw %edi, %k1
5473 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
5474 ; NoVLX-NEXT: kmovw %k0, %eax
5475 ; NoVLX-NEXT: vzeroupper
5478 %0 = bitcast <2 x i64> %__a to <8 x i16>
5479 %1 = bitcast <2 x i64> %__b to <8 x i16>
5480 %2 = icmp sgt <8 x i16> %0, %1
5481 %3 = bitcast i8 %__u to <8 x i1>
5482 %4 = and <8 x i1> %2, %3
5483 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5484 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-than compare of %__a against a vector loaded from
; %__b, both viewed as <8 x i16>. The <8 x i1> result is ANDed with %__u
; (i8 bitcast to <8 x i1>), widened to <64 x i1> with zero lanes taken from
; zeroinitializer, and returned as an i64 bitmask.
5488 define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
5489 ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem:
5490 ; VLX: # %bb.0: # %entry
5491 ; VLX-NEXT: kmovd %edi, %k1
5492 ; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
5493 ; VLX-NEXT: kmovq %k0, %rax
5496 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem:
5497 ; NoVLX: # %bb.0: # %entry
5498 ; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
5499 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5500 ; NoVLX-NEXT: kmovw %edi, %k1
5501 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
5502 ; NoVLX-NEXT: kmovw %k0, %eax
5503 ; NoVLX-NEXT: vzeroupper
5506 %0 = bitcast <2 x i64> %__a to <8 x i16>
5507 %load = load <2 x i64>, ptr %__b
5508 %1 = bitcast <2 x i64> %load to <8 x i16>
5509 %2 = icmp sgt <8 x i16> %0, %1
5510 %3 = bitcast i8 %__u to <8 x i1>
5511 %4 = and <8 x i1> %2, %3
5512 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5513 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of %__a and %__b viewed as <16 x i16>. The
; <16 x i1> result is widened to <32 x i1> by a shufflevector whose indices
; 16 and above select from zeroinitializer, then returned as an i32 bitmask.
5518 define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5519 ; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask:
5520 ; VLX: # %bb.0: # %entry
5521 ; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
5522 ; VLX-NEXT: kmovd %k0, %eax
5523 ; VLX-NEXT: vzeroupper
5526 ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask:
5527 ; NoVLX: # %bb.0: # %entry
5528 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
5529 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5530 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5531 ; NoVLX-NEXT: kmovw %k0, %eax
5532 ; NoVLX-NEXT: vzeroupper
5535 %0 = bitcast <4 x i64> %__a to <16 x i16>
5536 %1 = bitcast <4 x i64> %__b to <16 x i16>
5537 %2 = icmp sgt <16 x i16> %0, %1
5538 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5539 %4 = bitcast <32 x i1> %3 to i32
; Signed greater-than compare of %__a against a vector loaded from %__b, both
; viewed as <16 x i16>. The <16 x i1> result is widened to <32 x i1> by a
; shufflevector whose indices 16 and above select from zeroinitializer, then
; returned as an i32 bitmask.
5543 define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
5544 ; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem:
5545 ; VLX: # %bb.0: # %entry
5546 ; VLX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0
5547 ; VLX-NEXT: kmovd %k0, %eax
5548 ; VLX-NEXT: vzeroupper
5551 ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem:
5552 ; NoVLX: # %bb.0: # %entry
5553 ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
5554 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5555 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5556 ; NoVLX-NEXT: kmovw %k0, %eax
5557 ; NoVLX-NEXT: vzeroupper
5560 %0 = bitcast <4 x i64> %__a to <16 x i16>
5561 %load = load <4 x i64>, ptr %__b
5562 %1 = bitcast <4 x i64> %load to <16 x i16>
5563 %2 = icmp sgt <16 x i16> %0, %1
5564 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5565 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of %__a and %__b viewed as <16 x i16>.
; The <16 x i1> compare result is ANDed with %__u (i16 bitcast to
; <16 x i1>), widened to <32 x i1> with zero lanes taken from
; zeroinitializer, and returned as an i32 bitmask.
5569 define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5570 ; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask:
5571 ; VLX: # %bb.0: # %entry
5572 ; VLX-NEXT: kmovd %edi, %k1
5573 ; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1}
5574 ; VLX-NEXT: kmovd %k0, %eax
5575 ; VLX-NEXT: vzeroupper
5578 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask:
5579 ; NoVLX: # %bb.0: # %entry
5580 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
5581 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5582 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5583 ; NoVLX-NEXT: kmovw %k0, %eax
5584 ; NoVLX-NEXT: andl %edi, %eax
5585 ; NoVLX-NEXT: vzeroupper
5588 %0 = bitcast <4 x i64> %__a to <16 x i16>
5589 %1 = bitcast <4 x i64> %__b to <16 x i16>
5590 %2 = icmp sgt <16 x i16> %0, %1
5591 %3 = bitcast i16 %__u to <16 x i1>
5592 %4 = and <16 x i1> %2, %3
5593 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5594 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-than compare of %__a against a vector loaded from
; %__b, both viewed as <16 x i16>. The <16 x i1> result is ANDed with %__u
; (i16 bitcast to <16 x i1>), widened to <32 x i1> with zero lanes taken from
; zeroinitializer, and returned as an i32 bitmask.
5598 define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
5599 ; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem:
5600 ; VLX: # %bb.0: # %entry
5601 ; VLX-NEXT: kmovd %edi, %k1
5602 ; VLX-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1}
5603 ; VLX-NEXT: kmovd %k0, %eax
5604 ; VLX-NEXT: vzeroupper
5607 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem:
5608 ; NoVLX: # %bb.0: # %entry
5609 ; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
5610 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5611 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5612 ; NoVLX-NEXT: kmovw %k0, %eax
5613 ; NoVLX-NEXT: andl %edi, %eax
5614 ; NoVLX-NEXT: vzeroupper
5617 %0 = bitcast <4 x i64> %__a to <16 x i16>
5618 %load = load <4 x i64>, ptr %__b
5619 %1 = bitcast <4 x i64> %load to <16 x i16>
5620 %2 = icmp sgt <16 x i16> %0, %1
5621 %3 = bitcast i16 %__u to <16 x i1>
5622 %4 = and <16 x i1> %2, %3
5623 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5624 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-than compare of %__a and %__b viewed as <16 x i16>. The
; <16 x i1> result is widened to <64 x i1> by a shufflevector whose indices
; 16 and above select from zeroinitializer, then returned as an i64 bitmask.
5629 define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5630 ; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask:
5631 ; VLX: # %bb.0: # %entry
5632 ; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
5633 ; VLX-NEXT: kmovq %k0, %rax
5634 ; VLX-NEXT: vzeroupper
5637 ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask:
5638 ; NoVLX: # %bb.0: # %entry
5639 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
5640 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5641 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5642 ; NoVLX-NEXT: kmovw %k0, %eax
5643 ; NoVLX-NEXT: vzeroupper
5646 %0 = bitcast <4 x i64> %__a to <16 x i16>
5647 %1 = bitcast <4 x i64> %__b to <16 x i16>
5648 %2 = icmp sgt <16 x i16> %0, %1
5649 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5650 %4 = bitcast <64 x i1> %3 to i64
; Signed greater-than compare of %__a against a vector loaded from %__b, both
; viewed as <16 x i16>. The <16 x i1> result is widened to <64 x i1> by a
; shufflevector whose indices 16 and above select from zeroinitializer, then
; returned as an i64 bitmask.
5654 define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
5655 ; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem:
5656 ; VLX: # %bb.0: # %entry
5657 ; VLX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0
5658 ; VLX-NEXT: kmovq %k0, %rax
5659 ; VLX-NEXT: vzeroupper
5662 ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem:
5663 ; NoVLX: # %bb.0: # %entry
5664 ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
5665 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5666 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5667 ; NoVLX-NEXT: kmovw %k0, %eax
5668 ; NoVLX-NEXT: vzeroupper
5671 %0 = bitcast <4 x i64> %__a to <16 x i16>
5672 %load = load <4 x i64>, ptr %__b
5673 %1 = bitcast <4 x i64> %load to <16 x i16>
5674 %2 = icmp sgt <16 x i16> %0, %1
5675 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5676 %4 = bitcast <64 x i1> %3 to i64
; Masked variant: signed greater-than compare of %__a and %__b as <16 x i16>,
; ANDed lane-wise with %__u reinterpreted as <16 x i1>. The masked <16 x i1>
; result fills lanes 0-15 of a <64 x i1> vector whose remaining lanes come from
; zeroinitializer, and that vector is bitcast to i64.
; The AVX512F-only run expects the mask to be applied with a scalar andl on the
; extracted k-mask rather than with a masked compare.
5680 define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5681 ; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask:
5682 ; VLX: # %bb.0: # %entry
5683 ; VLX-NEXT: kmovd %edi, %k1
5684 ; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1}
5685 ; VLX-NEXT: kmovq %k0, %rax
5686 ; VLX-NEXT: vzeroupper
5689 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask:
5690 ; NoVLX: # %bb.0: # %entry
5691 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
5692 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5693 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5694 ; NoVLX-NEXT: kmovw %k0, %eax
5695 ; NoVLX-NEXT: andl %edi, %eax
5696 ; NoVLX-NEXT: vzeroupper
5699 %0 = bitcast <4 x i64> %__a to <16 x i16>
5700 %1 = bitcast <4 x i64> %__b to <16 x i16>
5701 %2 = icmp sgt <16 x i16> %0, %1
5702 %3 = bitcast i16 %__u to <16 x i1>
5703 %4 = and <16 x i1> %2, %3
5704 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5705 %6 = bitcast <64 x i1> %5 to i64
; Masked, memory-operand variant: signed greater-than compare of %__a (as
; <16 x i16>) against a <16 x i16> vector loaded from %__b, ANDed lane-wise with
; %__u reinterpreted as <16 x i1>. The result fills lanes 0-15 of a <64 x i1>
; vector whose remaining lanes come from zeroinitializer, and that vector is
; bitcast to i64.
; The expected code folds the load into the compare in both run configurations.
5709 define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
5710 ; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem:
5711 ; VLX: # %bb.0: # %entry
5712 ; VLX-NEXT: kmovd %edi, %k1
5713 ; VLX-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1}
5714 ; VLX-NEXT: kmovq %k0, %rax
5715 ; VLX-NEXT: vzeroupper
5718 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem:
5719 ; NoVLX: # %bb.0: # %entry
5720 ; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
5721 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5722 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5723 ; NoVLX-NEXT: kmovw %k0, %eax
5724 ; NoVLX-NEXT: andl %edi, %eax
5725 ; NoVLX-NEXT: vzeroupper
5728 %0 = bitcast <4 x i64> %__a to <16 x i16>
5729 %load = load <4 x i64>, ptr %__b
5730 %1 = bitcast <4 x i64> %load to <16 x i16>
5731 %2 = icmp sgt <16 x i16> %0, %1
5732 %3 = bitcast i16 %__u to <16 x i1>
5733 %4 = and <16 x i1> %2, %3
5734 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5735 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of %__a and %__b viewed as <32 x i16>. The
; <32 x i1> result fills lanes 0-31 of a <64 x i1> vector; lanes 32-63 come from
; zeroinitializer (shuffle indices 32-63), and that vector is bitcast to i64.
; The AVX512F-only run expects the 512-bit compare to be split into two ymm
; halves whose 16-bit masks are recombined with shll $16 / orl.
5740 define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
5741 ; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
5742 ; VLX: # %bb.0: # %entry
5743 ; VLX-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
5744 ; VLX-NEXT: kmovq %k0, %rax
5745 ; VLX-NEXT: vzeroupper
5748 ; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
5749 ; NoVLX: # %bb.0: # %entry
5750 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm2
5751 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
5752 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
5753 ; NoVLX-NEXT: kmovw %k0, %ecx
5754 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
5755 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
5756 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
5757 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5758 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5759 ; NoVLX-NEXT: kmovw %k0, %eax
5760 ; NoVLX-NEXT: shll $16, %eax
5761 ; NoVLX-NEXT: orl %ecx, %eax
5762 ; NoVLX-NEXT: vzeroupper
5765 %0 = bitcast <8 x i64> %__a to <32 x i16>
5766 %1 = bitcast <8 x i64> %__b to <32 x i16>
5767 %2 = icmp sgt <32 x i16> %0, %1
5768 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5769 %4 = bitcast <64 x i1> %3 to i64
; Memory-operand variant: signed greater-than compare of %__a (as <32 x i16>)
; against a <32 x i16> vector loaded from %__b. The <32 x i1> result fills lanes
; 0-31 of a <64 x i1> vector; lanes 32-63 come from zeroinitializer, and that
; vector is bitcast to i64.
; The AVX512F-only run expects two ymm compares that read (%rdi) and 32(%rdi)
; directly, with the two 16-bit masks recombined by shll $16 / orl.
5773 define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
5774 ; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
5775 ; VLX: # %bb.0: # %entry
5776 ; VLX-NEXT: vpcmpgtw (%rdi), %zmm0, %k0
5777 ; VLX-NEXT: kmovq %k0, %rax
5778 ; VLX-NEXT: vzeroupper
5781 ; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
5782 ; NoVLX: # %bb.0: # %entry
5783 ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm1
5784 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
5785 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
5786 ; NoVLX-NEXT: kmovw %k0, %ecx
5787 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
5788 ; NoVLX-NEXT: vpcmpgtw 32(%rdi), %ymm0, %ymm0
5789 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5790 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5791 ; NoVLX-NEXT: kmovw %k0, %eax
5792 ; NoVLX-NEXT: shll $16, %eax
5793 ; NoVLX-NEXT: orl %ecx, %eax
5794 ; NoVLX-NEXT: vzeroupper
5797 %0 = bitcast <8 x i64> %__a to <32 x i16>
5798 %load = load <8 x i64>, ptr %__b
5799 %1 = bitcast <8 x i64> %load to <32 x i16>
5800 %2 = icmp sgt <32 x i16> %0, %1
5801 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5802 %4 = bitcast <64 x i1> %3 to i64
; Masked variant: signed greater-than compare of %__a and %__b as <32 x i16>,
; ANDed lane-wise with %__u reinterpreted as <32 x i1>. The masked <32 x i1>
; result fills lanes 0-31 of a <64 x i1> vector; lanes 32-63 come from
; zeroinitializer, and that vector is bitcast to i64.
; The AVX512F-only run expects the compare to be split into two ymm halves, with
; %edi (and %edi shifted right by 16) ANDed into the respective 16-bit masks
; before they are recombined.
5806 define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
5807 ; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
5808 ; VLX: # %bb.0: # %entry
5809 ; VLX-NEXT: kmovd %edi, %k1
5810 ; VLX-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
5811 ; VLX-NEXT: kmovq %k0, %rax
5812 ; VLX-NEXT: vzeroupper
5815 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
5816 ; NoVLX: # %bb.0: # %entry
5817 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm2
5818 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
5819 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
5820 ; NoVLX-NEXT: kmovw %k0, %eax
5821 ; NoVLX-NEXT: andl %edi, %eax
5822 ; NoVLX-NEXT: shrl $16, %edi
5823 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
5824 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
5825 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
5826 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5827 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5828 ; NoVLX-NEXT: kmovw %k0, %ecx
5829 ; NoVLX-NEXT: andl %edi, %ecx
5830 ; NoVLX-NEXT: shll $16, %ecx
5831 ; NoVLX-NEXT: movzwl %ax, %eax
5832 ; NoVLX-NEXT: orl %ecx, %eax
5833 ; NoVLX-NEXT: vzeroupper
5836 %0 = bitcast <8 x i64> %__a to <32 x i16>
5837 %1 = bitcast <8 x i64> %__b to <32 x i16>
5838 %2 = icmp sgt <32 x i16> %0, %1
5839 %3 = bitcast i32 %__u to <32 x i1>
5840 %4 = and <32 x i1> %2, %3
5841 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5842 %6 = bitcast <64 x i1> %5 to i64
; Masked, memory-operand variant: signed greater-than compare of %__a (as
; <32 x i16>) against a <32 x i16> vector loaded from %__b, ANDed lane-wise with
; %__u reinterpreted as <32 x i1>. The result fills lanes 0-31 of a <64 x i1>
; vector; lanes 32-63 come from zeroinitializer, and that vector is bitcast to
; i64.
; The AVX512F-only run expects two ymm compares reading (%rsi) and 32(%rsi),
; each masked with the matching 16-bit half of %edi before recombination.
5846 define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
5847 ; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
5848 ; VLX: # %bb.0: # %entry
5849 ; VLX-NEXT: kmovd %edi, %k1
5850 ; VLX-NEXT: vpcmpgtw (%rsi), %zmm0, %k0 {%k1}
5851 ; VLX-NEXT: kmovq %k0, %rax
5852 ; VLX-NEXT: vzeroupper
5855 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
5856 ; NoVLX: # %bb.0: # %entry
5857 ; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm1
5858 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
5859 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
5860 ; NoVLX-NEXT: kmovw %k0, %eax
5861 ; NoVLX-NEXT: andl %edi, %eax
5862 ; NoVLX-NEXT: shrl $16, %edi
5863 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
5864 ; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm0, %ymm0
5865 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5866 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5867 ; NoVLX-NEXT: kmovw %k0, %ecx
5868 ; NoVLX-NEXT: andl %edi, %ecx
5869 ; NoVLX-NEXT: shll $16, %ecx
5870 ; NoVLX-NEXT: movzwl %ax, %eax
5871 ; NoVLX-NEXT: orl %ecx, %eax
5872 ; NoVLX-NEXT: vzeroupper
5875 %0 = bitcast <8 x i64> %__a to <32 x i16>
5876 %load = load <8 x i64>, ptr %__b
5877 %1 = bitcast <8 x i64> %load to <32 x i16>
5878 %2 = icmp sgt <32 x i16> %0, %1
5879 %3 = bitcast i32 %__u to <32 x i1>
5880 %4 = and <32 x i1> %2, %3
5881 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5882 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of %__a and %__b viewed as <4 x i32>. The
; <4 x i1> result fills lanes 0-3 of an <8 x i1> vector; lanes 4-7 come from
; zeroinitializer (shuffle indices 4-7), and that vector is bitcast to i8.
; The AVX512F-only run expects the operands to be treated as zmm and mask bits
; 4-15 to be cleared with a kshiftlw $12 / kshiftrw $12 pair.
5887 define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5888 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask:
5889 ; VLX: # %bb.0: # %entry
5890 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
5891 ; VLX-NEXT: kmovd %k0, %eax
5892 ; VLX-NEXT: # kill: def $al killed $al killed $eax
5895 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask:
5896 ; NoVLX: # %bb.0: # %entry
5897 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
5898 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5899 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
5900 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
5901 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
5902 ; NoVLX-NEXT: kmovw %k0, %eax
5903 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
5904 ; NoVLX-NEXT: vzeroupper
5907 %0 = bitcast <2 x i64> %__a to <4 x i32>
5908 %1 = bitcast <2 x i64> %__b to <4 x i32>
5909 %2 = icmp sgt <4 x i32> %0, %1
5910 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5911 %4 = bitcast <8 x i1> %3 to i8
; Memory-operand variant: signed greater-than compare of %__a (as <4 x i32>)
; against a <4 x i32> vector loaded from %__b. The <4 x i1> result fills lanes
; 0-3 of an <8 x i1> vector; lanes 4-7 come from zeroinitializer, and that
; vector is bitcast to i8.
; The AVX512F-only run expects a separate vmovdqa load (not folded into the
; zmm compare) followed by the kshiftlw/kshiftrw pair that clears bits 4-15.
5915 define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
5916 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem:
5917 ; VLX: # %bb.0: # %entry
5918 ; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
5919 ; VLX-NEXT: kmovd %k0, %eax
5920 ; VLX-NEXT: # kill: def $al killed $al killed $eax
5923 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem:
5924 ; NoVLX: # %bb.0: # %entry
5925 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5926 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
5927 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
5928 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
5929 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
5930 ; NoVLX-NEXT: kmovw %k0, %eax
5931 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
5932 ; NoVLX-NEXT: vzeroupper
5935 %0 = bitcast <2 x i64> %__a to <4 x i32>
5936 %load = load <2 x i64>, ptr %__b
5937 %1 = bitcast <2 x i64> %load to <4 x i32>
5938 %2 = icmp sgt <4 x i32> %0, %1
5939 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5940 %4 = bitcast <8 x i1> %3 to i8
; Masked variant: signed greater-than compare of %__a and %__b as <4 x i32>.
; %__u is reinterpreted as <8 x i1>, its lanes 0-3 are extracted, and they are
; ANDed with the compare result. The masked <4 x i1> value fills lanes 0-3 of
; an <8 x i1> vector; lanes 4-7 come from zeroinitializer, and that vector is
; bitcast to i8.
; Both run configurations expect %edi to be moved into %k1 and used as the
; write-mask of the compare.
5944 define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5945 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask:
5946 ; VLX: # %bb.0: # %entry
5947 ; VLX-NEXT: kmovd %edi, %k1
5948 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
5949 ; VLX-NEXT: kmovd %k0, %eax
5950 ; VLX-NEXT: # kill: def $al killed $al killed $eax
5953 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask:
5954 ; NoVLX: # %bb.0: # %entry
5955 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
5956 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5957 ; NoVLX-NEXT: kmovw %edi, %k1
5958 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
5959 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
5960 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
5961 ; NoVLX-NEXT: kmovw %k0, %eax
5962 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
5963 ; NoVLX-NEXT: vzeroupper
5966 %0 = bitcast <2 x i64> %__a to <4 x i32>
5967 %1 = bitcast <2 x i64> %__b to <4 x i32>
5968 %2 = icmp sgt <4 x i32> %0, %1
5969 %3 = bitcast i8 %__u to <8 x i1>
5970 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5971 %4 = and <4 x i1> %2, %extract.i
5972 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5973 %6 = bitcast <8 x i1> %5 to i8
; Masked, memory-operand variant: signed greater-than compare of %__a (as
; <4 x i32>) against a <4 x i32> vector loaded from %__b, ANDed with lanes 0-3
; of %__u reinterpreted as <8 x i1>. The masked <4 x i1> value fills lanes 0-3
; of an <8 x i1> vector; lanes 4-7 come from zeroinitializer, and that vector is
; bitcast to i8.
; The AVX512F-only run expects a separate vmovdqa load ahead of the masked zmm
; compare.
5977 define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
5978 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem:
5979 ; VLX: # %bb.0: # %entry
5980 ; VLX-NEXT: kmovd %edi, %k1
5981 ; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
5982 ; VLX-NEXT: kmovd %k0, %eax
5983 ; VLX-NEXT: # kill: def $al killed $al killed $eax
5986 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem:
5987 ; NoVLX: # %bb.0: # %entry
5988 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5989 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
5990 ; NoVLX-NEXT: kmovw %edi, %k1
5991 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
5992 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
5993 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
5994 ; NoVLX-NEXT: kmovw %k0, %eax
5995 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
5996 ; NoVLX-NEXT: vzeroupper
5999 %0 = bitcast <2 x i64> %__a to <4 x i32>
6000 %load = load <2 x i64>, ptr %__b
6001 %1 = bitcast <2 x i64> %load to <4 x i32>
6002 %2 = icmp sgt <4 x i32> %0, %1
6003 %3 = bitcast i8 %__u to <8 x i1>
6004 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6005 %4 = and <4 x i1> %2, %extract.i
6006 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6007 %6 = bitcast <8 x i1> %5 to i8
; Broadcast variant: a single i32 is loaded from %__b and splatted to all four
; lanes (insertelement into lane 0, then an all-zero-index shuffle). %__a (as
; <4 x i32>) is compared signed greater-than against that splat. The <4 x i1>
; result fills lanes 0-3 of an <8 x i1> vector; lanes 4-7 come from
; zeroinitializer, and that vector is bitcast to i8.
; The expected code uses an embedded-broadcast memory operand: {1to4} on xmm
; with AVX512VL, {1to16} on zmm with only AVX512F.
6012 define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
6013 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
6014 ; VLX: # %bb.0: # %entry
6015 ; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
6016 ; VLX-NEXT: kmovd %k0, %eax
6017 ; VLX-NEXT: # kill: def $al killed $al killed $eax
6020 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
6021 ; NoVLX: # %bb.0: # %entry
6022 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6023 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
6024 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6025 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6026 ; NoVLX-NEXT: kmovw %k0, %eax
6027 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
6028 ; NoVLX-NEXT: vzeroupper
6031 %0 = bitcast <2 x i64> %__a to <4 x i32>
6032 %load = load i32, ptr %__b
6033 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6034 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6035 %2 = icmp sgt <4 x i32> %0, %1
6036 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6037 %4 = bitcast <8 x i1> %3 to i8
; Masked broadcast variant: a single i32 loaded from %__b is splatted to four
; lanes and %__a (as <4 x i32>) is compared signed greater-than against it.
; Lanes 0-3 of %__u (reinterpreted as <8 x i1>) are ANDed with the compare
; result; note the mask is the first operand of the `and` here. The masked
; <4 x i1> value fills lanes 0-3 of an <8 x i1> vector; lanes 4-7 come from
; zeroinitializer, and that vector is bitcast to i8.
; The expected code is a write-masked compare with an embedded-broadcast
; memory operand.
6041 define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
6042 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
6043 ; VLX: # %bb.0: # %entry
6044 ; VLX-NEXT: kmovd %edi, %k1
6045 ; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
6046 ; VLX-NEXT: kmovd %k0, %eax
6047 ; VLX-NEXT: # kill: def $al killed $al killed $eax
6050 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
6051 ; NoVLX: # %bb.0: # %entry
6052 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6053 ; NoVLX-NEXT: kmovw %edi, %k1
6054 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
6055 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6056 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6057 ; NoVLX-NEXT: kmovw %k0, %eax
6058 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
6059 ; NoVLX-NEXT: vzeroupper
6062 %0 = bitcast <2 x i64> %__a to <4 x i32>
6063 %load = load i32, ptr %__b
6064 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6065 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6066 %2 = icmp sgt <4 x i32> %0, %1
6067 %3 = bitcast i8 %__u to <8 x i1>
6068 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6069 %4 = and <4 x i1> %extract.i, %2
6070 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6071 %6 = bitcast <8 x i1> %5 to i8
; Signed greater-than compare of %__a and %__b viewed as <4 x i32>. The
; <4 x i1> result fills lanes 0-3 of a <16 x i1> vector; lanes 4-15 all select
; shuffle indices 4-7, i.e. the zeroinitializer operand, and that vector is
; bitcast to i16.
; The AVX512F-only run expects a zmm compare followed by a
; kshiftlw $12 / kshiftrw $12 pair that clears mask bits 4-15.
6076 define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6077 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask:
6078 ; VLX: # %bb.0: # %entry
6079 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
6080 ; VLX-NEXT: kmovd %k0, %eax
6081 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6084 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask:
6085 ; NoVLX: # %bb.0: # %entry
6086 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
6087 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6088 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6089 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6090 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6091 ; NoVLX-NEXT: kmovw %k0, %eax
6092 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6093 ; NoVLX-NEXT: vzeroupper
6096 %0 = bitcast <2 x i64> %__a to <4 x i32>
6097 %1 = bitcast <2 x i64> %__b to <4 x i32>
6098 %2 = icmp sgt <4 x i32> %0, %1
6099 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6100 %4 = bitcast <16 x i1> %3 to i16
; Memory-operand variant: signed greater-than compare of %__a (as <4 x i32>)
; against a <4 x i32> vector loaded from %__b. The <4 x i1> result fills lanes
; 0-3 of a <16 x i1> vector; lanes 4-15 come from zeroinitializer, and that
; vector is bitcast to i16.
; With AVX512VL the load is expected to fold into the compare; with only
; AVX512F a separate vmovdqa precedes the zmm compare.
6104 define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
6105 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem:
6106 ; VLX: # %bb.0: # %entry
6107 ; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
6108 ; VLX-NEXT: kmovd %k0, %eax
6109 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6112 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem:
6113 ; NoVLX: # %bb.0: # %entry
6114 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6115 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
6116 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6117 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6118 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6119 ; NoVLX-NEXT: kmovw %k0, %eax
6120 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6121 ; NoVLX-NEXT: vzeroupper
6124 %0 = bitcast <2 x i64> %__a to <4 x i32>
6125 %load = load <2 x i64>, ptr %__b
6126 %1 = bitcast <2 x i64> %load to <4 x i32>
6127 %2 = icmp sgt <4 x i32> %0, %1
6128 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6129 %4 = bitcast <16 x i1> %3 to i16
; Masked variant: signed greater-than compare of %__a and %__b as <4 x i32>,
; ANDed with lanes 0-3 of %__u reinterpreted as <8 x i1>. The masked <4 x i1>
; value fills lanes 0-3 of a <16 x i1> vector; lanes 4-15 come from
; zeroinitializer, and that vector is bitcast to i16.
; Both run configurations expect %edi to be moved into %k1 and used as the
; write-mask of the compare.
6133 define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6134 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask:
6135 ; VLX: # %bb.0: # %entry
6136 ; VLX-NEXT: kmovd %edi, %k1
6137 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
6138 ; VLX-NEXT: kmovd %k0, %eax
6139 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6142 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask:
6143 ; NoVLX: # %bb.0: # %entry
6144 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
6145 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6146 ; NoVLX-NEXT: kmovw %edi, %k1
6147 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6148 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6149 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6150 ; NoVLX-NEXT: kmovw %k0, %eax
6151 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6152 ; NoVLX-NEXT: vzeroupper
6155 %0 = bitcast <2 x i64> %__a to <4 x i32>
6156 %1 = bitcast <2 x i64> %__b to <4 x i32>
6157 %2 = icmp sgt <4 x i32> %0, %1
6158 %3 = bitcast i8 %__u to <8 x i1>
6159 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6160 %4 = and <4 x i1> %2, %extract.i
6161 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6162 %6 = bitcast <16 x i1> %5 to i16
; Masked, memory-operand variant: signed greater-than compare of %__a (as
; <4 x i32>) against a <4 x i32> vector loaded from %__b, ANDed with lanes 0-3
; of %__u reinterpreted as <8 x i1>. The masked <4 x i1> value fills lanes 0-3
; of a <16 x i1> vector; lanes 4-15 come from zeroinitializer, and that vector
; is bitcast to i16.
; The AVX512F-only run expects a separate vmovdqa load ahead of the masked zmm
; compare.
6166 define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
6167 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem:
6168 ; VLX: # %bb.0: # %entry
6169 ; VLX-NEXT: kmovd %edi, %k1
6170 ; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
6171 ; VLX-NEXT: kmovd %k0, %eax
6172 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6175 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem:
6176 ; NoVLX: # %bb.0: # %entry
6177 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6178 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
6179 ; NoVLX-NEXT: kmovw %edi, %k1
6180 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6181 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6182 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6183 ; NoVLX-NEXT: kmovw %k0, %eax
6184 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6185 ; NoVLX-NEXT: vzeroupper
6188 %0 = bitcast <2 x i64> %__a to <4 x i32>
6189 %load = load <2 x i64>, ptr %__b
6190 %1 = bitcast <2 x i64> %load to <4 x i32>
6191 %2 = icmp sgt <4 x i32> %0, %1
6192 %3 = bitcast i8 %__u to <8 x i1>
6193 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6194 %4 = and <4 x i1> %2, %extract.i
6195 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6196 %6 = bitcast <16 x i1> %5 to i16
; Broadcast variant: a single i32 is loaded from %__b and splatted to all four
; lanes; %__a (as <4 x i32>) is compared signed greater-than against that
; splat. The <4 x i1> result fills lanes 0-3 of a <16 x i1> vector; lanes 4-15
; come from zeroinitializer, and that vector is bitcast to i16.
; The expected code uses an embedded-broadcast memory operand: {1to4} on xmm
; with AVX512VL, {1to16} on zmm with only AVX512F.
6201 define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
6202 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
6203 ; VLX: # %bb.0: # %entry
6204 ; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
6205 ; VLX-NEXT: kmovd %k0, %eax
6206 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6209 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
6210 ; NoVLX: # %bb.0: # %entry
6211 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6212 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
6213 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6214 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6215 ; NoVLX-NEXT: kmovw %k0, %eax
6216 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6217 ; NoVLX-NEXT: vzeroupper
6220 %0 = bitcast <2 x i64> %__a to <4 x i32>
6221 %load = load i32, ptr %__b
6222 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6223 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6224 %2 = icmp sgt <4 x i32> %0, %1
6225 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6226 %4 = bitcast <16 x i1> %3 to i16
; Masked broadcast variant: a single i32 loaded from %__b is splatted to four
; lanes and %__a (as <4 x i32>) is compared signed greater-than against it.
; Lanes 0-3 of %__u (reinterpreted as <8 x i1>) are ANDed with the compare
; result; note the mask is the first operand of the `and` here. The masked
; <4 x i1> value fills lanes 0-3 of a <16 x i1> vector; lanes 4-15 come from
; zeroinitializer, and that vector is bitcast to i16.
; The expected code is a write-masked compare with an embedded-broadcast
; memory operand.
6230 define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
6231 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
6232 ; VLX: # %bb.0: # %entry
6233 ; VLX-NEXT: kmovd %edi, %k1
6234 ; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
6235 ; VLX-NEXT: kmovd %k0, %eax
6236 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6239 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
6240 ; NoVLX: # %bb.0: # %entry
6241 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6242 ; NoVLX-NEXT: kmovw %edi, %k1
6243 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
6244 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6245 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6246 ; NoVLX-NEXT: kmovw %k0, %eax
6247 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6248 ; NoVLX-NEXT: vzeroupper
6251 %0 = bitcast <2 x i64> %__a to <4 x i32>
6252 %load = load i32, ptr %__b
6253 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6254 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6255 %2 = icmp sgt <4 x i32> %0, %1
6256 %3 = bitcast i8 %__u to <8 x i1>
6257 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6258 %4 = and <4 x i1> %extract.i, %2
6259 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6260 %6 = bitcast <16 x i1> %5 to i16
; Signed greater-than compare of %__a and %__b viewed as <4 x i32>. The
; <4 x i1> result fills lanes 0-3 of a <32 x i1> vector; lanes 4-31 all select
; shuffle indices 4-7, i.e. the zeroinitializer operand, and that vector is
; bitcast to i32.
; The AVX512F-only run expects a zmm compare followed by a
; kshiftlw $12 / kshiftrw $12 pair that clears mask bits 4-15.
6265 define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6266 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask:
6267 ; VLX: # %bb.0: # %entry
6268 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
6269 ; VLX-NEXT: kmovd %k0, %eax
6272 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask:
6273 ; NoVLX: # %bb.0: # %entry
6274 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
6275 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6276 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6277 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6278 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6279 ; NoVLX-NEXT: kmovw %k0, %eax
6280 ; NoVLX-NEXT: vzeroupper
6283 %0 = bitcast <2 x i64> %__a to <4 x i32>
6284 %1 = bitcast <2 x i64> %__b to <4 x i32>
6285 %2 = icmp sgt <4 x i32> %0, %1
6286 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6287 %4 = bitcast <32 x i1> %3 to i32
; Memory-operand variant: signed greater-than compare of %__a (as <4 x i32>)
; against a <4 x i32> vector loaded from %__b. The <4 x i1> result fills lanes
; 0-3 of a <32 x i1> vector; lanes 4-31 come from zeroinitializer, and that
; vector is bitcast to i32.
; With AVX512VL the load is expected to fold into the compare; with only
; AVX512F a separate vmovdqa precedes the zmm compare.
6291 define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
6292 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem:
6293 ; VLX: # %bb.0: # %entry
6294 ; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
6295 ; VLX-NEXT: kmovd %k0, %eax
6298 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem:
6299 ; NoVLX: # %bb.0: # %entry
6300 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6301 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
6302 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6303 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6304 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6305 ; NoVLX-NEXT: kmovw %k0, %eax
6306 ; NoVLX-NEXT: vzeroupper
6309 %0 = bitcast <2 x i64> %__a to <4 x i32>
6310 %load = load <2 x i64>, ptr %__b
6311 %1 = bitcast <2 x i64> %load to <4 x i32>
6312 %2 = icmp sgt <4 x i32> %0, %1
6313 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6314 %4 = bitcast <32 x i1> %3 to i32
; Masked variant: signed greater-than compare of %__a and %__b as <4 x i32>,
; ANDed with lanes 0-3 of %__u reinterpreted as <8 x i1>. The masked <4 x i1>
; value fills lanes 0-3 of a <32 x i1> vector; lanes 4-31 come from
; zeroinitializer, and that vector is bitcast to i32.
; Both run configurations expect %edi to be moved into %k1 and used as the
; write-mask of the compare.
6318 define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6319 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask:
6320 ; VLX: # %bb.0: # %entry
6321 ; VLX-NEXT: kmovd %edi, %k1
6322 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
6323 ; VLX-NEXT: kmovd %k0, %eax
6326 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask:
6327 ; NoVLX: # %bb.0: # %entry
6328 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
6329 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6330 ; NoVLX-NEXT: kmovw %edi, %k1
6331 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6332 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6333 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6334 ; NoVLX-NEXT: kmovw %k0, %eax
6335 ; NoVLX-NEXT: vzeroupper
6338 %0 = bitcast <2 x i64> %__a to <4 x i32>
6339 %1 = bitcast <2 x i64> %__b to <4 x i32>
6340 %2 = icmp sgt <4 x i32> %0, %1
6341 %3 = bitcast i8 %__u to <8 x i1>
6342 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6343 %4 = and <4 x i1> %2, %extract.i
6344 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6345 %6 = bitcast <32 x i1> %5 to i32
; Masked, memory-operand variant: signed greater-than compare of %__a (as
; <4 x i32>) against a <4 x i32> vector loaded from %__b, ANDed with lanes 0-3
; of %__u reinterpreted as <8 x i1>. The masked <4 x i1> value fills lanes 0-3
; of a <32 x i1> vector; lanes 4-31 come from zeroinitializer, and that vector
; is bitcast to i32.
; The AVX512F-only run expects a separate vmovdqa load ahead of the masked zmm
; compare.
6349 define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
6350 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem:
6351 ; VLX: # %bb.0: # %entry
6352 ; VLX-NEXT: kmovd %edi, %k1
6353 ; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
6354 ; VLX-NEXT: kmovd %k0, %eax
6357 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem:
6358 ; NoVLX: # %bb.0: # %entry
6359 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6360 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
6361 ; NoVLX-NEXT: kmovw %edi, %k1
6362 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6363 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6364 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6365 ; NoVLX-NEXT: kmovw %k0, %eax
6366 ; NoVLX-NEXT: vzeroupper
6369 %0 = bitcast <2 x i64> %__a to <4 x i32>
6370 %load = load <2 x i64>, ptr %__b
6371 %1 = bitcast <2 x i64> %load to <4 x i32>
6372 %2 = icmp sgt <4 x i32> %0, %1
6373 %3 = bitcast i8 %__u to <8 x i1>
6374 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6375 %4 = and <4 x i1> %2, %extract.i
6376 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6377 %6 = bitcast <32 x i1> %5 to i32
; Broadcast variant: a single i32 is loaded from %__b and splatted to all four
; lanes; %__a (as <4 x i32>) is compared signed greater-than against that
; splat. The <4 x i1> result fills lanes 0-3 of a <32 x i1> vector; lanes 4-31
; come from zeroinitializer, and that vector is bitcast to i32.
; The expected code uses an embedded-broadcast memory operand: {1to4} on xmm
; with AVX512VL, {1to16} on zmm with only AVX512F.
6382 define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
6383 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
6384 ; VLX: # %bb.0: # %entry
6385 ; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
6386 ; VLX-NEXT: kmovd %k0, %eax
6389 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
6390 ; NoVLX: # %bb.0: # %entry
6391 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6392 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
6393 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6394 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6395 ; NoVLX-NEXT: kmovw %k0, %eax
6396 ; NoVLX-NEXT: vzeroupper
6399 %0 = bitcast <2 x i64> %__a to <4 x i32>
6400 %load = load i32, ptr %__b
6401 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6402 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6403 %2 = icmp sgt <4 x i32> %0, %1
6404 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6405 %4 = bitcast <32 x i1> %3 to i32
; Masked form of the broadcast compare: %__a (viewed as <4 x i32>) is compared
; signed greater-than against a splat of the i32 loaded from %__b, and the
; result is ANDed with the low four lanes of %__u viewed as <8 x i1>. The
; <4 x i1> value is widened to <32 x i1> with zero upper lanes (shuffle
; indices 4-7 pick from zeroinitializer) and bitcast to i32.
; Expected codegen moves %edi into %k1 and applies it as the write-mask of the
; compare. With AVX512VL the compare is a {1to4} broadcast on xmm; with
; AVX512F alone it is a {1to16} broadcast on zmm followed by a
; kshiftlw/kshiftrw $12 pair that clears the mask bits above the low four.
6409 define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
6410 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
6411 ; VLX: # %bb.0: # %entry
6412 ; VLX-NEXT: kmovd %edi, %k1
6413 ; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
6414 ; VLX-NEXT: kmovd %k0, %eax
6417 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
6418 ; NoVLX: # %bb.0: # %entry
6419 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6420 ; NoVLX-NEXT: kmovw %edi, %k1
6421 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
6422 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6423 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6424 ; NoVLX-NEXT: kmovw %k0, %eax
6425 ; NoVLX-NEXT: vzeroupper
6428 %0 = bitcast <2 x i64> %__a to <4 x i32>
6429 %load = load i32, ptr %__b
6430 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6431 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6432 %2 = icmp sgt <4 x i32> %0, %1
6433 %3 = bitcast i8 %__u to <8 x i1>
6434 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6435 %4 = and <4 x i1> %extract.i, %2
6436 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6437 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed greater-than of two register operands viewed as <4 x i32>.
; The <4 x i1> result is widened to <64 x i1> with zero upper lanes (shuffle
; indices 4-7 pick from zeroinitializer) and bitcast to i64.
; With AVX512VL this is one xmm vpcmpgtd into %k0, read out with kmovq. With
; AVX512F alone both inputs are treated as zmm, the compare runs at 512 bits,
; a kshiftlw/kshiftrw $12 pair clears the mask bits above the low four, and
; the mask is read with kmovw into %eax.
6442 define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6443 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask:
6444 ; VLX: # %bb.0: # %entry
6445 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
6446 ; VLX-NEXT: kmovq %k0, %rax
6449 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask:
6450 ; NoVLX: # %bb.0: # %entry
6451 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
6452 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6453 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6454 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6455 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6456 ; NoVLX-NEXT: kmovw %k0, %eax
6457 ; NoVLX-NEXT: vzeroupper
6460 %0 = bitcast <2 x i64> %__a to <4 x i32>
6461 %1 = bitcast <2 x i64> %__b to <4 x i32>
6462 %2 = icmp sgt <4 x i32> %0, %1
6463 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6464 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed greater-than of %__a (viewed as <4 x i32>) against a full
; <2 x i64> vector loaded from %__b. The <4 x i1> result is widened to
; <64 x i1> with zero upper lanes (shuffle indices 4-7 pick from
; zeroinitializer) and bitcast to i64.
; With AVX512VL the load folds into the xmm compare and the mask is read with
; kmovq. With AVX512F alone the 128-bit operand is first loaded into %xmm1 with
; vmovdqa, the compare runs on zmm, and a kshiftlw/kshiftrw $12 pair clears
; the mask bits above the low four.
6468 define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
6469 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem:
6470 ; VLX: # %bb.0: # %entry
6471 ; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
6472 ; VLX-NEXT: kmovq %k0, %rax
6475 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem:
6476 ; NoVLX: # %bb.0: # %entry
6477 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6478 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
6479 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6480 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6481 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6482 ; NoVLX-NEXT: kmovw %k0, %eax
6483 ; NoVLX-NEXT: vzeroupper
6486 %0 = bitcast <2 x i64> %__a to <4 x i32>
6487 %load = load <2 x i64>, ptr %__b
6488 %1 = bitcast <2 x i64> %load to <4 x i32>
6489 %2 = icmp sgt <4 x i32> %0, %1
6490 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6491 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than of two register operands viewed as <4 x i32>.
; The compare result is ANDed with the low four lanes of %__u viewed as
; <8 x i1>, widened to <64 x i1> with zero upper lanes (shuffle indices 4-7
; pick from zeroinitializer) and bitcast to i64.
; Expected codegen moves %edi into %k1 and applies it as the write-mask of the
; compare. With AVX512VL the compare runs on xmm and the mask is read with
; kmovq; with AVX512F alone it runs on zmm, a kshiftlw/kshiftrw $12 pair
; clears the mask bits above the low four, and the mask is read with kmovw.
6495 define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6496 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask:
6497 ; VLX: # %bb.0: # %entry
6498 ; VLX-NEXT: kmovd %edi, %k1
6499 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
6500 ; VLX-NEXT: kmovq %k0, %rax
6503 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask:
6504 ; NoVLX: # %bb.0: # %entry
6505 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
6506 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6507 ; NoVLX-NEXT: kmovw %edi, %k1
6508 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6509 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6510 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6511 ; NoVLX-NEXT: kmovw %k0, %eax
6512 ; NoVLX-NEXT: vzeroupper
6515 %0 = bitcast <2 x i64> %__a to <4 x i32>
6516 %1 = bitcast <2 x i64> %__b to <4 x i32>
6517 %2 = icmp sgt <4 x i32> %0, %1
6518 %3 = bitcast i8 %__u to <8 x i1>
6519 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6520 %4 = and <4 x i1> %2, %extract.i
6521 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6522 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-than of %__a (viewed as <4 x i32>) against a full
; <2 x i64> vector loaded from %__b. The compare result is ANDed with the low
; four lanes of %__u viewed as <8 x i1>, widened to <64 x i1> with zero upper
; lanes (shuffle indices 4-7 pick from zeroinitializer) and bitcast to i64.
; Expected codegen moves %edi into %k1 and applies it as the write-mask of the
; compare. With AVX512VL the load folds into the xmm compare; with AVX512F
; alone the operand is loaded into %xmm1 with vmovdqa, the compare runs on
; zmm, and a kshiftlw/kshiftrw $12 pair clears the mask bits above the low
; four.
6526 define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
6527 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem:
6528 ; VLX: # %bb.0: # %entry
6529 ; VLX-NEXT: kmovd %edi, %k1
6530 ; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
6531 ; VLX-NEXT: kmovq %k0, %rax
6534 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem:
6535 ; NoVLX: # %bb.0: # %entry
6536 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6537 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
6538 ; NoVLX-NEXT: kmovw %edi, %k1
6539 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6540 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6541 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6542 ; NoVLX-NEXT: kmovw %k0, %eax
6543 ; NoVLX-NEXT: vzeroupper
6546 %0 = bitcast <2 x i64> %__a to <4 x i32>
6547 %load = load <2 x i64>, ptr %__b
6548 %1 = bitcast <2 x i64> %load to <4 x i32>
6549 %2 = icmp sgt <4 x i32> %0, %1
6550 %3 = bitcast i8 %__u to <8 x i1>
6551 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6552 %4 = and <4 x i1> %2, %extract.i
6553 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6554 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than of %__a (viewed as <4 x i32>) against the i32
; loaded from %__b and splatted to all four lanes. The <4 x i1> result is
; widened to <64 x i1> with zero upper lanes (shuffle indices 4-7 pick from
; zeroinitializer) and bitcast to i64.
; With AVX512VL the splat folds into a {1to4} broadcast compare on xmm and the
; mask is read with kmovq. With AVX512F alone the compare is a {1to16}
; broadcast on zmm, followed by a kshiftlw/kshiftrw $12 pair that clears the
; mask bits above the low four.
6559 define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
6560 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
6561 ; VLX: # %bb.0: # %entry
6562 ; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
6563 ; VLX-NEXT: kmovq %k0, %rax
6566 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
6567 ; NoVLX: # %bb.0: # %entry
6568 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6569 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
6570 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6571 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6572 ; NoVLX-NEXT: kmovw %k0, %eax
6573 ; NoVLX-NEXT: vzeroupper
6576 %0 = bitcast <2 x i64> %__a to <4 x i32>
6577 %load = load i32, ptr %__b
6578 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6579 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6580 %2 = icmp sgt <4 x i32> %0, %1
6581 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6582 %4 = bitcast <64 x i1> %3 to i64
; Masked form of the broadcast compare: %__a (viewed as <4 x i32>) is compared
; signed greater-than against a splat of the i32 loaded from %__b, and the
; result is ANDed with the low four lanes of %__u viewed as <8 x i1>. The
; <4 x i1> value is widened to <64 x i1> with zero upper lanes (shuffle
; indices 4-7 pick from zeroinitializer) and bitcast to i64.
; Expected codegen moves %edi into %k1 and applies it as the write-mask of the
; compare. With AVX512VL the compare is a {1to4} broadcast on xmm; with
; AVX512F alone it is a {1to16} broadcast on zmm followed by a
; kshiftlw/kshiftrw $12 pair that clears the mask bits above the low four.
6586 define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
6587 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
6588 ; VLX: # %bb.0: # %entry
6589 ; VLX-NEXT: kmovd %edi, %k1
6590 ; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
6591 ; VLX-NEXT: kmovq %k0, %rax
6594 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
6595 ; NoVLX: # %bb.0: # %entry
6596 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6597 ; NoVLX-NEXT: kmovw %edi, %k1
6598 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
6599 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6600 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6601 ; NoVLX-NEXT: kmovw %k0, %eax
6602 ; NoVLX-NEXT: vzeroupper
6605 %0 = bitcast <2 x i64> %__a to <4 x i32>
6606 %load = load i32, ptr %__b
6607 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6608 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6609 %2 = icmp sgt <4 x i32> %0, %1
6610 %3 = bitcast i8 %__u to <8 x i1>
6611 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6612 %4 = and <4 x i1> %extract.i, %2
6613 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6614 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than of two register operands viewed as <8 x i32>.
; The <8 x i1> result is widened to <16 x i1> with zero upper lanes (shuffle
; indices 8-15 pick from zeroinitializer) and bitcast to i16.
; With AVX512VL this is one ymm vpcmpgtd into %k0. With AVX512F alone both
; inputs are treated as zmm, the compare runs at 512 bits, and a
; kshiftlw/kshiftrw $8 pair clears the upper eight mask bits.
6619 define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6620 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask:
6621 ; VLX: # %bb.0: # %entry
6622 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
6623 ; VLX-NEXT: kmovd %k0, %eax
6624 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6625 ; VLX-NEXT: vzeroupper
6628 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask:
6629 ; NoVLX: # %bb.0: # %entry
6630 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
6631 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6632 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6633 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6634 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6635 ; NoVLX-NEXT: kmovw %k0, %eax
6636 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6637 ; NoVLX-NEXT: vzeroupper
6640 %0 = bitcast <4 x i64> %__a to <8 x i32>
6641 %1 = bitcast <4 x i64> %__b to <8 x i32>
6642 %2 = icmp sgt <8 x i32> %0, %1
6643 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6644 %4 = bitcast <16 x i1> %3 to i16
; Unmasked signed greater-than of %__a (viewed as <8 x i32>) against a full
; <4 x i64> vector loaded from %__b. The <8 x i1> result is widened to
; <16 x i1> with zero upper lanes (shuffle indices 8-15 pick from
; zeroinitializer) and bitcast to i16.
; With AVX512VL the load folds into the ymm compare. With AVX512F alone the
; 256-bit operand is first loaded into %ymm1 with vmovdqa, the compare runs on
; zmm, and a kshiftlw/kshiftrw $8 pair clears the upper eight mask bits.
6648 define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
6649 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem:
6650 ; VLX: # %bb.0: # %entry
6651 ; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
6652 ; VLX-NEXT: kmovd %k0, %eax
6653 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6654 ; VLX-NEXT: vzeroupper
6657 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem:
6658 ; NoVLX: # %bb.0: # %entry
6659 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6660 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
6661 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6662 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6663 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6664 ; NoVLX-NEXT: kmovw %k0, %eax
6665 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6666 ; NoVLX-NEXT: vzeroupper
6669 %0 = bitcast <4 x i64> %__a to <8 x i32>
6670 %load = load <4 x i64>, ptr %__b
6671 %1 = bitcast <4 x i64> %load to <8 x i32>
6672 %2 = icmp sgt <8 x i32> %0, %1
6673 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6674 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than of two register operands viewed as <8 x i32>.
; The compare result is ANDed with %__u viewed as <8 x i1>, widened to
; <16 x i1> with zero upper lanes (shuffle indices 8-15 pick from
; zeroinitializer) and bitcast to i16.
; Expected codegen moves %edi into %k1 and applies it as the write-mask of the
; compare. With AVX512VL the compare runs on ymm; with AVX512F alone it runs
; on zmm and a kshiftlw/kshiftrw $8 pair clears the upper eight mask bits.
6678 define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6679 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask:
6680 ; VLX: # %bb.0: # %entry
6681 ; VLX-NEXT: kmovd %edi, %k1
6682 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
6683 ; VLX-NEXT: kmovd %k0, %eax
6684 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6685 ; VLX-NEXT: vzeroupper
6688 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask:
6689 ; NoVLX: # %bb.0: # %entry
6690 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
6691 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6692 ; NoVLX-NEXT: kmovw %edi, %k1
6693 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6694 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6695 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6696 ; NoVLX-NEXT: kmovw %k0, %eax
6697 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6698 ; NoVLX-NEXT: vzeroupper
6701 %0 = bitcast <4 x i64> %__a to <8 x i32>
6702 %1 = bitcast <4 x i64> %__b to <8 x i32>
6703 %2 = icmp sgt <8 x i32> %0, %1
6704 %3 = bitcast i8 %__u to <8 x i1>
6705 %4 = and <8 x i1> %2, %3
6706 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6707 %6 = bitcast <16 x i1> %5 to i16
; Masked signed greater-than of %__a (viewed as <8 x i32>) against a full
; <4 x i64> vector loaded from %__b. The compare result is ANDed with %__u
; viewed as <8 x i1>, widened to <16 x i1> with zero upper lanes (shuffle
; indices 8-15 pick from zeroinitializer) and bitcast to i16.
; Expected codegen moves %edi into %k1 and applies it as the write-mask of the
; compare. With AVX512VL the load folds into the ymm compare; with AVX512F
; alone the operand is loaded into %ymm1 with vmovdqa, the compare runs on
; zmm, and a kshiftlw/kshiftrw $8 pair clears the upper eight mask bits.
6711 define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
6712 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem:
6713 ; VLX: # %bb.0: # %entry
6714 ; VLX-NEXT: kmovd %edi, %k1
6715 ; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
6716 ; VLX-NEXT: kmovd %k0, %eax
6717 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6718 ; VLX-NEXT: vzeroupper
6721 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem:
6722 ; NoVLX: # %bb.0: # %entry
6723 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6724 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
6725 ; NoVLX-NEXT: kmovw %edi, %k1
6726 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6727 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6728 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6729 ; NoVLX-NEXT: kmovw %k0, %eax
6730 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6731 ; NoVLX-NEXT: vzeroupper
6734 %0 = bitcast <4 x i64> %__a to <8 x i32>
6735 %load = load <4 x i64>, ptr %__b
6736 %1 = bitcast <4 x i64> %load to <8 x i32>
6737 %2 = icmp sgt <8 x i32> %0, %1
6738 %3 = bitcast i8 %__u to <8 x i1>
6739 %4 = and <8 x i1> %2, %3
6740 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6741 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed greater-than of %__a (viewed as <8 x i32>) against the i32
; loaded from %__b and splatted to all eight lanes. The <8 x i1> result is
; widened to <16 x i1> with zero upper lanes (shuffle indices 8-15 pick from
; zeroinitializer) and bitcast to i16.
; With AVX512VL the splat folds into a {1to8} broadcast compare on ymm. With
; AVX512F alone the compare is a {1to16} broadcast on zmm, followed by a
; kshiftlw/kshiftrw $8 pair that clears the upper eight mask bits.
6746 define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
6747 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
6748 ; VLX: # %bb.0: # %entry
6749 ; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
6750 ; VLX-NEXT: kmovd %k0, %eax
6751 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6752 ; VLX-NEXT: vzeroupper
6755 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
6756 ; NoVLX: # %bb.0: # %entry
6757 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6758 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
6759 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6760 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6761 ; NoVLX-NEXT: kmovw %k0, %eax
6762 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6763 ; NoVLX-NEXT: vzeroupper
6766 %0 = bitcast <4 x i64> %__a to <8 x i32>
6767 %load = load i32, ptr %__b
6768 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
6769 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
6770 %2 = icmp sgt <8 x i32> %0, %1
6771 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6772 %4 = bitcast <16 x i1> %3 to i16
; Masked form of the broadcast compare: %__a (viewed as <8 x i32>) is compared
; signed greater-than against a splat of the i32 loaded from %__b, and the
; result is ANDed with %__u viewed as <8 x i1>. The <8 x i1> value is widened
; to <16 x i1> with zero upper lanes (shuffle indices 8-15 pick from
; zeroinitializer) and bitcast to i16.
; Expected codegen moves %edi into %k1 and applies it as the write-mask of the
; compare. With AVX512VL the compare is a {1to8} broadcast on ymm; with
; AVX512F alone it is a {1to16} broadcast on zmm followed by a
; kshiftlw/kshiftrw $8 pair that clears the upper eight mask bits.
6776 define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
6777 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
6778 ; VLX: # %bb.0: # %entry
6779 ; VLX-NEXT: kmovd %edi, %k1
6780 ; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
6781 ; VLX-NEXT: kmovd %k0, %eax
6782 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6783 ; VLX-NEXT: vzeroupper
6786 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
6787 ; NoVLX: # %bb.0: # %entry
6788 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6789 ; NoVLX-NEXT: kmovw %edi, %k1
6790 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
6791 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6792 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6793 ; NoVLX-NEXT: kmovw %k0, %eax
6794 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6795 ; NoVLX-NEXT: vzeroupper
6798 %0 = bitcast <4 x i64> %__a to <8 x i32>
6799 %load = load i32, ptr %__b
6800 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
6801 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
6802 %2 = icmp sgt <8 x i32> %0, %1
6803 %3 = bitcast i8 %__u to <8 x i1>
6804 %4 = and <8 x i1> %3, %2
6805 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6806 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed greater-than of two register operands viewed as <8 x i32>.
; The <8 x i1> result is widened to <32 x i1> with zero upper lanes (shuffle
; indices 8-15 pick from zeroinitializer) and bitcast to i32.
; With AVX512VL this is one ymm vpcmpgtd into %k0, read out with kmovd. With
; AVX512F alone both inputs are treated as zmm, the compare runs at 512 bits,
; and a kshiftlw/kshiftrw $8 pair clears the upper eight mask bits before the
; mask is read with kmovw.
6811 define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6812 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask:
6813 ; VLX: # %bb.0: # %entry
6814 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
6815 ; VLX-NEXT: kmovd %k0, %eax
6816 ; VLX-NEXT: vzeroupper
6819 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask:
6820 ; NoVLX: # %bb.0: # %entry
6821 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
6822 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6823 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6824 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6825 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6826 ; NoVLX-NEXT: kmovw %k0, %eax
6827 ; NoVLX-NEXT: vzeroupper
6830 %0 = bitcast <4 x i64> %__a to <8 x i32>
6831 %1 = bitcast <4 x i64> %__b to <8 x i32>
6832 %2 = icmp sgt <8 x i32> %0, %1
6833 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6834 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed greater-than of %__a (viewed as <8 x i32>) against a full
; <4 x i64> vector loaded from %__b. The <8 x i1> result is widened to
; <32 x i1> with zero upper lanes (shuffle indices 8-15 pick from
; zeroinitializer) and bitcast to i32.
; With AVX512VL the load folds into the ymm compare. With AVX512F alone the
; 256-bit operand is first loaded into %ymm1 with vmovdqa, the compare runs on
; zmm, and a kshiftlw/kshiftrw $8 pair clears the upper eight mask bits.
6838 define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
6839 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem:
6840 ; VLX: # %bb.0: # %entry
6841 ; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
6842 ; VLX-NEXT: kmovd %k0, %eax
6843 ; VLX-NEXT: vzeroupper
6846 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem:
6847 ; NoVLX: # %bb.0: # %entry
6848 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6849 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
6850 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6851 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6852 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6853 ; NoVLX-NEXT: kmovw %k0, %eax
6854 ; NoVLX-NEXT: vzeroupper
6857 %0 = bitcast <4 x i64> %__a to <8 x i32>
6858 %load = load <4 x i64>, ptr %__b
6859 %1 = bitcast <4 x i64> %load to <8 x i32>
6860 %2 = icmp sgt <8 x i32> %0, %1
6861 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6862 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than of two register operands viewed as <8 x i32>.
; The compare result is ANDed with %__u viewed as <8 x i1>, widened to
; <32 x i1> with zero upper lanes (shuffle indices 8-15 pick from
; zeroinitializer) and bitcast to i32.
; Expected codegen moves %edi into %k1 and applies it as the write-mask of the
; compare. With AVX512VL the compare runs on ymm; with AVX512F alone it runs
; on zmm and a kshiftlw/kshiftrw $8 pair clears the upper eight mask bits.
6866 define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6867 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask:
6868 ; VLX: # %bb.0: # %entry
6869 ; VLX-NEXT: kmovd %edi, %k1
6870 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
6871 ; VLX-NEXT: kmovd %k0, %eax
6872 ; VLX-NEXT: vzeroupper
6875 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask:
6876 ; NoVLX: # %bb.0: # %entry
6877 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
6878 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6879 ; NoVLX-NEXT: kmovw %edi, %k1
6880 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6881 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6882 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6883 ; NoVLX-NEXT: kmovw %k0, %eax
6884 ; NoVLX-NEXT: vzeroupper
6887 %0 = bitcast <4 x i64> %__a to <8 x i32>
6888 %1 = bitcast <4 x i64> %__b to <8 x i32>
6889 %2 = icmp sgt <8 x i32> %0, %1
6890 %3 = bitcast i8 %__u to <8 x i1>
6891 %4 = and <8 x i1> %2, %3
6892 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6893 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-than of %__a (viewed as <8 x i32>) against a full
; <4 x i64> vector loaded from %__b. The compare result is ANDed with %__u
; viewed as <8 x i1>, widened to <32 x i1> with zero upper lanes (shuffle
; indices 8-15 pick from zeroinitializer) and bitcast to i32.
; Expected codegen moves %edi into %k1 and applies it as the write-mask of the
; compare. With AVX512VL the load folds into the ymm compare; with AVX512F
; alone the operand is loaded into %ymm1 with vmovdqa, the compare runs on
; zmm, and a kshiftlw/kshiftrw $8 pair clears the upper eight mask bits.
6897 define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
6898 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem:
6899 ; VLX: # %bb.0: # %entry
6900 ; VLX-NEXT: kmovd %edi, %k1
6901 ; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
6902 ; VLX-NEXT: kmovd %k0, %eax
6903 ; VLX-NEXT: vzeroupper
6906 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem:
6907 ; NoVLX: # %bb.0: # %entry
6908 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6909 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
6910 ; NoVLX-NEXT: kmovw %edi, %k1
6911 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6912 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6913 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6914 ; NoVLX-NEXT: kmovw %k0, %eax
6915 ; NoVLX-NEXT: vzeroupper
6918 %0 = bitcast <4 x i64> %__a to <8 x i32>
6919 %load = load <4 x i64>, ptr %__b
6920 %1 = bitcast <4 x i64> %load to <8 x i32>
6921 %2 = icmp sgt <8 x i32> %0, %1
6922 %3 = bitcast i8 %__u to <8 x i1>
6923 %4 = and <8 x i1> %2, %3
6924 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6925 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed greater-than of %__a (viewed as <8 x i32>) against the i32
; loaded from %__b and splatted to all eight lanes. The <8 x i1> result is
; widened to <32 x i1> with zero upper lanes (shuffle indices 8-15 pick from
; zeroinitializer) and bitcast to i32.
; With AVX512VL the splat folds into a {1to8} broadcast compare on ymm. With
; AVX512F alone the compare is a {1to16} broadcast on zmm, followed by a
; kshiftlw/kshiftrw $8 pair that clears the upper eight mask bits.
6930 define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
6931 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
6932 ; VLX: # %bb.0: # %entry
6933 ; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
6934 ; VLX-NEXT: kmovd %k0, %eax
6935 ; VLX-NEXT: vzeroupper
6938 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
6939 ; NoVLX: # %bb.0: # %entry
6940 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6941 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
6942 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6943 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6944 ; NoVLX-NEXT: kmovw %k0, %eax
6945 ; NoVLX-NEXT: vzeroupper
6948 %0 = bitcast <4 x i64> %__a to <8 x i32>
6949 %load = load i32, ptr %__b
6950 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
6951 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
6952 %2 = icmp sgt <8 x i32> %0, %1
6953 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6954 %4 = bitcast <32 x i1> %3 to i32
; Masked form of the broadcast compare: %__a (viewed as <8 x i32>) is compared
; signed greater-than against a splat of the i32 loaded from %__b, and the
; result is ANDed with %__u viewed as <8 x i1>. The <8 x i1> value is widened
; to <32 x i1> with zero upper lanes (shuffle indices 8-15 pick from
; zeroinitializer) and bitcast to i32.
; Expected codegen moves %edi into %k1 and applies it as the write-mask of the
; compare. With AVX512VL the compare is a {1to8} broadcast on ymm; with
; AVX512F alone it is a {1to16} broadcast on zmm followed by a
; kshiftlw/kshiftrw $8 pair that clears the upper eight mask bits.
6958 define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
6959 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
6960 ; VLX: # %bb.0: # %entry
6961 ; VLX-NEXT: kmovd %edi, %k1
6962 ; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
6963 ; VLX-NEXT: kmovd %k0, %eax
6964 ; VLX-NEXT: vzeroupper
6967 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
6968 ; NoVLX: # %bb.0: # %entry
6969 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6970 ; NoVLX-NEXT: kmovw %edi, %k1
6971 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
6972 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6973 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6974 ; NoVLX-NEXT: kmovw %k0, %eax
6975 ; NoVLX-NEXT: vzeroupper
6978 %0 = bitcast <4 x i64> %__a to <8 x i32>
6979 %load = load i32, ptr %__b
6980 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
6981 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
6982 %2 = icmp sgt <8 x i32> %0, %1
6983 %3 = bitcast i8 %__u to <8 x i1>
6984 %4 = and <8 x i1> %3, %2
6985 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6986 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed greater-than of two register operands viewed as <8 x i32>.
; The <8 x i1> result is widened to <64 x i1> with zero upper lanes (shuffle
; indices 8-15 pick from zeroinitializer) and bitcast to i64.
; With AVX512VL this is one ymm vpcmpgtd into %k0, read out with kmovq. With
; AVX512F alone both inputs are treated as zmm, the compare runs at 512 bits,
; a kshiftlw/kshiftrw $8 pair clears the upper eight mask bits, and the mask
; is read with kmovw into %eax.
6991 define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6992 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask:
6993 ; VLX: # %bb.0: # %entry
6994 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
6995 ; VLX-NEXT: kmovq %k0, %rax
6996 ; VLX-NEXT: vzeroupper
6999 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask:
7000 ; NoVLX: # %bb.0: # %entry
7001 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
7002 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
7003 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7004 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
7005 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
7006 ; NoVLX-NEXT: kmovw %k0, %eax
7007 ; NoVLX-NEXT: vzeroupper
7010 %0 = bitcast <4 x i64> %__a to <8 x i32>
7011 %1 = bitcast <4 x i64> %__b to <8 x i32>
7012 %2 = icmp sgt <8 x i32> %0, %1
7013 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7014 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed greater-than of %__a (viewed as <8 x i32>) against a full
; <4 x i64> vector loaded from %__b. The <8 x i1> result is widened to
; <64 x i1> with zero upper lanes (shuffle indices 8-15 pick from
; zeroinitializer) and bitcast to i64.
; With AVX512VL the load folds into the ymm compare and the mask is read with
; kmovq. With AVX512F alone the 256-bit operand is first loaded into %ymm1
; with vmovdqa, the compare runs on zmm, and a kshiftlw/kshiftrw $8 pair
; clears the upper eight mask bits.
7018 define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
7019 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem:
7020 ; VLX: # %bb.0: # %entry
7021 ; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
7022 ; VLX-NEXT: kmovq %k0, %rax
7023 ; VLX-NEXT: vzeroupper
7026 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem:
7027 ; NoVLX: # %bb.0: # %entry
7028 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
7029 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
7030 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7031 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
7032 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
7033 ; NoVLX-NEXT: kmovw %k0, %eax
7034 ; NoVLX-NEXT: vzeroupper
7037 %0 = bitcast <4 x i64> %__a to <8 x i32>
7038 %load = load <4 x i64>, ptr %__b
7039 %1 = bitcast <4 x i64> %load to <8 x i32>
7040 %2 = icmp sgt <8 x i32> %0, %1
7041 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7042 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than of two register operands viewed as <8 x i32>.
; The compare result is ANDed with %__u viewed as <8 x i1>, widened to
; <64 x i1> with zero upper lanes (shuffle indices 8-15 pick from
; zeroinitializer) and bitcast to i64.
; Expected codegen moves %edi into %k1 and applies it as the write-mask of the
; compare. With AVX512VL the compare runs on ymm and the mask is read with
; kmovq; with AVX512F alone it runs on zmm, a kshiftlw/kshiftrw $8 pair
; clears the upper eight mask bits, and the mask is read with kmovw.
7046 define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
7047 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask:
7048 ; VLX: # %bb.0: # %entry
7049 ; VLX-NEXT: kmovd %edi, %k1
7050 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
7051 ; VLX-NEXT: kmovq %k0, %rax
7052 ; VLX-NEXT: vzeroupper
7055 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask:
7056 ; NoVLX: # %bb.0: # %entry
7057 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
7058 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
7059 ; NoVLX-NEXT: kmovw %edi, %k1
7060 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7061 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
7062 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
7063 ; NoVLX-NEXT: kmovw %k0, %eax
7064 ; NoVLX-NEXT: vzeroupper
7067 %0 = bitcast <4 x i64> %__a to <8 x i32>
7068 %1 = bitcast <4 x i64> %__b to <8 x i32>
7069 %2 = icmp sgt <8 x i32> %0, %1
7070 %3 = bitcast i8 %__u to <8 x i1>
7071 %4 = and <8 x i1> %2, %3
7072 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7073 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-than of %__a (viewed as <8 x i32>) against a full
; <4 x i64> vector loaded from %__b. The compare result is ANDed with %__u
; viewed as <8 x i1>, widened to <64 x i1> with zero upper lanes (shuffle
; indices 8-15 pick from zeroinitializer) and bitcast to i64.
; Expected codegen moves %edi into %k1 and applies it as the write-mask of the
; compare. With AVX512VL the load folds into the ymm compare and the mask is
; read with kmovq; with AVX512F alone the operand is loaded into %ymm1 with
; vmovdqa, the compare runs on zmm, and a kshiftlw/kshiftrw $8 pair clears
; the upper eight mask bits.
7077 define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
7078 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem:
7079 ; VLX: # %bb.0: # %entry
7080 ; VLX-NEXT: kmovd %edi, %k1
7081 ; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
7082 ; VLX-NEXT: kmovq %k0, %rax
7083 ; VLX-NEXT: vzeroupper
7086 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem:
7087 ; NoVLX: # %bb.0: # %entry
7088 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
7089 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
7090 ; NoVLX-NEXT: kmovw %edi, %k1
7091 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7092 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
7093 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
7094 ; NoVLX-NEXT: kmovw %k0, %eax
7095 ; NoVLX-NEXT: vzeroupper
7098 %0 = bitcast <4 x i64> %__a to <8 x i32>
7099 %load = load <4 x i64>, ptr %__b
7100 %1 = bitcast <4 x i64> %load to <8 x i32>
7101 %2 = icmp sgt <8 x i32> %0, %1
7102 %3 = bitcast i8 %__u to <8 x i1>
7103 %4 = and <8 x i1> %2, %3
7104 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7105 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed-greater-than compare of %__a (viewed as <8 x i32>) against a
; splat of the i32 loaded from %__b. The <8 x i1> result is widened to
; <64 x i1> with zero upper lanes and bitcast to i64.
; Both runs expect the scalar load folded into the compare as an embedded
; broadcast ({1to8} on ymm for the VLX run, {1to16} on zmm for the NoVLX run).
7110 define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
7111 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
7112 ; VLX: # %bb.0: # %entry
7113 ; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
7114 ; VLX-NEXT: kmovq %k0, %rax
7115 ; VLX-NEXT: vzeroupper
7118 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
7119 ; NoVLX: # %bb.0: # %entry
7120 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
7121 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7122 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
7123 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
7124 ; NoVLX-NEXT: kmovw %k0, %eax
7125 ; NoVLX-NEXT: vzeroupper
7128 %0 = bitcast <4 x i64> %__a to <8 x i32>
7129 %load = load i32, ptr %__b
; Broadcast idiom: insert the scalar into lane 0, then shuffle with an all-zero index mask.
7130 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
7131 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7132 %2 = icmp sgt <8 x i32> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result lanes 8-63 are zero.
7133 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7134 %4 = bitcast <64 x i1> %3 to i64
; Masked signed-greater-than compare of %__a (viewed as <8 x i32>) against a
; splat of the i32 loaded from %__b. The <8 x i1> result is ANDed with the bits
; of %__u (mask is the first AND operand here), widened to <64 x i1> with zero
; upper lanes, and bitcast to i64.
; Both runs expect an embedded-broadcast compare under write-mask %k1.
7138 define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
7139 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
7140 ; VLX: # %bb.0: # %entry
7141 ; VLX-NEXT: kmovd %edi, %k1
7142 ; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
7143 ; VLX-NEXT: kmovq %k0, %rax
7144 ; VLX-NEXT: vzeroupper
7147 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
7148 ; NoVLX: # %bb.0: # %entry
7149 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
7150 ; NoVLX-NEXT: kmovw %edi, %k1
7151 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
7152 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
7153 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
7154 ; NoVLX-NEXT: kmovw %k0, %eax
7155 ; NoVLX-NEXT: vzeroupper
7158 %0 = bitcast <4 x i64> %__a to <8 x i32>
7159 %load = load i32, ptr %__b
; Broadcast idiom: insert the scalar into lane 0, then shuffle with an all-zero index mask.
7160 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
7161 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7162 %2 = icmp sgt <8 x i32> %0, %1
7163 %3 = bitcast i8 %__u to <8 x i1>
7164 %4 = and <8 x i1> %3, %2
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result lanes 8-63 are zero.
7165 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7166 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed-greater-than compare of %__a and %__b viewed as <16 x i32>.
; The <16 x i1> result is widened to <32 x i1> with zero upper lanes and
; bitcast to i32. Both runs expect a single zmm vpcmpgtd; they differ only in
; the mask-to-GPR move (kmovd vs. kmovw).
7171 define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
7172 ; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask:
7173 ; VLX: # %bb.0: # %entry
7174 ; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7175 ; VLX-NEXT: kmovd %k0, %eax
7176 ; VLX-NEXT: vzeroupper
7179 ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask:
7180 ; NoVLX: # %bb.0: # %entry
7181 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7182 ; NoVLX-NEXT: kmovw %k0, %eax
7183 ; NoVLX-NEXT: vzeroupper
7186 %0 = bitcast <8 x i64> %__a to <16 x i32>
7187 %1 = bitcast <8 x i64> %__b to <16 x i32>
7188 %2 = icmp sgt <16 x i32> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result lanes 16-31 are zero.
7189 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7190 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed-greater-than compare of %__a against an <8 x i64> loaded from
; %__b, both viewed as <16 x i32>. The <16 x i1> result is widened to
; <32 x i1> with zero upper lanes and bitcast to i32. Both runs expect the
; load folded into the compare as a memory operand.
7194 define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
7195 ; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem:
7196 ; VLX: # %bb.0: # %entry
7197 ; VLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
7198 ; VLX-NEXT: kmovd %k0, %eax
7199 ; VLX-NEXT: vzeroupper
7202 ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem:
7203 ; NoVLX: # %bb.0: # %entry
7204 ; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
7205 ; NoVLX-NEXT: kmovw %k0, %eax
7206 ; NoVLX-NEXT: vzeroupper
7209 %0 = bitcast <8 x i64> %__a to <16 x i32>
7210 %load = load <8 x i64>, ptr %__b
7211 %1 = bitcast <8 x i64> %load to <16 x i32>
7212 %2 = icmp sgt <16 x i32> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result lanes 16-31 are zero.
7213 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7214 %4 = bitcast <32 x i1> %3 to i32
; Masked signed-greater-than compare of %__a and %__b viewed as <16 x i32>.
; The <16 x i1> result is ANDed with the bits of %__u, widened to <32 x i1>
; with zero upper lanes, and bitcast to i32.
; The VLX run expects the mask applied as write-mask %k1; the NoVLX run expects
; an unmasked compare followed by a scalar andl with %edi.
7218 define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
7219 ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
7220 ; VLX: # %bb.0: # %entry
7221 ; VLX-NEXT: kmovd %edi, %k1
7222 ; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7223 ; VLX-NEXT: kmovd %k0, %eax
7224 ; VLX-NEXT: vzeroupper
7227 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
7228 ; NoVLX: # %bb.0: # %entry
7229 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7230 ; NoVLX-NEXT: kmovw %k0, %eax
7231 ; NoVLX-NEXT: andl %edi, %eax
7232 ; NoVLX-NEXT: vzeroupper
7235 %0 = bitcast <8 x i64> %__a to <16 x i32>
7236 %1 = bitcast <8 x i64> %__b to <16 x i32>
7237 %2 = icmp sgt <16 x i32> %0, %1
7238 %3 = bitcast i16 %__u to <16 x i1>
7239 %4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result lanes 16-31 are zero.
7240 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7241 %6 = bitcast <32 x i1> %5 to i32
; Masked signed-greater-than compare of %__a against an <8 x i64> loaded from
; %__b, both viewed as <16 x i32>. The <16 x i1> result is ANDed with the bits
; of %__u, widened to <32 x i1> with zero upper lanes, and bitcast to i32.
; The VLX run expects the mask applied as write-mask %k1; the NoVLX run expects
; an unmasked compare followed by a scalar andl with %edi.
7245 define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
7246 ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
7247 ; VLX: # %bb.0: # %entry
7248 ; VLX-NEXT: kmovd %edi, %k1
7249 ; VLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
7250 ; VLX-NEXT: kmovd %k0, %eax
7251 ; VLX-NEXT: vzeroupper
7254 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
7255 ; NoVLX: # %bb.0: # %entry
7256 ; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0
7257 ; NoVLX-NEXT: kmovw %k0, %eax
7258 ; NoVLX-NEXT: andl %edi, %eax
7259 ; NoVLX-NEXT: vzeroupper
7262 %0 = bitcast <8 x i64> %__a to <16 x i32>
7263 %load = load <8 x i64>, ptr %__b
7264 %1 = bitcast <8 x i64> %load to <16 x i32>
7265 %2 = icmp sgt <16 x i32> %0, %1
7266 %3 = bitcast i16 %__u to <16 x i1>
7267 %4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result lanes 16-31 are zero.
7268 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7269 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed-greater-than compare of %__a (viewed as <16 x i32>) against a
; splat of the i32 loaded from %__b. The <16 x i1> result is widened to
; <32 x i1> with zero upper lanes and bitcast to i32. Both runs expect the
; scalar load folded into the compare as a {1to16} embedded broadcast.
7274 define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
7275 ; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
7276 ; VLX: # %bb.0: # %entry
7277 ; VLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7278 ; VLX-NEXT: kmovd %k0, %eax
7279 ; VLX-NEXT: vzeroupper
7282 ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
7283 ; NoVLX: # %bb.0: # %entry
7284 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7285 ; NoVLX-NEXT: kmovw %k0, %eax
7286 ; NoVLX-NEXT: vzeroupper
7289 %0 = bitcast <8 x i64> %__a to <16 x i32>
7290 %load = load i32, ptr %__b
; Broadcast idiom: insert the scalar into lane 0, then shuffle with an all-zero index mask.
7291 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
7292 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7293 %2 = icmp sgt <16 x i32> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result lanes 16-31 are zero.
7294 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7295 %4 = bitcast <32 x i1> %3 to i32
; Masked signed-greater-than compare of %__a (viewed as <16 x i32>) against a
; splat of the i32 loaded from %__b. The <16 x i1> result is ANDed with the
; bits of %__u (mask is the first AND operand here), widened to <32 x i1> with
; zero upper lanes, and bitcast to i32.
; The VLX run expects the mask applied as write-mask %k1; the NoVLX run expects
; an unmasked broadcast compare followed by a scalar andl with %edi.
7299 define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
7300 ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
7301 ; VLX: # %bb.0: # %entry
7302 ; VLX-NEXT: kmovd %edi, %k1
7303 ; VLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
7304 ; VLX-NEXT: kmovd %k0, %eax
7305 ; VLX-NEXT: vzeroupper
7308 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
7309 ; NoVLX: # %bb.0: # %entry
7310 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0
7311 ; NoVLX-NEXT: kmovw %k0, %eax
7312 ; NoVLX-NEXT: andl %edi, %eax
7313 ; NoVLX-NEXT: vzeroupper
7316 %0 = bitcast <8 x i64> %__a to <16 x i32>
7317 %load = load i32, ptr %__b
; Broadcast idiom: insert the scalar into lane 0, then shuffle with an all-zero index mask.
7318 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
7319 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7320 %2 = icmp sgt <16 x i32> %0, %1
7321 %3 = bitcast i16 %__u to <16 x i1>
7322 %4 = and <16 x i1> %3, %2
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result lanes 16-31 are zero.
7323 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7324 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed-greater-than compare of %__a and %__b viewed as <16 x i32>.
; The <16 x i1> result is widened to <64 x i1> with zero upper lanes and
; bitcast to i64. Both runs expect a single zmm vpcmpgtd; they differ only in
; the mask-to-GPR move (kmovq vs. kmovw).
7329 define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
7330 ; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask:
7331 ; VLX: # %bb.0: # %entry
7332 ; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7333 ; VLX-NEXT: kmovq %k0, %rax
7334 ; VLX-NEXT: vzeroupper
7337 ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask:
7338 ; NoVLX: # %bb.0: # %entry
7339 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7340 ; NoVLX-NEXT: kmovw %k0, %eax
7341 ; NoVLX-NEXT: vzeroupper
7344 %0 = bitcast <8 x i64> %__a to <16 x i32>
7345 %1 = bitcast <8 x i64> %__b to <16 x i32>
7346 %2 = icmp sgt <16 x i32> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result lanes 16-63 are zero.
7347 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7348 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed-greater-than compare of %__a against an <8 x i64> loaded from
; %__b, both viewed as <16 x i32>. The <16 x i1> result is widened to
; <64 x i1> with zero upper lanes and bitcast to i64. Both runs expect the
; load folded into the compare as a memory operand.
7352 define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
7353 ; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem:
7354 ; VLX: # %bb.0: # %entry
7355 ; VLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
7356 ; VLX-NEXT: kmovq %k0, %rax
7357 ; VLX-NEXT: vzeroupper
7360 ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem:
7361 ; NoVLX: # %bb.0: # %entry
7362 ; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
7363 ; NoVLX-NEXT: kmovw %k0, %eax
7364 ; NoVLX-NEXT: vzeroupper
7367 %0 = bitcast <8 x i64> %__a to <16 x i32>
7368 %load = load <8 x i64>, ptr %__b
7369 %1 = bitcast <8 x i64> %load to <16 x i32>
7370 %2 = icmp sgt <16 x i32> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result lanes 16-63 are zero.
7371 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7372 %4 = bitcast <64 x i1> %3 to i64
; Masked signed-greater-than compare of %__a and %__b viewed as <16 x i32>.
; The <16 x i1> result is ANDed with the bits of %__u, widened to <64 x i1>
; with zero upper lanes, and bitcast to i64.
; The VLX run expects the mask applied as write-mask %k1; the NoVLX run expects
; an unmasked compare followed by a scalar andl with %edi.
7376 define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
7377 ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
7378 ; VLX: # %bb.0: # %entry
7379 ; VLX-NEXT: kmovd %edi, %k1
7380 ; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7381 ; VLX-NEXT: kmovq %k0, %rax
7382 ; VLX-NEXT: vzeroupper
7385 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
7386 ; NoVLX: # %bb.0: # %entry
7387 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7388 ; NoVLX-NEXT: kmovw %k0, %eax
7389 ; NoVLX-NEXT: andl %edi, %eax
7390 ; NoVLX-NEXT: vzeroupper
7393 %0 = bitcast <8 x i64> %__a to <16 x i32>
7394 %1 = bitcast <8 x i64> %__b to <16 x i32>
7395 %2 = icmp sgt <16 x i32> %0, %1
7396 %3 = bitcast i16 %__u to <16 x i1>
7397 %4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result lanes 16-63 are zero.
7398 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7399 %6 = bitcast <64 x i1> %5 to i64
; Masked signed-greater-than compare of %__a against an <8 x i64> loaded from
; %__b, both viewed as <16 x i32>. The <16 x i1> result is ANDed with the bits
; of %__u, widened to <64 x i1> with zero upper lanes, and bitcast to i64.
; The VLX run expects the mask applied as write-mask %k1; the NoVLX run expects
; an unmasked compare followed by a scalar andl with %edi.
7403 define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
7404 ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
7405 ; VLX: # %bb.0: # %entry
7406 ; VLX-NEXT: kmovd %edi, %k1
7407 ; VLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
7408 ; VLX-NEXT: kmovq %k0, %rax
7409 ; VLX-NEXT: vzeroupper
7412 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
7413 ; NoVLX: # %bb.0: # %entry
7414 ; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0
7415 ; NoVLX-NEXT: kmovw %k0, %eax
7416 ; NoVLX-NEXT: andl %edi, %eax
7417 ; NoVLX-NEXT: vzeroupper
7420 %0 = bitcast <8 x i64> %__a to <16 x i32>
7421 %load = load <8 x i64>, ptr %__b
7422 %1 = bitcast <8 x i64> %load to <16 x i32>
7423 %2 = icmp sgt <16 x i32> %0, %1
7424 %3 = bitcast i16 %__u to <16 x i1>
7425 %4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result lanes 16-63 are zero.
7426 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7427 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed-greater-than compare of %__a (viewed as <16 x i32>) against a
; splat of the i32 loaded from %__b. The <16 x i1> result is widened to
; <64 x i1> with zero upper lanes and bitcast to i64. Both runs expect the
; scalar load folded into the compare as a {1to16} embedded broadcast.
7432 define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
7433 ; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
7434 ; VLX: # %bb.0: # %entry
7435 ; VLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7436 ; VLX-NEXT: kmovq %k0, %rax
7437 ; VLX-NEXT: vzeroupper
7440 ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
7441 ; NoVLX: # %bb.0: # %entry
7442 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7443 ; NoVLX-NEXT: kmovw %k0, %eax
7444 ; NoVLX-NEXT: vzeroupper
7447 %0 = bitcast <8 x i64> %__a to <16 x i32>
7448 %load = load i32, ptr %__b
; Broadcast idiom: insert the scalar into lane 0, then shuffle with an all-zero index mask.
7449 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
7450 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7451 %2 = icmp sgt <16 x i32> %0, %1
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result lanes 16-63 are zero.
7452 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7453 %4 = bitcast <64 x i1> %3 to i64
; Masked signed-greater-than compare of %__a (viewed as <16 x i32>) against a
; splat of the i32 loaded from %__b. The <16 x i1> result is ANDed with the
; bits of %__u (mask is the first AND operand here), widened to <64 x i1> with
; zero upper lanes, and bitcast to i64.
; The VLX run expects the mask applied as write-mask %k1; the NoVLX run expects
; an unmasked broadcast compare followed by a scalar andl with %edi.
7457 define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
7458 ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
7459 ; VLX: # %bb.0: # %entry
7460 ; VLX-NEXT: kmovd %edi, %k1
7461 ; VLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
7462 ; VLX-NEXT: kmovq %k0, %rax
7463 ; VLX-NEXT: vzeroupper
7466 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
7467 ; NoVLX: # %bb.0: # %entry
7468 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0
7469 ; NoVLX-NEXT: kmovw %k0, %eax
7470 ; NoVLX-NEXT: andl %edi, %eax
7471 ; NoVLX-NEXT: vzeroupper
7474 %0 = bitcast <8 x i64> %__a to <16 x i32>
7475 %load = load i32, ptr %__b
; Broadcast idiom: insert the scalar into lane 0, then shuffle with an all-zero index mask.
7476 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
7477 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7478 %2 = icmp sgt <16 x i32> %0, %1
7479 %3 = bitcast i16 %__u to <16 x i1>
7480 %4 = and <16 x i1> %3, %2
; Shuffle indices 16-31 select lanes of the zeroinitializer operand, so result lanes 16-63 are zero.
7481 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7482 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed-greater-than compare of the <2 x i64> arguments %__a and
; %__b (the bitcasts are same-type no-ops). The <2 x i1> result is widened to
; <4 x i1> with zero upper lanes and bitcast to i4.
; The NoVLX run expects the compare on zmm registers followed by andl $3 to
; keep only the two valid result bits.
7487 define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7488 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask:
7489 ; VLX: # %bb.0: # %entry
7490 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
7491 ; VLX-NEXT: kmovb %k0, %eax
7494 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask:
7495 ; NoVLX: # %bb.0: # %entry
7496 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
7497 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7498 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7499 ; NoVLX-NEXT: kmovw %k0, %eax
7500 ; NoVLX-NEXT: andl $3, %eax
7501 ; NoVLX-NEXT: vzeroupper
7504 %0 = bitcast <2 x i64> %__a to <2 x i64>
7505 %1 = bitcast <2 x i64> %__b to <2 x i64>
7506 %2 = icmp sgt <2 x i64> %0, %1
; Shuffle indices 2-3 select lanes of the zeroinitializer operand, so result lanes 2-3 are zero.
7507 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7508 %4 = bitcast <4 x i1> %3 to i4
; Unmasked signed-greater-than compare of %__a against a <2 x i64> loaded from
; %__b (the bitcasts are same-type no-ops). The <2 x i1> result is widened to
; <4 x i1> with zero upper lanes and bitcast to i4.
; The VLX run expects the load folded into the compare; the NoVLX run expects
; a separate vmovdqa load, a zmm compare, and andl $3 on the result.
7512 define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
7513 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem:
7514 ; VLX: # %bb.0: # %entry
7515 ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
7516 ; VLX-NEXT: kmovb %k0, %eax
7519 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem:
7520 ; NoVLX: # %bb.0: # %entry
7521 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7522 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
7523 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7524 ; NoVLX-NEXT: kmovw %k0, %eax
7525 ; NoVLX-NEXT: andl $3, %eax
7526 ; NoVLX-NEXT: vzeroupper
7529 %0 = bitcast <2 x i64> %__a to <2 x i64>
7530 %load = load <2 x i64>, ptr %__b
7531 %1 = bitcast <2 x i64> %load to <2 x i64>
7532 %2 = icmp sgt <2 x i64> %0, %1
; Shuffle indices 2-3 select lanes of the zeroinitializer operand, so result lanes 2-3 are zero.
7533 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7534 %4 = bitcast <4 x i1> %3 to i4
; Masked signed-greater-than compare of the <2 x i64> arguments %__a and %__b.
; Only bits 0-1 of %__u are used as the mask (extracted as <2 x i1>). The
; masked <2 x i1> result is widened to <4 x i1> with zero upper lanes and
; bitcast to i4.
; The NoVLX run expects a zmm compare under write-mask %k1 followed by andl $3.
7538 define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7539 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask:
7540 ; VLX: # %bb.0: # %entry
7541 ; VLX-NEXT: kmovd %edi, %k1
7542 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
7543 ; VLX-NEXT: kmovb %k0, %eax
7546 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask:
7547 ; NoVLX: # %bb.0: # %entry
7548 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
7549 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7550 ; NoVLX-NEXT: kmovw %edi, %k1
7551 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7552 ; NoVLX-NEXT: kmovw %k0, %eax
7553 ; NoVLX-NEXT: andl $3, %eax
7554 ; NoVLX-NEXT: vzeroupper
7557 %0 = bitcast <2 x i64> %__a to <2 x i64>
7558 %1 = bitcast <2 x i64> %__b to <2 x i64>
7559 %2 = icmp sgt <2 x i64> %0, %1
7560 %3 = bitcast i8 %__u to <8 x i1>
; Keep only mask lanes 0 and 1 to match the two-element compare result.
7561 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7562 %4 = and <2 x i1> %2, %extract.i
; Shuffle indices 2-3 select lanes of the zeroinitializer operand, so result lanes 2-3 are zero.
7563 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7564 %6 = bitcast <4 x i1> %5 to i4
; Masked signed-greater-than compare of %__a against a <2 x i64> loaded from
; %__b. Only bits 0-1 of %__u are used as the mask (extracted as <2 x i1>).
; The masked <2 x i1> result is widened to <4 x i1> with zero upper lanes and
; bitcast to i4.
; The NoVLX run expects a separate vmovdqa load, a zmm compare under
; write-mask %k1, and andl $3 on the result.
7568 define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
7569 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem:
7570 ; VLX: # %bb.0: # %entry
7571 ; VLX-NEXT: kmovd %edi, %k1
7572 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
7573 ; VLX-NEXT: kmovb %k0, %eax
7576 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem:
7577 ; NoVLX: # %bb.0: # %entry
7578 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7579 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
7580 ; NoVLX-NEXT: kmovw %edi, %k1
7581 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7582 ; NoVLX-NEXT: kmovw %k0, %eax
7583 ; NoVLX-NEXT: andl $3, %eax
7584 ; NoVLX-NEXT: vzeroupper
7587 %0 = bitcast <2 x i64> %__a to <2 x i64>
7588 %load = load <2 x i64>, ptr %__b
7589 %1 = bitcast <2 x i64> %load to <2 x i64>
7590 %2 = icmp sgt <2 x i64> %0, %1
7591 %3 = bitcast i8 %__u to <8 x i1>
; Keep only mask lanes 0 and 1 to match the two-element compare result.
7592 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7593 %4 = and <2 x i1> %2, %extract.i
; Shuffle indices 2-3 select lanes of the zeroinitializer operand, so result lanes 2-3 are zero.
7594 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7595 %6 = bitcast <4 x i1> %5 to i4
; Unmasked signed-greater-than compare of %__a against a splat of the i64
; loaded from %__b. The <2 x i1> result is widened to <4 x i1> with zero upper
; lanes and bitcast to i4.
; Both runs expect the scalar load folded into the compare as an embedded
; broadcast ({1to2} on xmm for the VLX run, {1to8} on zmm for the NoVLX run).
7600 define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
7601 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
7602 ; VLX: # %bb.0: # %entry
7603 ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
7604 ; VLX-NEXT: kmovb %k0, %eax
7607 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
7608 ; NoVLX: # %bb.0: # %entry
7609 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7610 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
7611 ; NoVLX-NEXT: kmovw %k0, %eax
7612 ; NoVLX-NEXT: andl $3, %eax
7613 ; NoVLX-NEXT: vzeroupper
7616 %0 = bitcast <2 x i64> %__a to <2 x i64>
7617 %load = load i64, ptr %__b
; Broadcast idiom: insert the scalar into lane 0, then shuffle with an all-zero index mask.
7618 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
7619 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
7620 %2 = icmp sgt <2 x i64> %0, %1
; Shuffle indices 2-3 select lanes of the zeroinitializer operand, so result lanes 2-3 are zero.
7621 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7622 %4 = bitcast <4 x i1> %3 to i4
; Masked signed-greater-than compare of %__a against a splat of the i64 loaded
; from %__b. Only bits 0-1 of %__u are used as the mask (extracted as
; <2 x i1>, first AND operand here). The masked <2 x i1> result is widened to
; <4 x i1> with zero upper lanes and bitcast to i4.
; Both runs expect an embedded-broadcast compare under write-mask %k1.
7626 define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
7627 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
7628 ; VLX: # %bb.0: # %entry
7629 ; VLX-NEXT: kmovd %edi, %k1
7630 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
7631 ; VLX-NEXT: kmovb %k0, %eax
7634 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
7635 ; NoVLX: # %bb.0: # %entry
7636 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7637 ; NoVLX-NEXT: kmovw %edi, %k1
7638 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
7639 ; NoVLX-NEXT: kmovw %k0, %eax
7640 ; NoVLX-NEXT: andl $3, %eax
7641 ; NoVLX-NEXT: vzeroupper
7644 %0 = bitcast <2 x i64> %__a to <2 x i64>
7645 %load = load i64, ptr %__b
; Broadcast idiom: insert the scalar into lane 0, then shuffle with an all-zero index mask.
7646 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
7647 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
7648 %2 = icmp sgt <2 x i64> %0, %1
7649 %3 = bitcast i8 %__u to <8 x i1>
; Keep only mask lanes 0 and 1 to match the two-element compare result.
7650 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7651 %4 = and <2 x i1> %extract.i, %2
; Shuffle indices 2-3 select lanes of the zeroinitializer operand, so result lanes 2-3 are zero.
7652 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7653 %6 = bitcast <4 x i1> %5 to i4
; Unmasked signed-greater-than compare of the <2 x i64> arguments %__a and
; %__b (the bitcasts are same-type no-ops). The <2 x i1> result is widened to
; <8 x i1> with zero upper lanes and bitcast to i8.
; The NoVLX run expects the compare on zmm registers followed by a
; kshiftlw/kshiftrw $14 pair that discards mask bits 2-15.
7658 define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7659 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask:
7660 ; VLX: # %bb.0: # %entry
7661 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
7662 ; VLX-NEXT: kmovd %k0, %eax
7663 ; VLX-NEXT: # kill: def $al killed $al killed $eax
7666 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask:
7667 ; NoVLX: # %bb.0: # %entry
7668 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
7669 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7670 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7671 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7672 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7673 ; NoVLX-NEXT: kmovw %k0, %eax
7674 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
7675 ; NoVLX-NEXT: vzeroupper
7678 %0 = bitcast <2 x i64> %__a to <2 x i64>
7679 %1 = bitcast <2 x i64> %__b to <2 x i64>
7680 %2 = icmp sgt <2 x i64> %0, %1
; Shuffle indices 2-3 select lanes of the zeroinitializer operand, so result lanes 2-7 are zero.
7681 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7682 %4 = bitcast <8 x i1> %3 to i8
; Unmasked signed-greater-than compare of %__a against a <2 x i64> loaded from
; %__b (the bitcasts are same-type no-ops). The <2 x i1> result is widened to
; <8 x i1> with zero upper lanes and bitcast to i8.
; The VLX run expects the load folded into the compare; the NoVLX run expects
; a separate vmovdqa load, a zmm compare, and a kshiftlw/kshiftrw $14 pair
; that discards mask bits 2-15.
7686 define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
7687 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem:
7688 ; VLX: # %bb.0: # %entry
7689 ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
7690 ; VLX-NEXT: kmovd %k0, %eax
7691 ; VLX-NEXT: # kill: def $al killed $al killed $eax
7694 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem:
7695 ; NoVLX: # %bb.0: # %entry
7696 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7697 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
7698 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7699 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7700 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7701 ; NoVLX-NEXT: kmovw %k0, %eax
7702 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
7703 ; NoVLX-NEXT: vzeroupper
7706 %0 = bitcast <2 x i64> %__a to <2 x i64>
7707 %load = load <2 x i64>, ptr %__b
7708 %1 = bitcast <2 x i64> %load to <2 x i64>
7709 %2 = icmp sgt <2 x i64> %0, %1
; Shuffle indices 2-3 select lanes of the zeroinitializer operand, so result lanes 2-7 are zero.
7710 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7711 %4 = bitcast <8 x i1> %3 to i8
; Masked signed-greater-than compare of the <2 x i64> arguments %__a and %__b.
; Only bits 0-1 of %__u are used as the mask (extracted as <2 x i1>). The
; masked <2 x i1> result is widened to <8 x i1> with zero upper lanes and
; bitcast to i8.
; The NoVLX run expects a zmm compare under write-mask %k1 followed by a
; kshiftlw/kshiftrw $14 pair that discards mask bits 2-15.
7715 define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7716 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask:
7717 ; VLX: # %bb.0: # %entry
7718 ; VLX-NEXT: kmovd %edi, %k1
7719 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
7720 ; VLX-NEXT: kmovd %k0, %eax
7721 ; VLX-NEXT: # kill: def $al killed $al killed $eax
7724 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask:
7725 ; NoVLX: # %bb.0: # %entry
7726 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
7727 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7728 ; NoVLX-NEXT: kmovw %edi, %k1
7729 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7730 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7731 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7732 ; NoVLX-NEXT: kmovw %k0, %eax
7733 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
7734 ; NoVLX-NEXT: vzeroupper
7737 %0 = bitcast <2 x i64> %__a to <2 x i64>
7738 %1 = bitcast <2 x i64> %__b to <2 x i64>
7739 %2 = icmp sgt <2 x i64> %0, %1
7740 %3 = bitcast i8 %__u to <8 x i1>
; Keep only mask lanes 0 and 1 to match the two-element compare result.
7741 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7742 %4 = and <2 x i1> %2, %extract.i
; Shuffle indices 2-3 select lanes of the zeroinitializer operand, so result lanes 2-7 are zero.
7743 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7744 %6 = bitcast <8 x i1> %5 to i8
; Masked signed-greater-than compare of %__a against a <2 x i64> loaded from
; %__b. Only bits 0-1 of %__u are used as the mask (extracted as <2 x i1>).
; The masked <2 x i1> result is widened to <8 x i1> with zero upper lanes and
; bitcast to i8.
; The NoVLX run expects a separate vmovdqa load, a zmm compare under
; write-mask %k1, and a kshiftlw/kshiftrw $14 pair that discards mask bits 2-15.
7748 define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
7749 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem:
7750 ; VLX: # %bb.0: # %entry
7751 ; VLX-NEXT: kmovd %edi, %k1
7752 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
7753 ; VLX-NEXT: kmovd %k0, %eax
7754 ; VLX-NEXT: # kill: def $al killed $al killed $eax
7757 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem:
7758 ; NoVLX: # %bb.0: # %entry
7759 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7760 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
7761 ; NoVLX-NEXT: kmovw %edi, %k1
7762 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7763 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7764 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7765 ; NoVLX-NEXT: kmovw %k0, %eax
7766 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
7767 ; NoVLX-NEXT: vzeroupper
7770 %0 = bitcast <2 x i64> %__a to <2 x i64>
7771 %load = load <2 x i64>, ptr %__b
7772 %1 = bitcast <2 x i64> %load to <2 x i64>
7773 %2 = icmp sgt <2 x i64> %0, %1
7774 %3 = bitcast i8 %__u to <8 x i1>
; Keep only mask lanes 0 and 1 to match the two-element compare result.
7775 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7776 %4 = and <2 x i1> %2, %extract.i
; Shuffle indices 2-3 select lanes of the zeroinitializer operand, so result lanes 2-7 are zero.
7777 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7778 %6 = bitcast <8 x i1> %5 to i8
; Unmasked signed-greater-than compare of %__a against a splat of the i64
; loaded from %__b. The <2 x i1> result is widened to <8 x i1> with zero upper
; lanes and bitcast to i8.
; Both runs expect the scalar load folded into the compare as an embedded
; broadcast ({1to2} on xmm for the VLX run, {1to8} on zmm for the NoVLX run).
7783 define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
7784 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
7785 ; VLX: # %bb.0: # %entry
7786 ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
7787 ; VLX-NEXT: kmovd %k0, %eax
7788 ; VLX-NEXT: # kill: def $al killed $al killed $eax
7791 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
7792 ; NoVLX: # %bb.0: # %entry
7793 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7794 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
7795 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7796 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7797 ; NoVLX-NEXT: kmovw %k0, %eax
7798 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
7799 ; NoVLX-NEXT: vzeroupper
7802 %0 = bitcast <2 x i64> %__a to <2 x i64>
7803 %load = load i64, ptr %__b
; Broadcast idiom: insert the scalar into lane 0, then shuffle with an all-zero index mask.
7804 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
7805 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
7806 %2 = icmp sgt <2 x i64> %0, %1
; Shuffle indices 2-3 select lanes of the zeroinitializer operand, so result lanes 2-7 are zero.
7807 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7808 %4 = bitcast <8 x i1> %3 to i8
; Masked signed-greater-than compare of %__a against a splat of the i64 loaded
; from %__b. Only bits 0-1 of %__u are used as the mask (extracted as
; <2 x i1>, first AND operand here). The masked <2 x i1> result is widened to
; <8 x i1> with zero upper lanes and bitcast to i8.
; Both runs expect an embedded-broadcast compare under write-mask %k1.
7812 define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
7813 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
7814 ; VLX: # %bb.0: # %entry
7815 ; VLX-NEXT: kmovd %edi, %k1
7816 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
7817 ; VLX-NEXT: kmovd %k0, %eax
7818 ; VLX-NEXT: # kill: def $al killed $al killed $eax
7821 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
7822 ; NoVLX: # %bb.0: # %entry
7823 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7824 ; NoVLX-NEXT: kmovw %edi, %k1
7825 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
7826 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7827 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7828 ; NoVLX-NEXT: kmovw %k0, %eax
7829 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
7830 ; NoVLX-NEXT: vzeroupper
7833 %0 = bitcast <2 x i64> %__a to <2 x i64>
7834 %load = load i64, ptr %__b
; Broadcast idiom: insert the scalar into lane 0, then shuffle with an all-zero index mask.
7835 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
7836 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
7837 %2 = icmp sgt <2 x i64> %0, %1
7838 %3 = bitcast i8 %__u to <8 x i1>
; Keep only mask lanes 0 and 1 to match the two-element compare result.
7839 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7840 %4 = and <2 x i1> %extract.i, %2
; Shuffle indices 2-3 select lanes of the zeroinitializer operand, so result lanes 2-7 are zero.
7841 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7842 %6 = bitcast <8 x i1> %5 to i8
; Unmasked signed-greater-than compare of two <2 x i64> register operands.
; The <2 x i1> result is widened to <16 x i1> with lanes 2-15 taken from
; zeroinitializer, then bitcast to i16.
; Expected codegen: with +avx512vl, one xmm vpcmpgtq into a mask register;
; with only +avx512f, both operands are treated as zmm for the compare and
; kshiftlw/kshiftrw $14 keeps only the low 2 mask bits.
7847 define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7848 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask:
7849 ; VLX: # %bb.0: # %entry
7850 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
7851 ; VLX-NEXT: kmovd %k0, %eax
7852 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
7855 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask:
7856 ; NoVLX: # %bb.0: # %entry
7857 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
7858 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7859 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7860 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7861 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7862 ; NoVLX-NEXT: kmovw %k0, %eax
7863 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
7864 ; NoVLX-NEXT: vzeroupper
7867 %0 = bitcast <2 x i64> %__a to <2 x i64>
7868 %1 = bitcast <2 x i64> %__b to <2 x i64>
7869 %2 = icmp sgt <2 x i64> %0, %1
7870 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7871 %4 = bitcast <16 x i1> %3 to i16
; Unmasked signed-greater-than compare of %__a against a full <2 x i64> vector
; loaded from %__b. The <2 x i1> result is widened to <16 x i1> with lanes 2-15
; taken from zeroinitializer, then bitcast to i16.
; Expected codegen: with +avx512vl, the load is folded into an xmm vpcmpgtq;
; with only +avx512f, the vector is loaded with vmovdqa, compared as zmm, and
; kshiftlw/kshiftrw $14 keeps only the low 2 mask bits.
7875 define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
7876 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem:
7877 ; VLX: # %bb.0: # %entry
7878 ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
7879 ; VLX-NEXT: kmovd %k0, %eax
7880 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
7883 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem:
7884 ; NoVLX: # %bb.0: # %entry
7885 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7886 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
7887 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7888 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7889 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7890 ; NoVLX-NEXT: kmovw %k0, %eax
7891 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
7892 ; NoVLX-NEXT: vzeroupper
7895 %0 = bitcast <2 x i64> %__a to <2 x i64>
7896 %load = load <2 x i64>, ptr %__b
7897 %1 = bitcast <2 x i64> %load to <2 x i64>
7898 %2 = icmp sgt <2 x i64> %0, %1
7899 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7900 %4 = bitcast <16 x i1> %3 to i16
; Masked signed-greater-than compare of two <2 x i64> register operands.
; %__u is bitcast to <8 x i1>, its low 2 lanes are extracted and ANDed with the
; compare result; the <2 x i1> value is then widened to <16 x i1> with lanes
; 2-15 taken from zeroinitializer and bitcast to i16.
; Expected codegen: %edi is moved into %k1 and used as the {%k1} write-mask of
; the compare. With +avx512vl the compare is an xmm vpcmpgtq; with only
; +avx512f it is a zmm vpcmpgtq followed by kshiftlw/kshiftrw $14, which keeps
; only the low 2 mask bits.
7904 define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7905 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask:
7906 ; VLX: # %bb.0: # %entry
7907 ; VLX-NEXT: kmovd %edi, %k1
7908 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
7909 ; VLX-NEXT: kmovd %k0, %eax
7910 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
7913 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask:
7914 ; NoVLX: # %bb.0: # %entry
7915 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
7916 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7917 ; NoVLX-NEXT: kmovw %edi, %k1
7918 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7919 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7920 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7921 ; NoVLX-NEXT: kmovw %k0, %eax
7922 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
7923 ; NoVLX-NEXT: vzeroupper
7926 %0 = bitcast <2 x i64> %__a to <2 x i64>
7927 %1 = bitcast <2 x i64> %__b to <2 x i64>
7928 %2 = icmp sgt <2 x i64> %0, %1
7929 %3 = bitcast i8 %__u to <8 x i1>
7930 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7931 %4 = and <2 x i1> %2, %extract.i
7932 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7933 %6 = bitcast <16 x i1> %5 to i16
; Masked signed-greater-than compare of %__a against a full <2 x i64> vector
; loaded from %__b. %__u is bitcast to <8 x i1>, its low 2 lanes are extracted
; and ANDed with the compare result; the <2 x i1> value is then widened to
; <16 x i1> with lanes 2-15 taken from zeroinitializer and bitcast to i16.
; Expected codegen: %edi is moved into %k1 and used as the {%k1} write-mask of
; the compare. With +avx512vl the load is folded into an xmm vpcmpgtq; with
; only +avx512f the vector is loaded with vmovdqa, compared as zmm, and
; kshiftlw/kshiftrw $14 keeps only the low 2 mask bits.
7937 define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
7938 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem:
7939 ; VLX: # %bb.0: # %entry
7940 ; VLX-NEXT: kmovd %edi, %k1
7941 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
7942 ; VLX-NEXT: kmovd %k0, %eax
7943 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
7946 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem:
7947 ; NoVLX: # %bb.0: # %entry
7948 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7949 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
7950 ; NoVLX-NEXT: kmovw %edi, %k1
7951 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7952 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7953 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7954 ; NoVLX-NEXT: kmovw %k0, %eax
7955 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
7956 ; NoVLX-NEXT: vzeroupper
7959 %0 = bitcast <2 x i64> %__a to <2 x i64>
7960 %load = load <2 x i64>, ptr %__b
7961 %1 = bitcast <2 x i64> %load to <2 x i64>
7962 %2 = icmp sgt <2 x i64> %0, %1
7963 %3 = bitcast i8 %__u to <8 x i1>
7964 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7965 %4 = and <2 x i1> %2, %extract.i
7966 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7967 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed-greater-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes (insertelement + shufflevector <0, 0>).
; The <2 x i1> result is widened to <16 x i1> with lanes 2-15 taken from
; zeroinitializer, then bitcast to i16.
; Expected codegen: with +avx512vl, one xmm vpcmpgtq using a {1to2} broadcast
; memory operand; with only +avx512f, a zmm vpcmpgtq with {1to8} followed by
; kshiftlw/kshiftrw $14, which keeps only the low 2 mask bits.
7972 define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
7973 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
7974 ; VLX: # %bb.0: # %entry
7975 ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
7976 ; VLX-NEXT: kmovd %k0, %eax
7977 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
7980 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
7981 ; NoVLX: # %bb.0: # %entry
7982 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7983 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
7984 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7985 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7986 ; NoVLX-NEXT: kmovw %k0, %eax
7987 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
7988 ; NoVLX-NEXT: vzeroupper
7991 %0 = bitcast <2 x i64> %__a to <2 x i64>
7992 %load = load i64, ptr %__b
7993 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
7994 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
7995 %2 = icmp sgt <2 x i64> %0, %1
7996 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7997 %4 = bitcast <16 x i1> %3 to i16
; Masked signed-greater-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes. %__u is bitcast to <8 x i1>, its low 2 lanes
; are extracted and ANDed with the compare result; the <2 x i1> value is then
; widened to <16 x i1> with lanes 2-15 taken from zeroinitializer and bitcast
; to i16.
; Expected codegen: %edi is moved into %k1 and used as the {%k1} write-mask of
; the compare. With +avx512vl the compare is an xmm vpcmpgtq with a {1to2}
; broadcast operand; with only +avx512f it is a zmm vpcmpgtq with {1to8}
; followed by kshiftlw/kshiftrw $14, which keeps only the low 2 mask bits.
8001 define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
8002 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
8003 ; VLX: # %bb.0: # %entry
8004 ; VLX-NEXT: kmovd %edi, %k1
8005 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
8006 ; VLX-NEXT: kmovd %k0, %eax
8007 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8010 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
8011 ; NoVLX: # %bb.0: # %entry
8012 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8013 ; NoVLX-NEXT: kmovw %edi, %k1
8014 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
8015 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8016 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8017 ; NoVLX-NEXT: kmovw %k0, %eax
8018 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8019 ; NoVLX-NEXT: vzeroupper
8022 %0 = bitcast <2 x i64> %__a to <2 x i64>
8023 %load = load i64, ptr %__b
8024 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8025 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8026 %2 = icmp sgt <2 x i64> %0, %1
8027 %3 = bitcast i8 %__u to <8 x i1>
8028 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8029 %4 = and <2 x i1> %extract.i, %2
8030 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8031 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed-greater-than compare of two <2 x i64> register operands.
; The <2 x i1> result is widened to <32 x i1> with lanes 2-31 taken from
; zeroinitializer, then bitcast to i32.
; Expected codegen: with +avx512vl, one xmm vpcmpgtq into a mask register;
; with only +avx512f, both operands are treated as zmm for the compare and
; kshiftlw/kshiftrw $14 keeps only the low 2 mask bits.
8036 define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
8037 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
8038 ; VLX: # %bb.0: # %entry
8039 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
8040 ; VLX-NEXT: kmovd %k0, %eax
8043 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
8044 ; NoVLX: # %bb.0: # %entry
8045 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
8046 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8047 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8048 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8049 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8050 ; NoVLX-NEXT: kmovw %k0, %eax
8051 ; NoVLX-NEXT: vzeroupper
8054 %0 = bitcast <2 x i64> %__a to <2 x i64>
8055 %1 = bitcast <2 x i64> %__b to <2 x i64>
8056 %2 = icmp sgt <2 x i64> %0, %1
8057 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8058 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed-greater-than compare of %__a against a full <2 x i64> vector
; loaded from %__b. The <2 x i1> result is widened to <32 x i1> with lanes 2-31
; taken from zeroinitializer, then bitcast to i32.
; Expected codegen: with +avx512vl, the load is folded into an xmm vpcmpgtq;
; with only +avx512f, the vector is loaded with vmovdqa, compared as zmm, and
; kshiftlw/kshiftrw $14 keeps only the low 2 mask bits.
8062 define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
8063 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
8064 ; VLX: # %bb.0: # %entry
8065 ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
8066 ; VLX-NEXT: kmovd %k0, %eax
8069 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
8070 ; NoVLX: # %bb.0: # %entry
8071 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8072 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
8073 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8074 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8075 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8076 ; NoVLX-NEXT: kmovw %k0, %eax
8077 ; NoVLX-NEXT: vzeroupper
8080 %0 = bitcast <2 x i64> %__a to <2 x i64>
8081 %load = load <2 x i64>, ptr %__b
8082 %1 = bitcast <2 x i64> %load to <2 x i64>
8083 %2 = icmp sgt <2 x i64> %0, %1
8084 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8085 %4 = bitcast <32 x i1> %3 to i32
; Masked signed-greater-than compare of two <2 x i64> register operands.
; %__u is bitcast to <8 x i1>, its low 2 lanes are extracted and ANDed with the
; compare result; the <2 x i1> value is then widened to <32 x i1> with lanes
; 2-31 taken from zeroinitializer and bitcast to i32.
; Expected codegen: %edi is moved into %k1 and used as the {%k1} write-mask of
; the compare. With +avx512vl the compare is an xmm vpcmpgtq; with only
; +avx512f it is a zmm vpcmpgtq followed by kshiftlw/kshiftrw $14, which keeps
; only the low 2 mask bits.
8089 define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
8090 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
8091 ; VLX: # %bb.0: # %entry
8092 ; VLX-NEXT: kmovd %edi, %k1
8093 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
8094 ; VLX-NEXT: kmovd %k0, %eax
8097 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
8098 ; NoVLX: # %bb.0: # %entry
8099 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
8100 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8101 ; NoVLX-NEXT: kmovw %edi, %k1
8102 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8103 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8104 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8105 ; NoVLX-NEXT: kmovw %k0, %eax
8106 ; NoVLX-NEXT: vzeroupper
8109 %0 = bitcast <2 x i64> %__a to <2 x i64>
8110 %1 = bitcast <2 x i64> %__b to <2 x i64>
8111 %2 = icmp sgt <2 x i64> %0, %1
8112 %3 = bitcast i8 %__u to <8 x i1>
8113 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8114 %4 = and <2 x i1> %2, %extract.i
8115 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8116 %6 = bitcast <32 x i1> %5 to i32
; Masked signed-greater-than compare of %__a against a full <2 x i64> vector
; loaded from %__b. %__u is bitcast to <8 x i1>, its low 2 lanes are extracted
; and ANDed with the compare result; the <2 x i1> value is then widened to
; <32 x i1> with lanes 2-31 taken from zeroinitializer and bitcast to i32.
; Expected codegen: %edi is moved into %k1 and used as the {%k1} write-mask of
; the compare. With +avx512vl the load is folded into an xmm vpcmpgtq; with
; only +avx512f the vector is loaded with vmovdqa, compared as zmm, and
; kshiftlw/kshiftrw $14 keeps only the low 2 mask bits.
8120 define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
8121 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
8122 ; VLX: # %bb.0: # %entry
8123 ; VLX-NEXT: kmovd %edi, %k1
8124 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
8125 ; VLX-NEXT: kmovd %k0, %eax
8128 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
8129 ; NoVLX: # %bb.0: # %entry
8130 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8131 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
8132 ; NoVLX-NEXT: kmovw %edi, %k1
8133 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8134 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8135 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8136 ; NoVLX-NEXT: kmovw %k0, %eax
8137 ; NoVLX-NEXT: vzeroupper
8140 %0 = bitcast <2 x i64> %__a to <2 x i64>
8141 %load = load <2 x i64>, ptr %__b
8142 %1 = bitcast <2 x i64> %load to <2 x i64>
8143 %2 = icmp sgt <2 x i64> %0, %1
8144 %3 = bitcast i8 %__u to <8 x i1>
8145 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8146 %4 = and <2 x i1> %2, %extract.i
8147 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8148 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed-greater-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes (insertelement + shufflevector <0, 0>).
; The <2 x i1> result is widened to <32 x i1> with lanes 2-31 taken from
; zeroinitializer, then bitcast to i32.
; Expected codegen: with +avx512vl, one xmm vpcmpgtq using a {1to2} broadcast
; memory operand; with only +avx512f, a zmm vpcmpgtq with {1to8} followed by
; kshiftlw/kshiftrw $14, which keeps only the low 2 mask bits.
8153 define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
8154 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
8155 ; VLX: # %bb.0: # %entry
8156 ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
8157 ; VLX-NEXT: kmovd %k0, %eax
8160 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
8161 ; NoVLX: # %bb.0: # %entry
8162 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8163 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
8164 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8165 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8166 ; NoVLX-NEXT: kmovw %k0, %eax
8167 ; NoVLX-NEXT: vzeroupper
8170 %0 = bitcast <2 x i64> %__a to <2 x i64>
8171 %load = load i64, ptr %__b
8172 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8173 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8174 %2 = icmp sgt <2 x i64> %0, %1
8175 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8176 %4 = bitcast <32 x i1> %3 to i32
; Masked signed-greater-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes. %__u is bitcast to <8 x i1>, its low 2 lanes
; are extracted and ANDed with the compare result; the <2 x i1> value is then
; widened to <32 x i1> with lanes 2-31 taken from zeroinitializer and bitcast
; to i32.
; Expected codegen: %edi is moved into %k1 and used as the {%k1} write-mask of
; the compare. With +avx512vl the compare is an xmm vpcmpgtq with a {1to2}
; broadcast operand; with only +avx512f it is a zmm vpcmpgtq with {1to8}
; followed by kshiftlw/kshiftrw $14, which keeps only the low 2 mask bits.
8180 define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
8181 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
8182 ; VLX: # %bb.0: # %entry
8183 ; VLX-NEXT: kmovd %edi, %k1
8184 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
8185 ; VLX-NEXT: kmovd %k0, %eax
8188 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
8189 ; NoVLX: # %bb.0: # %entry
8190 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8191 ; NoVLX-NEXT: kmovw %edi, %k1
8192 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
8193 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8194 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8195 ; NoVLX-NEXT: kmovw %k0, %eax
8196 ; NoVLX-NEXT: vzeroupper
8199 %0 = bitcast <2 x i64> %__a to <2 x i64>
8200 %load = load i64, ptr %__b
8201 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8202 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8203 %2 = icmp sgt <2 x i64> %0, %1
8204 %3 = bitcast i8 %__u to <8 x i1>
8205 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8206 %4 = and <2 x i1> %extract.i, %2
8207 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8208 %6 = bitcast <32 x i1> %5 to i32
8213 define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
8214 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
8215 ; VLX: # %bb.0: # %entry
8216 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
8217 ; VLX-NEXT: kmovq %k0, %rax
8220 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
8221 ; NoVLX: # %bb.0: # %entry
8222 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
8223 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8224 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8225 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8226 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8227 ; NoVLX-NEXT: kmovw %k0, %eax
8228 ; NoVLX-NEXT: vzeroupper
8231 %0 = bitcast <2 x i64> %__a to <2 x i64>
8232 %1 = bitcast <2 x i64> %__b to <2 x i64>
8233 %2 = icmp sgt <2 x i64> %0, %1
8234 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8235 %4 = bitcast <64 x i1> %3 to i64
8239 define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
8240 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem:
8241 ; VLX: # %bb.0: # %entry
8242 ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
8243 ; VLX-NEXT: kmovq %k0, %rax
8246 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem:
8247 ; NoVLX: # %bb.0: # %entry
8248 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8249 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
8250 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8251 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8252 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8253 ; NoVLX-NEXT: kmovw %k0, %eax
8254 ; NoVLX-NEXT: vzeroupper
8257 %0 = bitcast <2 x i64> %__a to <2 x i64>
8258 %load = load <2 x i64>, ptr %__b
8259 %1 = bitcast <2 x i64> %load to <2 x i64>
8260 %2 = icmp sgt <2 x i64> %0, %1
8261 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8262 %4 = bitcast <64 x i1> %3 to i64
8266 define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
8267 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask:
8268 ; VLX: # %bb.0: # %entry
8269 ; VLX-NEXT: kmovd %edi, %k1
8270 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
8271 ; VLX-NEXT: kmovq %k0, %rax
8274 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask:
8275 ; NoVLX: # %bb.0: # %entry
8276 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
8277 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8278 ; NoVLX-NEXT: kmovw %edi, %k1
8279 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8280 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8281 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8282 ; NoVLX-NEXT: kmovw %k0, %eax
8283 ; NoVLX-NEXT: vzeroupper
8286 %0 = bitcast <2 x i64> %__a to <2 x i64>
8287 %1 = bitcast <2 x i64> %__b to <2 x i64>
8288 %2 = icmp sgt <2 x i64> %0, %1
8289 %3 = bitcast i8 %__u to <8 x i1>
8290 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8291 %4 = and <2 x i1> %2, %extract.i
8292 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8293 %6 = bitcast <64 x i1> %5 to i64
8297 define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
8298 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem:
8299 ; VLX: # %bb.0: # %entry
8300 ; VLX-NEXT: kmovd %edi, %k1
8301 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
8302 ; VLX-NEXT: kmovq %k0, %rax
8305 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem:
8306 ; NoVLX: # %bb.0: # %entry
8307 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8308 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
8309 ; NoVLX-NEXT: kmovw %edi, %k1
8310 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8311 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8312 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8313 ; NoVLX-NEXT: kmovw %k0, %eax
8314 ; NoVLX-NEXT: vzeroupper
8317 %0 = bitcast <2 x i64> %__a to <2 x i64>
8318 %load = load <2 x i64>, ptr %__b
8319 %1 = bitcast <2 x i64> %load to <2 x i64>
8320 %2 = icmp sgt <2 x i64> %0, %1
8321 %3 = bitcast i8 %__u to <8 x i1>
8322 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8323 %4 = and <2 x i1> %2, %extract.i
8324 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8325 %6 = bitcast <64 x i1> %5 to i64
8330 define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
8331 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
8332 ; VLX: # %bb.0: # %entry
8333 ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
8334 ; VLX-NEXT: kmovq %k0, %rax
8337 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
8338 ; NoVLX: # %bb.0: # %entry
8339 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8340 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
8341 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8342 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8343 ; NoVLX-NEXT: kmovw %k0, %eax
8344 ; NoVLX-NEXT: vzeroupper
8347 %0 = bitcast <2 x i64> %__a to <2 x i64>
8348 %load = load i64, ptr %__b
8349 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8350 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8351 %2 = icmp sgt <2 x i64> %0, %1
8352 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8353 %4 = bitcast <64 x i1> %3 to i64
8357 define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
8358 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
8359 ; VLX: # %bb.0: # %entry
8360 ; VLX-NEXT: kmovd %edi, %k1
8361 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
8362 ; VLX-NEXT: kmovq %k0, %rax
8365 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
8366 ; NoVLX: # %bb.0: # %entry
8367 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8368 ; NoVLX-NEXT: kmovw %edi, %k1
8369 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
8370 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8371 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8372 ; NoVLX-NEXT: kmovw %k0, %eax
8373 ; NoVLX-NEXT: vzeroupper
8376 %0 = bitcast <2 x i64> %__a to <2 x i64>
8377 %load = load i64, ptr %__b
8378 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8379 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8380 %2 = icmp sgt <2 x i64> %0, %1
8381 %3 = bitcast i8 %__u to <8 x i1>
8382 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8383 %4 = and <2 x i1> %extract.i, %2
8384 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8385 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed-greater-than compare of two <4 x i64> register operands.
; The <4 x i1> result is widened to <8 x i1> with lanes 4-7 taken from
; zeroinitializer, then bitcast to i8.
; Expected codegen: with +avx512vl, one ymm vpcmpgtq into a mask register;
; with only +avx512f, both operands are treated as zmm for the compare and
; kshiftlw/kshiftrw $12 keeps only the low 4 mask bits.
8390 define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8391 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask:
8392 ; VLX: # %bb.0: # %entry
8393 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
8394 ; VLX-NEXT: kmovd %k0, %eax
8395 ; VLX-NEXT: # kill: def $al killed $al killed $eax
8396 ; VLX-NEXT: vzeroupper
8399 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask:
8400 ; NoVLX: # %bb.0: # %entry
8401 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8402 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8403 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8404 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8405 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8406 ; NoVLX-NEXT: kmovw %k0, %eax
8407 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
8408 ; NoVLX-NEXT: vzeroupper
8411 %0 = bitcast <4 x i64> %__a to <4 x i64>
8412 %1 = bitcast <4 x i64> %__b to <4 x i64>
8413 %2 = icmp sgt <4 x i64> %0, %1
8414 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8415 %4 = bitcast <8 x i1> %3 to i8
; Unmasked signed-greater-than compare of %__a against a full <4 x i64> vector
; loaded from %__b. The <4 x i1> result is widened to <8 x i1> with lanes 4-7
; taken from zeroinitializer, then bitcast to i8.
; Expected codegen: with +avx512vl, the load is folded into a ymm vpcmpgtq;
; with only +avx512f, the vector is loaded with vmovdqa, compared as zmm, and
; kshiftlw/kshiftrw $12 keeps only the low 4 mask bits.
8419 define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
8420 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem:
8421 ; VLX: # %bb.0: # %entry
8422 ; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
8423 ; VLX-NEXT: kmovd %k0, %eax
8424 ; VLX-NEXT: # kill: def $al killed $al killed $eax
8425 ; VLX-NEXT: vzeroupper
8428 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem:
8429 ; NoVLX: # %bb.0: # %entry
8430 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8431 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
8432 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8433 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8434 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8435 ; NoVLX-NEXT: kmovw %k0, %eax
8436 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
8437 ; NoVLX-NEXT: vzeroupper
8440 %0 = bitcast <4 x i64> %__a to <4 x i64>
8441 %load = load <4 x i64>, ptr %__b
8442 %1 = bitcast <4 x i64> %load to <4 x i64>
8443 %2 = icmp sgt <4 x i64> %0, %1
8444 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8445 %4 = bitcast <8 x i1> %3 to i8
; Masked signed-greater-than compare of two <4 x i64> register operands.
; %__u is bitcast to <8 x i1>, its low 4 lanes are extracted and ANDed with the
; compare result; the <4 x i1> value is then widened to <8 x i1> with lanes 4-7
; taken from zeroinitializer and bitcast to i8.
; Expected codegen: %edi is moved into %k1 and used as the {%k1} write-mask of
; the compare. With +avx512vl the compare is a ymm vpcmpgtq; with only
; +avx512f it is a zmm vpcmpgtq followed by kshiftlw/kshiftrw $12, which keeps
; only the low 4 mask bits.
8449 define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8450 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask:
8451 ; VLX: # %bb.0: # %entry
8452 ; VLX-NEXT: kmovd %edi, %k1
8453 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
8454 ; VLX-NEXT: kmovd %k0, %eax
8455 ; VLX-NEXT: # kill: def $al killed $al killed $eax
8456 ; VLX-NEXT: vzeroupper
8459 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask:
8460 ; NoVLX: # %bb.0: # %entry
8461 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8462 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8463 ; NoVLX-NEXT: kmovw %edi, %k1
8464 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8465 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8466 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8467 ; NoVLX-NEXT: kmovw %k0, %eax
8468 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
8469 ; NoVLX-NEXT: vzeroupper
8472 %0 = bitcast <4 x i64> %__a to <4 x i64>
8473 %1 = bitcast <4 x i64> %__b to <4 x i64>
8474 %2 = icmp sgt <4 x i64> %0, %1
8475 %3 = bitcast i8 %__u to <8 x i1>
8476 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8477 %4 = and <4 x i1> %2, %extract.i
8478 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8479 %6 = bitcast <8 x i1> %5 to i8
; Masked signed-greater-than compare of %__a against a full <4 x i64> vector
; loaded from %__b. %__u is bitcast to <8 x i1>, its low 4 lanes are extracted
; and ANDed with the compare result; the <4 x i1> value is then widened to
; <8 x i1> with lanes 4-7 taken from zeroinitializer and bitcast to i8.
; Expected codegen: %edi is moved into %k1 and used as the {%k1} write-mask of
; the compare. With +avx512vl the load is folded into a ymm vpcmpgtq; with
; only +avx512f the vector is loaded with vmovdqa, compared as zmm, and
; kshiftlw/kshiftrw $12 keeps only the low 4 mask bits.
8483 define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
8484 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem:
8485 ; VLX: # %bb.0: # %entry
8486 ; VLX-NEXT: kmovd %edi, %k1
8487 ; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
8488 ; VLX-NEXT: kmovd %k0, %eax
8489 ; VLX-NEXT: # kill: def $al killed $al killed $eax
8490 ; VLX-NEXT: vzeroupper
8493 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem:
8494 ; NoVLX: # %bb.0: # %entry
8495 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8496 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
8497 ; NoVLX-NEXT: kmovw %edi, %k1
8498 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8499 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8500 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8501 ; NoVLX-NEXT: kmovw %k0, %eax
8502 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
8503 ; NoVLX-NEXT: vzeroupper
8506 %0 = bitcast <4 x i64> %__a to <4 x i64>
8507 %load = load <4 x i64>, ptr %__b
8508 %1 = bitcast <4 x i64> %load to <4 x i64>
8509 %2 = icmp sgt <4 x i64> %0, %1
8510 %3 = bitcast i8 %__u to <8 x i1>
8511 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8512 %4 = and <4 x i1> %2, %extract.i
8513 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8514 %6 = bitcast <8 x i1> %5 to i8
; Unmasked signed-greater-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to all 4 lanes (insertelement + shufflevector <0, 0, 0, 0>).
; The <4 x i1> result is widened to <8 x i1> with lanes 4-7 taken from
; zeroinitializer, then bitcast to i8.
; Expected codegen: with +avx512vl, one ymm vpcmpgtq using a {1to4} broadcast
; memory operand; with only +avx512f, a zmm vpcmpgtq with {1to8} followed by
; kshiftlw/kshiftrw $12, which keeps only the low 4 mask bits.
8519 define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
8520 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
8521 ; VLX: # %bb.0: # %entry
8522 ; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
8523 ; VLX-NEXT: kmovd %k0, %eax
8524 ; VLX-NEXT: # kill: def $al killed $al killed $eax
8525 ; VLX-NEXT: vzeroupper
8528 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
8529 ; NoVLX: # %bb.0: # %entry
8530 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8531 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
8532 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8533 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8534 ; NoVLX-NEXT: kmovw %k0, %eax
8535 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
8536 ; NoVLX-NEXT: vzeroupper
8539 %0 = bitcast <4 x i64> %__a to <4 x i64>
8540 %load = load i64, ptr %__b
8541 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8542 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8543 %2 = icmp sgt <4 x i64> %0, %1
8544 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8545 %4 = bitcast <8 x i1> %3 to i8
; Masked form of the broadcast compare: %__a is compared (signed greater-than)
; against an i64 loaded from %__b and splatted to four lanes, then ANDed with
; the low four lanes of %__u (bitcast to <8 x i1>). The <4 x i1> result is
; zero-padded to <8 x i1> (indices 4..7 select zeroinitializer lanes) and
; bitcast to i8.
; Expected code: %__u is moved into %k1 and used as the compare's write-mask.
; The +avx512vl run uses a ymm compare with a {1to4} broadcast; the
; avx512f-only run uses zmm with {1to8} plus a kshiftlw/kshiftrw $12 pair.
8549 define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
8550 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
8551 ; VLX: # %bb.0: # %entry
8552 ; VLX-NEXT: kmovd %edi, %k1
8553 ; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
8554 ; VLX-NEXT: kmovd %k0, %eax
8555 ; VLX-NEXT: # kill: def $al killed $al killed $eax
8556 ; VLX-NEXT: vzeroupper
8559 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
8560 ; NoVLX: # %bb.0: # %entry
8561 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8562 ; NoVLX-NEXT: kmovw %edi, %k1
8563 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
8564 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8565 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8566 ; NoVLX-NEXT: kmovw %k0, %eax
8567 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
8568 ; NoVLX-NEXT: vzeroupper
8571 %0 = bitcast <4 x i64> %__a to <4 x i64>
8572 %load = load i64, ptr %__b
8573 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8574 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8575 %2 = icmp sgt <4 x i64> %0, %1
8576 %3 = bitcast i8 %__u to <8 x i1>
8577 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8578 %4 = and <4 x i1> %extract.i, %2
8579 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8580 %6 = bitcast <8 x i1> %5 to i8
; Signed greater-than compare of two <4 x i64> register operands. The
; <4 x i1> result is widened to <16 x i1>: shuffle indices 0..3 take the
; compare lanes and every index 4..7 selects a zeroinitializer lane, so all
; upper lanes are zero. The widened mask is bitcast to i16.
; Expected code: the +avx512vl run compares in ymm directly; the avx512f-only
; run widens both operands to zmm and keeps only the low 4 mask bits with a
; kshiftlw/kshiftrw $12 pair.
8585 define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8586 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask:
8587 ; VLX: # %bb.0: # %entry
8588 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
8589 ; VLX-NEXT: kmovd %k0, %eax
8590 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8591 ; VLX-NEXT: vzeroupper
8594 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask:
8595 ; NoVLX: # %bb.0: # %entry
8596 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8597 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8598 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8599 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8600 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8601 ; NoVLX-NEXT: kmovw %k0, %eax
8602 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8603 ; NoVLX-NEXT: vzeroupper
8606 %0 = bitcast <4 x i64> %__a to <4 x i64>
8607 %1 = bitcast <4 x i64> %__b to <4 x i64>
8608 %2 = icmp sgt <4 x i64> %0, %1
8609 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8610 %4 = bitcast <16 x i1> %3 to i16
; Signed greater-than compare of %__a against a <4 x i64> vector loaded from
; %__b. The <4 x i1> result is widened to <16 x i1> (indices 4..7 select
; zeroinitializer lanes, so all upper lanes are zero) and bitcast to i16.
; Expected code: the +avx512vl run folds the load into a ymm vpcmpgtq; the
; avx512f-only run loads into %ymm1 with vmovdqa, compares in zmm and keeps
; only the low 4 mask bits with a kshiftlw/kshiftrw $12 pair.
8614 define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
8615 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem:
8616 ; VLX: # %bb.0: # %entry
8617 ; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
8618 ; VLX-NEXT: kmovd %k0, %eax
8619 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8620 ; VLX-NEXT: vzeroupper
8623 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem:
8624 ; NoVLX: # %bb.0: # %entry
8625 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8626 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
8627 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8628 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8629 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8630 ; NoVLX-NEXT: kmovw %k0, %eax
8631 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8632 ; NoVLX-NEXT: vzeroupper
8635 %0 = bitcast <4 x i64> %__a to <4 x i64>
8636 %load = load <4 x i64>, ptr %__b
8637 %1 = bitcast <4 x i64> %load to <4 x i64>
8638 %2 = icmp sgt <4 x i64> %0, %1
8639 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8640 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than compare of two <4 x i64> register operands. The
; compare result is ANDed with the low four lanes of %__u (bitcast to
; <8 x i1>), widened to <16 x i1> (indices 4..7 select zeroinitializer lanes,
; so all upper lanes are zero) and bitcast to i16.
; Expected code: %__u is moved into %k1 and used as the compare's write-mask.
; The +avx512vl run compares in ymm; the avx512f-only run widens to zmm and
; keeps only the low 4 mask bits with a kshiftlw/kshiftrw $12 pair.
8644 define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8645 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask:
8646 ; VLX: # %bb.0: # %entry
8647 ; VLX-NEXT: kmovd %edi, %k1
8648 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
8649 ; VLX-NEXT: kmovd %k0, %eax
8650 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8651 ; VLX-NEXT: vzeroupper
8654 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask:
8655 ; NoVLX: # %bb.0: # %entry
8656 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8657 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8658 ; NoVLX-NEXT: kmovw %edi, %k1
8659 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8660 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8661 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8662 ; NoVLX-NEXT: kmovw %k0, %eax
8663 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8664 ; NoVLX-NEXT: vzeroupper
8667 %0 = bitcast <4 x i64> %__a to <4 x i64>
8668 %1 = bitcast <4 x i64> %__b to <4 x i64>
8669 %2 = icmp sgt <4 x i64> %0, %1
8670 %3 = bitcast i8 %__u to <8 x i1>
8671 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8672 %4 = and <4 x i1> %2, %extract.i
8673 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8674 %6 = bitcast <16 x i1> %5 to i16
; Masked signed greater-than compare of %__a against a <4 x i64> vector
; loaded from %__b. The compare result is ANDed with the low four lanes of
; %__u (bitcast to <8 x i1>), widened to <16 x i1> (indices 4..7 select
; zeroinitializer lanes, so all upper lanes are zero) and bitcast to i16.
; Expected code: %__u is moved into %k1 and used as the compare's write-mask.
; The +avx512vl run folds the load into a ymm compare; the avx512f-only run
; loads into %ymm1, compares in zmm and applies a kshiftlw/kshiftrw $12 pair.
8678 define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
8679 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem:
8680 ; VLX: # %bb.0: # %entry
8681 ; VLX-NEXT: kmovd %edi, %k1
8682 ; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
8683 ; VLX-NEXT: kmovd %k0, %eax
8684 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8685 ; VLX-NEXT: vzeroupper
8688 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem:
8689 ; NoVLX: # %bb.0: # %entry
8690 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8691 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
8692 ; NoVLX-NEXT: kmovw %edi, %k1
8693 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8694 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8695 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8696 ; NoVLX-NEXT: kmovw %k0, %eax
8697 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8698 ; NoVLX-NEXT: vzeroupper
8701 %0 = bitcast <4 x i64> %__a to <4 x i64>
8702 %load = load <4 x i64>, ptr %__b
8703 %1 = bitcast <4 x i64> %load to <4 x i64>
8704 %2 = icmp sgt <4 x i64> %0, %1
8705 %3 = bitcast i8 %__u to <8 x i1>
8706 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8707 %4 = and <4 x i1> %2, %extract.i
8708 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8709 %6 = bitcast <16 x i1> %5 to i16
; Signed greater-than compare of %__a against a single i64 loaded from %__b
; and splatted to four lanes. The <4 x i1> result is widened to <16 x i1>
; (indices 4..7 select zeroinitializer lanes, so all upper lanes are zero)
; and bitcast to i16.
; Expected code: the +avx512vl run folds the load as a {1to4} broadcast into
; a ymm vpcmpgtq; the avx512f-only run uses zmm with a {1to8} broadcast and
; keeps only the low 4 mask bits with a kshiftlw/kshiftrw $12 pair.
8714 define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
8715 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
8716 ; VLX: # %bb.0: # %entry
8717 ; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
8718 ; VLX-NEXT: kmovd %k0, %eax
8719 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8720 ; VLX-NEXT: vzeroupper
8723 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
8724 ; NoVLX: # %bb.0: # %entry
8725 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8726 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
8727 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8728 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8729 ; NoVLX-NEXT: kmovw %k0, %eax
8730 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8731 ; NoVLX-NEXT: vzeroupper
8734 %0 = bitcast <4 x i64> %__a to <4 x i64>
8735 %load = load i64, ptr %__b
8736 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8737 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8738 %2 = icmp sgt <4 x i64> %0, %1
8739 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8740 %4 = bitcast <16 x i1> %3 to i16
; Masked form of the broadcast compare: %__a is compared (signed greater-than)
; against an i64 loaded from %__b and splatted to four lanes, then ANDed with
; the low four lanes of %__u (bitcast to <8 x i1>). The result is widened to
; <16 x i1> (indices 4..7 select zeroinitializer lanes, so all upper lanes
; are zero) and bitcast to i16.
; Expected code: %__u is moved into %k1 and used as the compare's write-mask.
; The +avx512vl run uses a ymm compare with a {1to4} broadcast; the
; avx512f-only run uses zmm with {1to8} plus a kshiftlw/kshiftrw $12 pair.
8744 define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
8745 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
8746 ; VLX: # %bb.0: # %entry
8747 ; VLX-NEXT: kmovd %edi, %k1
8748 ; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
8749 ; VLX-NEXT: kmovd %k0, %eax
8750 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8751 ; VLX-NEXT: vzeroupper
8754 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
8755 ; NoVLX: # %bb.0: # %entry
8756 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8757 ; NoVLX-NEXT: kmovw %edi, %k1
8758 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
8759 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8760 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8761 ; NoVLX-NEXT: kmovw %k0, %eax
8762 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8763 ; NoVLX-NEXT: vzeroupper
8766 %0 = bitcast <4 x i64> %__a to <4 x i64>
8767 %load = load i64, ptr %__b
8768 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8769 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8770 %2 = icmp sgt <4 x i64> %0, %1
8771 %3 = bitcast i8 %__u to <8 x i1>
8772 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8773 %4 = and <4 x i1> %extract.i, %2
8774 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8775 %6 = bitcast <16 x i1> %5 to i16
; Signed greater-than compare of two <4 x i64> register operands. The
; <4 x i1> result is widened to <32 x i1>: shuffle indices 0..3 take the
; compare lanes and every index 4..7 selects a zeroinitializer lane, so all
; upper lanes are zero. The widened mask is bitcast to i32.
; Expected code: the +avx512vl run compares in ymm and reads the mask with
; kmovd; the avx512f-only run widens to zmm, keeps only the low 4 mask bits
; with a kshiftlw/kshiftrw $12 pair and reads the mask with kmovw.
8780 define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8781 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask:
8782 ; VLX: # %bb.0: # %entry
8783 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
8784 ; VLX-NEXT: kmovd %k0, %eax
8785 ; VLX-NEXT: vzeroupper
8788 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask:
8789 ; NoVLX: # %bb.0: # %entry
8790 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8791 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8792 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8793 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8794 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8795 ; NoVLX-NEXT: kmovw %k0, %eax
8796 ; NoVLX-NEXT: vzeroupper
8799 %0 = bitcast <4 x i64> %__a to <4 x i64>
8800 %1 = bitcast <4 x i64> %__b to <4 x i64>
8801 %2 = icmp sgt <4 x i64> %0, %1
8802 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8803 %4 = bitcast <32 x i1> %3 to i32
; Signed greater-than compare of %__a against a <4 x i64> vector loaded from
; %__b. The <4 x i1> result is widened to <32 x i1> (indices 4..7 select
; zeroinitializer lanes, so all upper lanes are zero) and bitcast to i32.
; Expected code: the +avx512vl run folds the load into a ymm vpcmpgtq; the
; avx512f-only run loads into %ymm1 with vmovdqa, compares in zmm and keeps
; only the low 4 mask bits with a kshiftlw/kshiftrw $12 pair.
8807 define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
8808 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem:
8809 ; VLX: # %bb.0: # %entry
8810 ; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
8811 ; VLX-NEXT: kmovd %k0, %eax
8812 ; VLX-NEXT: vzeroupper
8815 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem:
8816 ; NoVLX: # %bb.0: # %entry
8817 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8818 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
8819 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8820 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8821 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8822 ; NoVLX-NEXT: kmovw %k0, %eax
8823 ; NoVLX-NEXT: vzeroupper
8826 %0 = bitcast <4 x i64> %__a to <4 x i64>
8827 %load = load <4 x i64>, ptr %__b
8828 %1 = bitcast <4 x i64> %load to <4 x i64>
8829 %2 = icmp sgt <4 x i64> %0, %1
8830 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8831 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of two <4 x i64> register operands. The
; compare result is ANDed with the low four lanes of %__u (bitcast to
; <8 x i1>), widened to <32 x i1> (indices 4..7 select zeroinitializer lanes,
; so all upper lanes are zero) and bitcast to i32.
; Expected code: %__u is moved into %k1 and used as the compare's write-mask.
; The +avx512vl run compares in ymm; the avx512f-only run widens to zmm and
; keeps only the low 4 mask bits with a kshiftlw/kshiftrw $12 pair.
8835 define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8836 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask:
8837 ; VLX: # %bb.0: # %entry
8838 ; VLX-NEXT: kmovd %edi, %k1
8839 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
8840 ; VLX-NEXT: kmovd %k0, %eax
8841 ; VLX-NEXT: vzeroupper
8844 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask:
8845 ; NoVLX: # %bb.0: # %entry
8846 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8847 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8848 ; NoVLX-NEXT: kmovw %edi, %k1
8849 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8850 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8851 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8852 ; NoVLX-NEXT: kmovw %k0, %eax
8853 ; NoVLX-NEXT: vzeroupper
8856 %0 = bitcast <4 x i64> %__a to <4 x i64>
8857 %1 = bitcast <4 x i64> %__b to <4 x i64>
8858 %2 = icmp sgt <4 x i64> %0, %1
8859 %3 = bitcast i8 %__u to <8 x i1>
8860 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8861 %4 = and <4 x i1> %2, %extract.i
8862 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8863 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-than compare of %__a against a <4 x i64> vector
; loaded from %__b. The compare result is ANDed with the low four lanes of
; %__u (bitcast to <8 x i1>), widened to <32 x i1> (indices 4..7 select
; zeroinitializer lanes, so all upper lanes are zero) and bitcast to i32.
; Expected code: %__u is moved into %k1 and used as the compare's write-mask.
; The +avx512vl run folds the load into a ymm compare; the avx512f-only run
; loads into %ymm1, compares in zmm and applies a kshiftlw/kshiftrw $12 pair.
8867 define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
8868 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem:
8869 ; VLX: # %bb.0: # %entry
8870 ; VLX-NEXT: kmovd %edi, %k1
8871 ; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
8872 ; VLX-NEXT: kmovd %k0, %eax
8873 ; VLX-NEXT: vzeroupper
8876 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem:
8877 ; NoVLX: # %bb.0: # %entry
8878 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8879 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
8880 ; NoVLX-NEXT: kmovw %edi, %k1
8881 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8882 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8883 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8884 ; NoVLX-NEXT: kmovw %k0, %eax
8885 ; NoVLX-NEXT: vzeroupper
8888 %0 = bitcast <4 x i64> %__a to <4 x i64>
8889 %load = load <4 x i64>, ptr %__b
8890 %1 = bitcast <4 x i64> %load to <4 x i64>
8891 %2 = icmp sgt <4 x i64> %0, %1
8892 %3 = bitcast i8 %__u to <8 x i1>
8893 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8894 %4 = and <4 x i1> %2, %extract.i
8895 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8896 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-than compare of %__a against a single i64 loaded from %__b
; and splatted to four lanes. The <4 x i1> result is widened to <32 x i1>
; (indices 4..7 select zeroinitializer lanes, so all upper lanes are zero)
; and bitcast to i32.
; Expected code: the +avx512vl run folds the load as a {1to4} broadcast into
; a ymm vpcmpgtq; the avx512f-only run uses zmm with a {1to8} broadcast and
; keeps only the low 4 mask bits with a kshiftlw/kshiftrw $12 pair.
8901 define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
8902 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
8903 ; VLX: # %bb.0: # %entry
8904 ; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
8905 ; VLX-NEXT: kmovd %k0, %eax
8906 ; VLX-NEXT: vzeroupper
8909 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
8910 ; NoVLX: # %bb.0: # %entry
8911 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8912 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
8913 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8914 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8915 ; NoVLX-NEXT: kmovw %k0, %eax
8916 ; NoVLX-NEXT: vzeroupper
8919 %0 = bitcast <4 x i64> %__a to <4 x i64>
8920 %load = load i64, ptr %__b
8921 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8922 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8923 %2 = icmp sgt <4 x i64> %0, %1
8924 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8925 %4 = bitcast <32 x i1> %3 to i32
; Masked form of the broadcast compare: %__a is compared (signed greater-than)
; against an i64 loaded from %__b and splatted to four lanes, then ANDed with
; the low four lanes of %__u (bitcast to <8 x i1>). The result is widened to
; <32 x i1> (indices 4..7 select zeroinitializer lanes, so all upper lanes
; are zero) and bitcast to i32.
; Expected code: %__u is moved into %k1 and used as the compare's write-mask.
; The +avx512vl run uses a ymm compare with a {1to4} broadcast; the
; avx512f-only run uses zmm with {1to8} plus a kshiftlw/kshiftrw $12 pair.
8929 define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
8930 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
8931 ; VLX: # %bb.0: # %entry
8932 ; VLX-NEXT: kmovd %edi, %k1
8933 ; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
8934 ; VLX-NEXT: kmovd %k0, %eax
8935 ; VLX-NEXT: vzeroupper
8938 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
8939 ; NoVLX: # %bb.0: # %entry
8940 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8941 ; NoVLX-NEXT: kmovw %edi, %k1
8942 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
8943 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8944 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8945 ; NoVLX-NEXT: kmovw %k0, %eax
8946 ; NoVLX-NEXT: vzeroupper
8949 %0 = bitcast <4 x i64> %__a to <4 x i64>
8950 %load = load i64, ptr %__b
8951 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8952 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8953 %2 = icmp sgt <4 x i64> %0, %1
8954 %3 = bitcast i8 %__u to <8 x i1>
8955 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8956 %4 = and <4 x i1> %extract.i, %2
8957 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8958 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-than compare of two <4 x i64> register operands. The
; <4 x i1> result is widened to <64 x i1>: shuffle indices 0..3 take the
; compare lanes and every index 4..7 selects a zeroinitializer lane, so all
; upper lanes are zero. The widened mask is bitcast to i64.
; Expected code: the +avx512vl run compares in ymm and reads the mask with
; kmovq into %rax; the avx512f-only run widens to zmm, keeps only the low 4
; mask bits with a kshiftlw/kshiftrw $12 pair and reads it with kmovw.
8963 define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8964 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask:
8965 ; VLX: # %bb.0: # %entry
8966 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
8967 ; VLX-NEXT: kmovq %k0, %rax
8968 ; VLX-NEXT: vzeroupper
8971 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask:
8972 ; NoVLX: # %bb.0: # %entry
8973 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8974 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8975 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8976 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8977 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8978 ; NoVLX-NEXT: kmovw %k0, %eax
8979 ; NoVLX-NEXT: vzeroupper
8982 %0 = bitcast <4 x i64> %__a to <4 x i64>
8983 %1 = bitcast <4 x i64> %__b to <4 x i64>
8984 %2 = icmp sgt <4 x i64> %0, %1
8985 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8986 %4 = bitcast <64 x i1> %3 to i64
; Signed greater-than compare of %__a against a <4 x i64> vector loaded from
; %__b. The <4 x i1> result is widened to <64 x i1> (indices 4..7 select
; zeroinitializer lanes, so all upper lanes are zero) and bitcast to i64.
; Expected code: the +avx512vl run folds the load into a ymm vpcmpgtq and
; reads the mask with kmovq; the avx512f-only run loads into %ymm1 with
; vmovdqa, compares in zmm and applies a kshiftlw/kshiftrw $12 pair.
8990 define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
8991 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem:
8992 ; VLX: # %bb.0: # %entry
8993 ; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
8994 ; VLX-NEXT: kmovq %k0, %rax
8995 ; VLX-NEXT: vzeroupper
8998 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem:
8999 ; NoVLX: # %bb.0: # %entry
9000 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
9001 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
9002 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9003 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
9004 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
9005 ; NoVLX-NEXT: kmovw %k0, %eax
9006 ; NoVLX-NEXT: vzeroupper
9009 %0 = bitcast <4 x i64> %__a to <4 x i64>
9010 %load = load <4 x i64>, ptr %__b
9011 %1 = bitcast <4 x i64> %load to <4 x i64>
9012 %2 = icmp sgt <4 x i64> %0, %1
9013 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9014 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of two <4 x i64> register operands. The
; compare result is ANDed with the low four lanes of %__u (bitcast to
; <8 x i1>), widened to <64 x i1> (indices 4..7 select zeroinitializer lanes,
; so all upper lanes are zero) and bitcast to i64.
; Expected code: %__u is moved into %k1 and used as the compare's write-mask.
; The +avx512vl run compares in ymm and reads the mask with kmovq; the
; avx512f-only run widens to zmm and applies a kshiftlw/kshiftrw $12 pair.
9018 define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
9019 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask:
9020 ; VLX: # %bb.0: # %entry
9021 ; VLX-NEXT: kmovd %edi, %k1
9022 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
9023 ; VLX-NEXT: kmovq %k0, %rax
9024 ; VLX-NEXT: vzeroupper
9027 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask:
9028 ; NoVLX: # %bb.0: # %entry
9029 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
9030 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
9031 ; NoVLX-NEXT: kmovw %edi, %k1
9032 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9033 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
9034 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
9035 ; NoVLX-NEXT: kmovw %k0, %eax
9036 ; NoVLX-NEXT: vzeroupper
9039 %0 = bitcast <4 x i64> %__a to <4 x i64>
9040 %1 = bitcast <4 x i64> %__b to <4 x i64>
9041 %2 = icmp sgt <4 x i64> %0, %1
9042 %3 = bitcast i8 %__u to <8 x i1>
9043 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9044 %4 = and <4 x i1> %2, %extract.i
9045 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9046 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-than compare of %__a against a <4 x i64> vector
; loaded from %__b. The compare result is ANDed with the low four lanes of
; %__u (bitcast to <8 x i1>), widened to <64 x i1> (indices 4..7 select
; zeroinitializer lanes, so all upper lanes are zero) and bitcast to i64.
; Expected code: %__u is moved into %k1 and used as the compare's write-mask.
; The +avx512vl run folds the load into a ymm compare and reads the mask with
; kmovq; the avx512f-only run loads into %ymm1, compares in zmm and applies a
; kshiftlw/kshiftrw $12 pair.
9050 define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
9051 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem:
9052 ; VLX: # %bb.0: # %entry
9053 ; VLX-NEXT: kmovd %edi, %k1
9054 ; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
9055 ; VLX-NEXT: kmovq %k0, %rax
9056 ; VLX-NEXT: vzeroupper
9059 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem:
9060 ; NoVLX: # %bb.0: # %entry
9061 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
9062 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
9063 ; NoVLX-NEXT: kmovw %edi, %k1
9064 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9065 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
9066 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
9067 ; NoVLX-NEXT: kmovw %k0, %eax
9068 ; NoVLX-NEXT: vzeroupper
9071 %0 = bitcast <4 x i64> %__a to <4 x i64>
9072 %load = load <4 x i64>, ptr %__b
9073 %1 = bitcast <4 x i64> %load to <4 x i64>
9074 %2 = icmp sgt <4 x i64> %0, %1
9075 %3 = bitcast i8 %__u to <8 x i1>
9076 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9077 %4 = and <4 x i1> %2, %extract.i
9078 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9079 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of %__a against a single i64 loaded from %__b
; and splatted to four lanes. The <4 x i1> result is widened to <64 x i1>
; (indices 4..7 select zeroinitializer lanes, so all upper lanes are zero)
; and bitcast to i64.
; Expected code: the +avx512vl run folds the load as a {1to4} broadcast into
; a ymm vpcmpgtq and reads the mask with kmovq; the avx512f-only run uses zmm
; with a {1to8} broadcast plus a kshiftlw/kshiftrw $12 pair.
9084 define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
9085 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
9086 ; VLX: # %bb.0: # %entry
9087 ; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
9088 ; VLX-NEXT: kmovq %k0, %rax
9089 ; VLX-NEXT: vzeroupper
9092 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
9093 ; NoVLX: # %bb.0: # %entry
9094 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
9095 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9096 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
9097 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
9098 ; NoVLX-NEXT: kmovw %k0, %eax
9099 ; NoVLX-NEXT: vzeroupper
9102 %0 = bitcast <4 x i64> %__a to <4 x i64>
9103 %load = load i64, ptr %__b
9104 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
9105 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
9106 %2 = icmp sgt <4 x i64> %0, %1
9107 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9108 %4 = bitcast <64 x i1> %3 to i64
; Masked form of the broadcast compare: %__a is compared (signed greater-than)
; against an i64 loaded from %__b and splatted to four lanes, then ANDed with
; the low four lanes of %__u (bitcast to <8 x i1>). The result is widened to
; <64 x i1> (indices 4..7 select zeroinitializer lanes, so all upper lanes
; are zero) and bitcast to i64.
; Expected code: %__u is moved into %k1 and used as the compare's write-mask.
; The +avx512vl run uses a ymm compare with a {1to4} broadcast and kmovq; the
; avx512f-only run uses zmm with {1to8} plus a kshiftlw/kshiftrw $12 pair.
9112 define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
9113 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
9114 ; VLX: # %bb.0: # %entry
9115 ; VLX-NEXT: kmovd %edi, %k1
9116 ; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
9117 ; VLX-NEXT: kmovq %k0, %rax
9118 ; VLX-NEXT: vzeroupper
9121 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
9122 ; NoVLX: # %bb.0: # %entry
9123 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
9124 ; NoVLX-NEXT: kmovw %edi, %k1
9125 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9126 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
9127 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
9128 ; NoVLX-NEXT: kmovw %k0, %eax
9129 ; NoVLX-NEXT: vzeroupper
9132 %0 = bitcast <4 x i64> %__a to <4 x i64>
9133 %load = load i64, ptr %__b
9134 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
9135 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
9136 %2 = icmp sgt <4 x i64> %0, %1
9137 %3 = bitcast i8 %__u to <8 x i1>
9138 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9139 %4 = and <4 x i1> %extract.i, %2
9140 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9141 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of two <8 x i64> register operands. The
; <8 x i1> result is widened to <16 x i1> (shuffle indices 8..15 select lanes
; of the zeroinitializer operand, so the upper lanes are zero) and bitcast to
; i16.
; Expected code: both runs emit a single zmm vpcmpgtq with no mask shifts;
; they differ only in reading the mask with kmovd (+avx512vl run) versus
; kmovw (avx512f-only run).
9146 define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9147 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask:
9148 ; VLX: # %bb.0: # %entry
9149 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9150 ; VLX-NEXT: kmovd %k0, %eax
9151 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
9152 ; VLX-NEXT: vzeroupper
9155 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask:
9156 ; NoVLX: # %bb.0: # %entry
9157 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9158 ; NoVLX-NEXT: kmovw %k0, %eax
9159 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
9160 ; NoVLX-NEXT: vzeroupper
9163 %0 = bitcast <8 x i64> %__a to <8 x i64>
9164 %1 = bitcast <8 x i64> %__b to <8 x i64>
9165 %2 = icmp sgt <8 x i64> %0, %1
9166 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9167 %4 = bitcast <16 x i1> %3 to i16
; Signed greater-than compare of %__a against an <8 x i64> vector loaded from
; %__b. The <8 x i1> result is widened to <16 x i1> (indices 8..15 select
; zeroinitializer lanes, so the upper lanes are zero) and bitcast to i16.
; Expected code: both runs fold the load into a zmm vpcmpgtq; they differ
; only in reading the mask with kmovd (+avx512vl run) versus kmovw
; (avx512f-only run).
9171 define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
9172 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem:
9173 ; VLX: # %bb.0: # %entry
9174 ; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
9175 ; VLX-NEXT: kmovd %k0, %eax
9176 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
9177 ; VLX-NEXT: vzeroupper
9180 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem:
9181 ; NoVLX: # %bb.0: # %entry
9182 ; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
9183 ; NoVLX-NEXT: kmovw %k0, %eax
9184 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
9185 ; NoVLX-NEXT: vzeroupper
9188 %0 = bitcast <8 x i64> %__a to <8 x i64>
9189 %load = load <8 x i64>, ptr %__b
9190 %1 = bitcast <8 x i64> %load to <8 x i64>
9191 %2 = icmp sgt <8 x i64> %0, %1
9192 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9193 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than compare of two <8 x i64> register operands. The
; compare result is ANDed with %__u (bitcast to <8 x i1>), widened to
; <16 x i1> (indices 8..15 select zeroinitializer lanes, so the upper lanes
; are zero) and bitcast to i16.
; Expected code: %__u is moved into %k1 and used as the write-mask of a zmm
; vpcmpgtq in both runs; they differ only in kmovd (+avx512vl run) versus
; kmovw (avx512f-only run) for the mask moves.
9197 define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9198 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask:
9199 ; VLX: # %bb.0: # %entry
9200 ; VLX-NEXT: kmovd %edi, %k1
9201 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9202 ; VLX-NEXT: kmovd %k0, %eax
9203 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
9204 ; VLX-NEXT: vzeroupper
9207 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask:
9208 ; NoVLX: # %bb.0: # %entry
9209 ; NoVLX-NEXT: kmovw %edi, %k1
9210 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9211 ; NoVLX-NEXT: kmovw %k0, %eax
9212 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
9213 ; NoVLX-NEXT: vzeroupper
9216 %0 = bitcast <8 x i64> %__a to <8 x i64>
9217 %1 = bitcast <8 x i64> %__b to <8 x i64>
9218 %2 = icmp sgt <8 x i64> %0, %1
9219 %3 = bitcast i8 %__u to <8 x i1>
9220 %4 = and <8 x i1> %2, %3
9221 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9222 %6 = bitcast <16 x i1> %5 to i16
; Masked signed greater-than compare of %__a against an <8 x i64> vector
; loaded from %__b. The compare result is ANDed with %__u (bitcast to
; <8 x i1>), widened to <16 x i1> (indices 8..15 select zeroinitializer
; lanes, so the upper lanes are zero) and bitcast to i16.
; Expected code: %__u is moved into %k1 and used as the write-mask of a zmm
; vpcmpgtq with the load folded in both runs; they differ only in kmovd
; (+avx512vl run) versus kmovw (avx512f-only run) for the mask moves.
9226 define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
9227 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem:
9228 ; VLX: # %bb.0: # %entry
9229 ; VLX-NEXT: kmovd %edi, %k1
9230 ; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9231 ; VLX-NEXT: kmovd %k0, %eax
9232 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
9233 ; VLX-NEXT: vzeroupper
9236 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem:
9237 ; NoVLX: # %bb.0: # %entry
9238 ; NoVLX-NEXT: kmovw %edi, %k1
9239 ; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9240 ; NoVLX-NEXT: kmovw %k0, %eax
9241 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
9242 ; NoVLX-NEXT: vzeroupper
9245 %0 = bitcast <8 x i64> %__a to <8 x i64>
9246 %load = load <8 x i64>, ptr %__b
9247 %1 = bitcast <8 x i64> %load to <8 x i64>
9248 %2 = icmp sgt <8 x i64> %0, %1
9249 %3 = bitcast i8 %__u to <8 x i1>
9250 %4 = and <8 x i1> %2, %3
9251 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9252 %6 = bitcast <16 x i1> %5 to i16
; Signed greater-than compare of %__a against a single i64 loaded from %__b
; and splatted to all eight lanes. The <8 x i1> result is widened to
; <16 x i1> (indices 8..15 select zeroinitializer lanes, so the upper lanes
; are zero) and bitcast to i16.
; Expected code: both runs fold the load as a {1to8} broadcast into a zmm
; vpcmpgtq; they differ only in reading the mask with kmovd (+avx512vl run)
; versus kmovw (avx512f-only run).
9257 define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
9258 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
9259 ; VLX: # %bb.0: # %entry
9260 ; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9261 ; VLX-NEXT: kmovd %k0, %eax
9262 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
9263 ; VLX-NEXT: vzeroupper
9266 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
9267 ; NoVLX: # %bb.0: # %entry
9268 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9269 ; NoVLX-NEXT: kmovw %k0, %eax
9270 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
9271 ; NoVLX-NEXT: vzeroupper
9274 %0 = bitcast <8 x i64> %__a to <8 x i64>
9275 %load = load i64, ptr %__b
9276 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9277 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9278 %2 = icmp sgt <8 x i64> %0, %1
9279 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9280 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than compare of %__a against a single i64 loaded from
; %__b and splatted to all eight lanes. The <8 x i1> result is ANDed with the
; bits of %__u (mask is the first 'and' operand in this variant), padded with
; eight zero lanes, and bitcast to i16.
; Both run configurations expect a {1to8} broadcast memory operand and a
; compare predicated on %k1, which is loaded from %edi.
9284 define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
9285 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
9286 ; VLX: # %bb.0: # %entry
9287 ; VLX-NEXT: kmovd %edi, %k1
9288 ; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9289 ; VLX-NEXT: kmovd %k0, %eax
9290 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
9291 ; VLX-NEXT: vzeroupper
9294 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
9295 ; NoVLX: # %bb.0: # %entry
9296 ; NoVLX-NEXT: kmovw %edi, %k1
9297 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9298 ; NoVLX-NEXT: kmovw %k0, %eax
9299 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
9300 ; NoVLX-NEXT: vzeroupper
9303 %0 = bitcast <8 x i64> %__a to <8 x i64>
9304 %load = load i64, ptr %__b
9305 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9306 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9307 %2 = icmp sgt <8 x i64> %0, %1
9308 %3 = bitcast i8 %__u to <8 x i1>
9309 %4 = and <8 x i1> %3, %2
9310 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9311 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed greater-than compare of two <8 x i64> register operands.
; The <8 x i1> result is widened to <32 x i1>: shuffle indices 0-7 take the
; compare lanes, and every index of 8 or above selects a lane of the
; zeroinitializer operand, so the upper 24 lanes are zero. The widened vector
; is bitcast to i32.
; Both run configurations expect a single vpcmpgtq into %k0 followed by a
; mask-to-GPR move, with no extra masking of the upper bits.
9316 define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9317 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask:
9318 ; VLX: # %bb.0: # %entry
9319 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9320 ; VLX-NEXT: kmovd %k0, %eax
9321 ; VLX-NEXT: vzeroupper
9324 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask:
9325 ; NoVLX: # %bb.0: # %entry
9326 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9327 ; NoVLX-NEXT: kmovw %k0, %eax
9328 ; NoVLX-NEXT: vzeroupper
9331 %0 = bitcast <8 x i64> %__a to <8 x i64>
9332 %1 = bitcast <8 x i64> %__b to <8 x i64>
9333 %2 = icmp sgt <8 x i64> %0, %1
9334 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9335 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed greater-than compare of %__a against an <8 x i64> loaded
; from %__b. The <8 x i1> result is widened to <32 x i1> (every shuffle index
; of 8 or above selects a zeroinitializer lane) and bitcast to i32.
; Both run configurations expect the load to be folded into vpcmpgtq as a
; (%rdi) memory operand.
9339 define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
9340 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem:
9341 ; VLX: # %bb.0: # %entry
9342 ; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
9343 ; VLX-NEXT: kmovd %k0, %eax
9344 ; VLX-NEXT: vzeroupper
9347 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem:
9348 ; NoVLX: # %bb.0: # %entry
9349 ; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
9350 ; NoVLX-NEXT: kmovw %k0, %eax
9351 ; NoVLX-NEXT: vzeroupper
9354 %0 = bitcast <8 x i64> %__a to <8 x i64>
9355 %load = load <8 x i64>, ptr %__b
9356 %1 = bitcast <8 x i64> %load to <8 x i64>
9357 %2 = icmp sgt <8 x i64> %0, %1
9358 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9359 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of two <8 x i64> register operands.
; The <8 x i1> result is ANDed with the bits of %__u, widened to <32 x i1>
; (every shuffle index of 8 or above selects a zeroinitializer lane), and
; bitcast to i32.
; Both run configurations expect %edi to be moved into %k1 and used as the
; write mask of vpcmpgtq rather than a separate AND.
9363 define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9364 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask:
9365 ; VLX: # %bb.0: # %entry
9366 ; VLX-NEXT: kmovd %edi, %k1
9367 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9368 ; VLX-NEXT: kmovd %k0, %eax
9369 ; VLX-NEXT: vzeroupper
9372 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask:
9373 ; NoVLX: # %bb.0: # %entry
9374 ; NoVLX-NEXT: kmovw %edi, %k1
9375 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9376 ; NoVLX-NEXT: kmovw %k0, %eax
9377 ; NoVLX-NEXT: vzeroupper
9380 %0 = bitcast <8 x i64> %__a to <8 x i64>
9381 %1 = bitcast <8 x i64> %__b to <8 x i64>
9382 %2 = icmp sgt <8 x i64> %0, %1
9383 %3 = bitcast i8 %__u to <8 x i1>
9384 %4 = and <8 x i1> %2, %3
9385 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9386 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-than compare of %__a against an <8 x i64> loaded from
; %__b. The <8 x i1> result is ANDed with the bits of %__u, widened to
; <32 x i1> (every shuffle index of 8 or above selects a zeroinitializer
; lane), and bitcast to i32.
; Both run configurations expect the load folded as a (%rsi) memory operand
; and the compare predicated on %k1, which is loaded from %edi.
9390 define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
9391 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem:
9392 ; VLX: # %bb.0: # %entry
9393 ; VLX-NEXT: kmovd %edi, %k1
9394 ; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9395 ; VLX-NEXT: kmovd %k0, %eax
9396 ; VLX-NEXT: vzeroupper
9399 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem:
9400 ; NoVLX: # %bb.0: # %entry
9401 ; NoVLX-NEXT: kmovw %edi, %k1
9402 ; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9403 ; NoVLX-NEXT: kmovw %k0, %eax
9404 ; NoVLX-NEXT: vzeroupper
9407 %0 = bitcast <8 x i64> %__a to <8 x i64>
9408 %load = load <8 x i64>, ptr %__b
9409 %1 = bitcast <8 x i64> %load to <8 x i64>
9410 %2 = icmp sgt <8 x i64> %0, %1
9411 %3 = bitcast i8 %__u to <8 x i1>
9412 %4 = and <8 x i1> %2, %3
9413 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9414 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed greater-than compare of %__a against a single i64 loaded
; from %__b and splatted to all eight lanes (insertelement into lane 0, then
; a shufflevector whose indices are all 0). The <8 x i1> result is widened to
; <32 x i1> (every shuffle index of 8 or above selects a zeroinitializer
; lane) and bitcast to i32.
; Both run configurations expect the splat to become a {1to8} broadcast
; memory operand of vpcmpgtq.
9419 define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
9420 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
9421 ; VLX: # %bb.0: # %entry
9422 ; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9423 ; VLX-NEXT: kmovd %k0, %eax
9424 ; VLX-NEXT: vzeroupper
9427 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
9428 ; NoVLX: # %bb.0: # %entry
9429 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9430 ; NoVLX-NEXT: kmovw %k0, %eax
9431 ; NoVLX-NEXT: vzeroupper
9434 %0 = bitcast <8 x i64> %__a to <8 x i64>
9435 %load = load i64, ptr %__b
9436 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9437 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9438 %2 = icmp sgt <8 x i64> %0, %1
9439 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9440 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of %__a against a single i64 loaded from
; %__b and splatted to all eight lanes. The <8 x i1> result is ANDed with the
; bits of %__u (mask is the first 'and' operand in this variant), widened to
; <32 x i1> (every shuffle index of 8 or above selects a zeroinitializer
; lane), and bitcast to i32.
; Both run configurations expect a {1to8} broadcast memory operand and a
; compare predicated on %k1, which is loaded from %edi.
9444 define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
9445 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
9446 ; VLX: # %bb.0: # %entry
9447 ; VLX-NEXT: kmovd %edi, %k1
9448 ; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9449 ; VLX-NEXT: kmovd %k0, %eax
9450 ; VLX-NEXT: vzeroupper
9453 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
9454 ; NoVLX: # %bb.0: # %entry
9455 ; NoVLX-NEXT: kmovw %edi, %k1
9456 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9457 ; NoVLX-NEXT: kmovw %k0, %eax
9458 ; NoVLX-NEXT: vzeroupper
9461 %0 = bitcast <8 x i64> %__a to <8 x i64>
9462 %load = load i64, ptr %__b
9463 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9464 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9465 %2 = icmp sgt <8 x i64> %0, %1
9466 %3 = bitcast i8 %__u to <8 x i1>
9467 %4 = and <8 x i1> %3, %2
9468 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9469 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed greater-than compare of two <8 x i64> register operands.
; The <8 x i1> result is widened to <64 x i1>: shuffle indices 0-7 take the
; compare lanes, and every index of 8 or above selects a lane of the
; zeroinitializer operand, so the upper 56 lanes are zero. The widened vector
; is bitcast to i64.
; The run with AVX512BW enabled expects the mask to be read with kmovq into
; %rax; the AVX512F-only run expects kmovw into %eax.
9474 define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9475 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask:
9476 ; VLX: # %bb.0: # %entry
9477 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9478 ; VLX-NEXT: kmovq %k0, %rax
9479 ; VLX-NEXT: vzeroupper
9482 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask:
9483 ; NoVLX: # %bb.0: # %entry
9484 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9485 ; NoVLX-NEXT: kmovw %k0, %eax
9486 ; NoVLX-NEXT: vzeroupper
9489 %0 = bitcast <8 x i64> %__a to <8 x i64>
9490 %1 = bitcast <8 x i64> %__b to <8 x i64>
9491 %2 = icmp sgt <8 x i64> %0, %1
9492 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9493 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed greater-than compare of %__a against an <8 x i64> loaded
; from %__b. The <8 x i1> result is widened to <64 x i1> (every shuffle index
; of 8 or above selects a zeroinitializer lane) and bitcast to i64.
; Both run configurations expect the load folded as a (%rdi) memory operand;
; the mask is read with kmovq when AVX512BW is enabled and with kmovw in the
; AVX512F-only run.
9497 define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
9498 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem:
9499 ; VLX: # %bb.0: # %entry
9500 ; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
9501 ; VLX-NEXT: kmovq %k0, %rax
9502 ; VLX-NEXT: vzeroupper
9505 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem:
9506 ; NoVLX: # %bb.0: # %entry
9507 ; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
9508 ; NoVLX-NEXT: kmovw %k0, %eax
9509 ; NoVLX-NEXT: vzeroupper
9512 %0 = bitcast <8 x i64> %__a to <8 x i64>
9513 %load = load <8 x i64>, ptr %__b
9514 %1 = bitcast <8 x i64> %load to <8 x i64>
9515 %2 = icmp sgt <8 x i64> %0, %1
9516 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9517 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of two <8 x i64> register operands.
; The <8 x i1> result is ANDed with the bits of %__u, widened to <64 x i1>
; (every shuffle index of 8 or above selects a zeroinitializer lane), and
; bitcast to i64.
; Both run configurations expect %edi to be moved into %k1 and used as the
; write mask of vpcmpgtq; the result mask is read with kmovq when AVX512BW is
; enabled and with kmovw in the AVX512F-only run.
9521 define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9522 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask:
9523 ; VLX: # %bb.0: # %entry
9524 ; VLX-NEXT: kmovd %edi, %k1
9525 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9526 ; VLX-NEXT: kmovq %k0, %rax
9527 ; VLX-NEXT: vzeroupper
9530 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask:
9531 ; NoVLX: # %bb.0: # %entry
9532 ; NoVLX-NEXT: kmovw %edi, %k1
9533 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9534 ; NoVLX-NEXT: kmovw %k0, %eax
9535 ; NoVLX-NEXT: vzeroupper
9538 %0 = bitcast <8 x i64> %__a to <8 x i64>
9539 %1 = bitcast <8 x i64> %__b to <8 x i64>
9540 %2 = icmp sgt <8 x i64> %0, %1
9541 %3 = bitcast i8 %__u to <8 x i1>
9542 %4 = and <8 x i1> %2, %3
9543 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9544 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-than compare of %__a against an <8 x i64> loaded from
; %__b. The <8 x i1> result is ANDed with the bits of %__u, widened to
; <64 x i1> (every shuffle index of 8 or above selects a zeroinitializer
; lane), and bitcast to i64.
; Both run configurations expect the load folded as a (%rsi) memory operand
; and the compare predicated on %k1, which is loaded from %edi.
9548 define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
9549 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem:
9550 ; VLX: # %bb.0: # %entry
9551 ; VLX-NEXT: kmovd %edi, %k1
9552 ; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9553 ; VLX-NEXT: kmovq %k0, %rax
9554 ; VLX-NEXT: vzeroupper
9557 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem:
9558 ; NoVLX: # %bb.0: # %entry
9559 ; NoVLX-NEXT: kmovw %edi, %k1
9560 ; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9561 ; NoVLX-NEXT: kmovw %k0, %eax
9562 ; NoVLX-NEXT: vzeroupper
9565 %0 = bitcast <8 x i64> %__a to <8 x i64>
9566 %load = load <8 x i64>, ptr %__b
9567 %1 = bitcast <8 x i64> %load to <8 x i64>
9568 %2 = icmp sgt <8 x i64> %0, %1
9569 %3 = bitcast i8 %__u to <8 x i1>
9570 %4 = and <8 x i1> %2, %3
9571 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9572 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than compare of %__a against a single i64 loaded
; from %__b and splatted to all eight lanes (insertelement into lane 0, then
; a shufflevector whose indices are all 0). The <8 x i1> result is widened to
; <64 x i1> (every shuffle index of 8 or above selects a zeroinitializer
; lane) and bitcast to i64.
; Both run configurations expect the splat to become a {1to8} broadcast
; memory operand of vpcmpgtq.
9577 define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
9578 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
9579 ; VLX: # %bb.0: # %entry
9580 ; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9581 ; VLX-NEXT: kmovq %k0, %rax
9582 ; VLX-NEXT: vzeroupper
9585 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
9586 ; NoVLX: # %bb.0: # %entry
9587 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9588 ; NoVLX-NEXT: kmovw %k0, %eax
9589 ; NoVLX-NEXT: vzeroupper
9592 %0 = bitcast <8 x i64> %__a to <8 x i64>
9593 %load = load i64, ptr %__b
9594 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9595 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9596 %2 = icmp sgt <8 x i64> %0, %1
9597 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9598 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of %__a against a single i64 loaded from
; %__b and splatted to all eight lanes. The <8 x i1> result is ANDed with the
; bits of %__u (mask is the first 'and' operand in this variant), widened to
; <64 x i1> (every shuffle index of 8 or above selects a zeroinitializer
; lane), and bitcast to i64.
; Both run configurations expect a {1to8} broadcast memory operand and a
; compare predicated on %k1, which is loaded from %edi.
9602 define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
9603 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
9604 ; VLX: # %bb.0: # %entry
9605 ; VLX-NEXT: kmovd %edi, %k1
9606 ; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9607 ; VLX-NEXT: kmovq %k0, %rax
9608 ; VLX-NEXT: vzeroupper
9611 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
9612 ; NoVLX: # %bb.0: # %entry
9613 ; NoVLX-NEXT: kmovw %edi, %k1
9614 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9615 ; NoVLX-NEXT: kmovw %k0, %eax
9616 ; NoVLX-NEXT: vzeroupper
9619 %0 = bitcast <8 x i64> %__a to <8 x i64>
9620 %load = load i64, ptr %__b
9621 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9622 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9623 %2 = icmp sgt <8 x i64> %0, %1
9624 %3 = bitcast i8 %__u to <8 x i1>
9625 %4 = and <8 x i1> %3, %2
9626 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9627 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-or-equal compare of two 128-bit operands viewed as
; <16 x i8>. The <16 x i1> result is widened to <32 x i1> (shuffle indices
; 16-31 select zeroinitializer lanes) and bitcast to i32.
; With AVX512BW/VL enabled the expected code is a single vpcmpnltb into %k0.
; The AVX512F-only run expects vpcmpgtb with swapped operands, inverted by
; vpternlogq $15, then sign-extended to dwords (vpmovsxbd) and converted to a
; mask with vptestmd.
9632 define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
9633 ; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
9634 ; VLX: # %bb.0: # %entry
9635 ; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0
9636 ; VLX-NEXT: kmovd %k0, %eax
9639 ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
9640 ; NoVLX: # %bb.0: # %entry
9641 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9642 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9643 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9644 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9645 ; NoVLX-NEXT: kmovw %k0, %eax
9646 ; NoVLX-NEXT: vzeroupper
9649 %0 = bitcast <2 x i64> %__a to <16 x i8>
9650 %1 = bitcast <2 x i64> %__b to <16 x i8>
9651 %2 = icmp sge <16 x i8> %0, %1
9652 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9653 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed greater-or-equal compare of %__a against a 128-bit value
; loaded from %__b, both viewed as <16 x i8>. The <16 x i1> result is widened
; to <32 x i1> (shuffle indices 16-31 select zeroinitializer lanes) and
; bitcast to i32.
; With AVX512BW/VL enabled the load is expected to fold into vpcmpnltb. The
; AVX512F-only run expects an explicit vmovdqa load, then vpcmpgtb with
; swapped operands, inversion by vpternlogq $15, vpmovsxbd and vptestmd.
9657 define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
9658 ; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
9659 ; VLX: # %bb.0: # %entry
9660 ; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0
9661 ; VLX-NEXT: kmovd %k0, %eax
9664 ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
9665 ; NoVLX: # %bb.0: # %entry
9666 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
9667 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9668 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9669 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9670 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9671 ; NoVLX-NEXT: kmovw %k0, %eax
9672 ; NoVLX-NEXT: vzeroupper
9675 %0 = bitcast <2 x i64> %__a to <16 x i8>
9676 %load = load <2 x i64>, ptr %__b
9677 %1 = bitcast <2 x i64> %load to <16 x i8>
9678 %2 = icmp sge <16 x i8> %0, %1
9679 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9680 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-or-equal compare of two 128-bit operands viewed as
; <16 x i8>. The <16 x i1> result is ANDed with the bits of %__u, widened to
; <32 x i1> (shuffle indices 16-31 select zeroinitializer lanes), and bitcast
; to i32.
; With AVX512BW/VL enabled the mask is expected to be applied as the %k1
; write mask of vpcmpnltb. The AVX512F-only run expects the unmasked
; vpcmpgtb / vpternlogq $15 / vpmovsxbd / vptestmd sequence, with the mask
; applied afterwards in a GPR by 'andl %edi, %eax'.
9684 define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
9685 ; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
9686 ; VLX: # %bb.0: # %entry
9687 ; VLX-NEXT: kmovd %edi, %k1
9688 ; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0 {%k1}
9689 ; VLX-NEXT: kmovd %k0, %eax
9692 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
9693 ; NoVLX: # %bb.0: # %entry
9694 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9695 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9696 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9697 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9698 ; NoVLX-NEXT: kmovw %k0, %eax
9699 ; NoVLX-NEXT: andl %edi, %eax
9700 ; NoVLX-NEXT: vzeroupper
9703 %0 = bitcast <2 x i64> %__a to <16 x i8>
9704 %1 = bitcast <2 x i64> %__b to <16 x i8>
9705 %2 = icmp sge <16 x i8> %0, %1
9706 %3 = bitcast i16 %__u to <16 x i1>
9707 %4 = and <16 x i1> %2, %3
9708 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9709 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-or-equal compare of %__a against a 128-bit value
; loaded from %__b, both viewed as <16 x i8>. The <16 x i1> result is ANDed
; with the bits of %__u, widened to <32 x i1> (shuffle indices 16-31 select
; zeroinitializer lanes), and bitcast to i32.
; With AVX512BW/VL enabled the load folds into a %k1-predicated vpcmpnltb.
; The AVX512F-only run expects an explicit vmovdqa load, the unmasked
; vpcmpgtb / vpternlogq $15 / vpmovsxbd / vptestmd sequence, and the mask
; applied afterwards by 'andl %edi, %eax'.
9713 define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
9714 ; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
9715 ; VLX: # %bb.0: # %entry
9716 ; VLX-NEXT: kmovd %edi, %k1
9717 ; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
9718 ; VLX-NEXT: kmovd %k0, %eax
9721 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
9722 ; NoVLX: # %bb.0: # %entry
9723 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
9724 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9725 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9726 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9727 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9728 ; NoVLX-NEXT: kmovw %k0, %eax
9729 ; NoVLX-NEXT: andl %edi, %eax
9730 ; NoVLX-NEXT: vzeroupper
9733 %0 = bitcast <2 x i64> %__a to <16 x i8>
9734 %load = load <2 x i64>, ptr %__b
9735 %1 = bitcast <2 x i64> %load to <16 x i8>
9736 %2 = icmp sge <16 x i8> %0, %1
9737 %3 = bitcast i16 %__u to <16 x i1>
9738 %4 = and <16 x i1> %2, %3
9739 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9740 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed greater-or-equal compare of two 128-bit operands viewed as
; <16 x i8>. The <16 x i1> result is widened to <64 x i1>: shuffle indices
; 0-15 take the compare lanes, and every index of 16 or above selects a lane
; of the zeroinitializer operand. The widened vector is bitcast to i64.
; With AVX512BW/VL enabled the expected code is vpcmpnltb followed by kmovq.
; The AVX512F-only run expects vpcmpgtb with swapped operands, inversion by
; vpternlogq $15, vpmovsxbd, vptestmd, and a kmovw into %eax.
9745 define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
9746 ; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
9747 ; VLX: # %bb.0: # %entry
9748 ; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0
9749 ; VLX-NEXT: kmovq %k0, %rax
9752 ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
9753 ; NoVLX: # %bb.0: # %entry
9754 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9755 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9756 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9757 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9758 ; NoVLX-NEXT: kmovw %k0, %eax
9759 ; NoVLX-NEXT: vzeroupper
9762 %0 = bitcast <2 x i64> %__a to <16 x i8>
9763 %1 = bitcast <2 x i64> %__b to <16 x i8>
9764 %2 = icmp sge <16 x i8> %0, %1
9765 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9766 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed greater-or-equal compare of %__a against a 128-bit value
; loaded from %__b, both viewed as <16 x i8>. The <16 x i1> result is widened
; to <64 x i1> (every shuffle index of 16 or above selects a zeroinitializer
; lane) and bitcast to i64.
; With AVX512BW/VL enabled the load is expected to fold into vpcmpnltb. The
; AVX512F-only run expects an explicit vmovdqa load, then vpcmpgtb with
; swapped operands, inversion by vpternlogq $15, vpmovsxbd and vptestmd.
9770 define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
9771 ; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
9772 ; VLX: # %bb.0: # %entry
9773 ; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0
9774 ; VLX-NEXT: kmovq %k0, %rax
9777 ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
9778 ; NoVLX: # %bb.0: # %entry
9779 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
9780 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9781 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9782 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9783 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9784 ; NoVLX-NEXT: kmovw %k0, %eax
9785 ; NoVLX-NEXT: vzeroupper
9788 %0 = bitcast <2 x i64> %__a to <16 x i8>
9789 %load = load <2 x i64>, ptr %__b
9790 %1 = bitcast <2 x i64> %load to <16 x i8>
9791 %2 = icmp sge <16 x i8> %0, %1
9792 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9793 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-or-equal compare of two 128-bit operands viewed as
; <16 x i8>. The <16 x i1> result is ANDed with the bits of %__u, widened to
; <64 x i1> (every shuffle index of 16 or above selects a zeroinitializer
; lane), and bitcast to i64.
; With AVX512BW/VL enabled the mask is expected to be applied as the %k1
; write mask of vpcmpnltb. The AVX512F-only run expects the unmasked
; vpcmpgtb / vpternlogq $15 / vpmovsxbd / vptestmd sequence, with the mask
; applied afterwards in a GPR by 'andl %edi, %eax'.
9797 define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
9798 ; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
9799 ; VLX: # %bb.0: # %entry
9800 ; VLX-NEXT: kmovd %edi, %k1
9801 ; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0 {%k1}
9802 ; VLX-NEXT: kmovq %k0, %rax
9805 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
9806 ; NoVLX: # %bb.0: # %entry
9807 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9808 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9809 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9810 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9811 ; NoVLX-NEXT: kmovw %k0, %eax
9812 ; NoVLX-NEXT: andl %edi, %eax
9813 ; NoVLX-NEXT: vzeroupper
9816 %0 = bitcast <2 x i64> %__a to <16 x i8>
9817 %1 = bitcast <2 x i64> %__b to <16 x i8>
9818 %2 = icmp sge <16 x i8> %0, %1
9819 %3 = bitcast i16 %__u to <16 x i1>
9820 %4 = and <16 x i1> %2, %3
9821 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9822 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-or-equal compare of %__a against a 128-bit value
; loaded from %__b, both viewed as <16 x i8>. The <16 x i1> result is ANDed
; with the bits of %__u, widened to <64 x i1> (every shuffle index of 16 or
; above selects a zeroinitializer lane), and bitcast to i64.
; With AVX512BW/VL enabled the load folds into a %k1-predicated vpcmpnltb.
; The AVX512F-only run expects an explicit vmovdqa load, the unmasked
; vpcmpgtb / vpternlogq $15 / vpmovsxbd / vptestmd sequence, and the mask
; applied afterwards by 'andl %edi, %eax'.
9826 define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
9827 ; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
9828 ; VLX: # %bb.0: # %entry
9829 ; VLX-NEXT: kmovd %edi, %k1
9830 ; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
9831 ; VLX-NEXT: kmovq %k0, %rax
9834 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
9835 ; NoVLX: # %bb.0: # %entry
9836 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
9837 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9838 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9839 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9840 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9841 ; NoVLX-NEXT: kmovw %k0, %eax
9842 ; NoVLX-NEXT: andl %edi, %eax
9843 ; NoVLX-NEXT: vzeroupper
9846 %0 = bitcast <2 x i64> %__a to <16 x i8>
9847 %load = load <2 x i64>, ptr %__b
9848 %1 = bitcast <2 x i64> %load to <16 x i8>
9849 %2 = icmp sge <16 x i8> %0, %1
9850 %3 = bitcast i16 %__u to <16 x i1>
9851 %4 = and <16 x i1> %2, %3
9852 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9853 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-or-equal compare of two 256-bit operands viewed as
; <32 x i8>. The <32 x i1> result is widened to <64 x i1> (shuffle indices
; 32-63 select zeroinitializer lanes) and bitcast to i64.
; With AVX512BW/VL enabled the expected code is a single vpcmpnltb on ymm
; registers followed by kmovq. The AVX512F-only run expects vpcmpgtb with
; swapped operands inverted by vpternlogq $15, after which each 128-bit half
; is separately sign-extended and tested (vpmovsxbd + vptestmd) and the two
; 16-bit masks are combined with 'shll $16' and 'orl'.
9858 define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
9859 ; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
9860 ; VLX: # %bb.0: # %entry
9861 ; VLX-NEXT: vpcmpnltb %ymm1, %ymm0, %k0
9862 ; VLX-NEXT: kmovq %k0, %rax
9863 ; VLX-NEXT: vzeroupper
9866 ; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
9867 ; NoVLX: # %bb.0: # %entry
9868 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
9869 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9870 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
9871 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
9872 ; NoVLX-NEXT: kmovw %k0, %ecx
9873 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
9874 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9875 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9876 ; NoVLX-NEXT: kmovw %k0, %eax
9877 ; NoVLX-NEXT: shll $16, %eax
9878 ; NoVLX-NEXT: orl %ecx, %eax
9879 ; NoVLX-NEXT: vzeroupper
9882 %0 = bitcast <4 x i64> %__a to <32 x i8>
9883 %1 = bitcast <4 x i64> %__b to <32 x i8>
9884 %2 = icmp sge <32 x i8> %0, %1
9885 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
9886 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed greater-or-equal compare of %__a against a 256-bit value
; loaded from %__b, both viewed as <32 x i8>. The <32 x i1> result is widened
; to <64 x i1> (shuffle indices 32-63 select zeroinitializer lanes) and
; bitcast to i64.
; With AVX512BW/VL enabled the load is expected to fold into vpcmpnltb. The
; AVX512F-only run expects an explicit vmovdqa load, vpcmpgtb with swapped
; operands inverted by vpternlogq $15, then each 128-bit half separately
; sign-extended and tested, with the two 16-bit masks combined by
; 'shll $16' and 'orl'.
9890 define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
9891 ; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem:
9892 ; VLX: # %bb.0: # %entry
9893 ; VLX-NEXT: vpcmpnltb (%rdi), %ymm0, %k0
9894 ; VLX-NEXT: kmovq %k0, %rax
9895 ; VLX-NEXT: vzeroupper
9898 ; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem:
9899 ; NoVLX: # %bb.0: # %entry
9900 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
9901 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
9902 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9903 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
9904 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
9905 ; NoVLX-NEXT: kmovw %k0, %ecx
9906 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
9907 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9908 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9909 ; NoVLX-NEXT: kmovw %k0, %eax
9910 ; NoVLX-NEXT: shll $16, %eax
9911 ; NoVLX-NEXT: orl %ecx, %eax
9912 ; NoVLX-NEXT: vzeroupper
9915 %0 = bitcast <4 x i64> %__a to <32 x i8>
9916 %load = load <4 x i64>, ptr %__b
9917 %1 = bitcast <4 x i64> %load to <32 x i8>
9918 %2 = icmp sge <32 x i8> %0, %1
9919 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
9920 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-or-equal compare of two 256-bit operands viewed as
; <32 x i8>. The <32 x i1> result is ANDed with the bits of the i32 %__u,
; widened to <64 x i1> (shuffle indices 32-63 select zeroinitializer lanes),
; and bitcast to i64.
; With AVX512BW/VL enabled the mask is expected to be applied as the %k1
; write mask of vpcmpnltb. The AVX512F-only run expects the compare to be
; done unmasked and split into two 16-lane halves: the low half is ANDed with
; %edi, %edi is shifted right by 16 and ANDed with the high half, and the
; halves are merged with 'shll $16', 'movzwl' and 'orl'.
9924 define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
9925 ; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
9926 ; VLX: # %bb.0: # %entry
9927 ; VLX-NEXT: kmovd %edi, %k1
9928 ; VLX-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 {%k1}
9929 ; VLX-NEXT: kmovq %k0, %rax
9930 ; VLX-NEXT: vzeroupper
9933 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
9934 ; NoVLX: # %bb.0: # %entry
9935 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
9936 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9937 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
9938 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
9939 ; NoVLX-NEXT: kmovw %k0, %eax
9940 ; NoVLX-NEXT: andl %edi, %eax
9941 ; NoVLX-NEXT: shrl $16, %edi
9942 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
9943 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9944 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9945 ; NoVLX-NEXT: kmovw %k0, %ecx
9946 ; NoVLX-NEXT: andl %edi, %ecx
9947 ; NoVLX-NEXT: shll $16, %ecx
9948 ; NoVLX-NEXT: movzwl %ax, %eax
9949 ; NoVLX-NEXT: orl %ecx, %eax
9950 ; NoVLX-NEXT: vzeroupper
9953 %0 = bitcast <4 x i64> %__a to <32 x i8>
9954 %1 = bitcast <4 x i64> %__b to <32 x i8>
9955 %2 = icmp sge <32 x i8> %0, %1
9956 %3 = bitcast i32 %__u to <32 x i1>
9957 %4 = and <32 x i1> %2, %3
9958 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
9959 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare of <32 x i8>: %__a against a <4 x i64> load from
; %__b, with the <32 x i1> result ANDed with %__u (bitcast from i32). The
; masked result is widened to <64 x i1> with zeroinitializer lanes and bitcast
; to i64.
; The VLX run expects %__u in %k1 as the write mask of a vpcmpnltb whose second
; operand is the folded load. The NoVLX run expects an explicit vmovdqa load,
; vpcmpgtb with swapped operands plus vpternlogq $15 (bitwise NOT when all
; operands are the same register), then per-half testing with %__u applied by
; scalar andl before the halves are merged.
9963 define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
9964 ; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
9965 ; VLX: # %bb.0: # %entry
9966 ; VLX-NEXT: kmovd %edi, %k1
9967 ; VLX-NEXT: vpcmpnltb (%rsi), %ymm0, %k0 {%k1}
9968 ; VLX-NEXT: kmovq %k0, %rax
9969 ; VLX-NEXT: vzeroupper
9972 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
9973 ; NoVLX: # %bb.0: # %entry
9974 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
9975 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
9976 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9977 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
9978 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
9979 ; NoVLX-NEXT: kmovw %k0, %eax
9980 ; NoVLX-NEXT: andl %edi, %eax
9981 ; NoVLX-NEXT: shrl $16, %edi
9982 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
9983 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9984 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9985 ; NoVLX-NEXT: kmovw %k0, %ecx
9986 ; NoVLX-NEXT: andl %edi, %ecx
9987 ; NoVLX-NEXT: shll $16, %ecx
9988 ; NoVLX-NEXT: movzwl %ax, %eax
9989 ; NoVLX-NEXT: orl %ecx, %eax
9990 ; NoVLX-NEXT: vzeroupper
9993 %0 = bitcast <4 x i64> %__a to <32 x i8>
9994 %load = load <4 x i64>, ptr %__b
9995 %1 = bitcast <4 x i64> %load to <32 x i8>
9996 %2 = icmp sge <32 x i8> %0, %1
9997 %3 = bitcast i32 %__u to <32 x i1>
9998 %4 = and <32 x i1> %2, %3
; Indices 32-63 select from the zeroinitializer operand, so result lanes 32-63 are zero.
9999 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10000 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed >= compare of <8 x i16>: %__a against %__b. The <8 x i1>
; result is widened to <16 x i1> with zeroinitializer lanes and bitcast to i16.
; The VLX run expects a single vpcmpnltw into a mask register. The NoVLX run
; expects vpcmpgtw with swapped operands, vpternlogq $15 (bitwise NOT when all
; operands are the same register), sign extension of the words to quadwords and
; a vptestmq to form the mask.
10005 define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10006 ; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask:
10007 ; VLX: # %bb.0: # %entry
10008 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0
10009 ; VLX-NEXT: kmovd %k0, %eax
10010 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
10013 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask:
10014 ; NoVLX: # %bb.0: # %entry
10015 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10016 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10017 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10018 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
10019 ; NoVLX-NEXT: kmovw %k0, %eax
10020 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
10021 ; NoVLX-NEXT: vzeroupper
10024 %0 = bitcast <2 x i64> %__a to <8 x i16>
10025 %1 = bitcast <2 x i64> %__b to <8 x i16>
10026 %2 = icmp sge <8 x i16> %0, %1
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-15 are zero.
10027 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10028 %4 = bitcast <16 x i1> %3 to i16
; Unmasked signed >= compare of <8 x i16>: %__a against a <2 x i64> load from
; %__b. The <8 x i1> result is widened to <16 x i1> with zeroinitializer lanes
; and bitcast to i16.
; The VLX run expects a single vpcmpnltw with the load folded into it. The
; NoVLX run expects an explicit vmovdqa load, vpcmpgtw with swapped operands,
; vpternlogq $15 (bitwise NOT when all operands are the same register), sign
; extension to quadwords and a vptestmq to form the mask.
10032 define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
10033 ; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem:
10034 ; VLX: # %bb.0: # %entry
10035 ; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
10036 ; VLX-NEXT: kmovd %k0, %eax
10037 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
10040 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem:
10041 ; NoVLX: # %bb.0: # %entry
10042 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
10043 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10044 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10045 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10046 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
10047 ; NoVLX-NEXT: kmovw %k0, %eax
10048 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
10049 ; NoVLX-NEXT: vzeroupper
10052 %0 = bitcast <2 x i64> %__a to <8 x i16>
10053 %load = load <2 x i64>, ptr %__b
10054 %1 = bitcast <2 x i64> %load to <8 x i16>
10055 %2 = icmp sge <8 x i16> %0, %1
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-15 are zero.
10056 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10057 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of <8 x i16>: %__a against %__b, with the <8 x i1>
; result ANDed with %__u (bitcast from i8). The masked result is widened to
; <16 x i1> with zeroinitializer lanes and bitcast to i16.
; The VLX run expects %__u in %k1 as the write mask of vpcmpnltw. The NoVLX run
; expects vpcmpgtw with swapped operands, vpternlogq $15 (bitwise NOT when all
; operands are the same register), sign extension to quadwords, and %__u
; applied as the write mask of the final vptestmq.
10061 define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10062 ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask:
10063 ; VLX: # %bb.0: # %entry
10064 ; VLX-NEXT: kmovd %edi, %k1
10065 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0 {%k1}
10066 ; VLX-NEXT: kmovd %k0, %eax
10067 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
10070 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask:
10071 ; NoVLX: # %bb.0: # %entry
10072 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10073 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10074 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10075 ; NoVLX-NEXT: kmovw %edi, %k1
10076 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
10077 ; NoVLX-NEXT: kmovw %k0, %eax
10078 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
10079 ; NoVLX-NEXT: vzeroupper
10082 %0 = bitcast <2 x i64> %__a to <8 x i16>
10083 %1 = bitcast <2 x i64> %__b to <8 x i16>
10084 %2 = icmp sge <8 x i16> %0, %1
10085 %3 = bitcast i8 %__u to <8 x i1>
10086 %4 = and <8 x i1> %2, %3
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-15 are zero.
10087 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10088 %6 = bitcast <16 x i1> %5 to i16
; Masked signed >= compare of <8 x i16>: %__a against a <2 x i64> load from
; %__b, with the <8 x i1> result ANDed with %__u (bitcast from i8). The masked
; result is widened to <16 x i1> with zeroinitializer lanes and bitcast to i16.
; The VLX run expects %__u in %k1 as the write mask of a vpcmpnltw with the
; load folded into it. The NoVLX run expects an explicit vmovdqa load, vpcmpgtw
; with swapped operands, vpternlogq $15 (bitwise NOT when all operands are the
; same register), sign extension to quadwords, and %__u applied as the write
; mask of the final vptestmq.
10092 define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
10093 ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem:
10094 ; VLX: # %bb.0: # %entry
10095 ; VLX-NEXT: kmovd %edi, %k1
10096 ; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
10097 ; VLX-NEXT: kmovd %k0, %eax
10098 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
10101 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem:
10102 ; NoVLX: # %bb.0: # %entry
10103 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
10104 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10105 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10106 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10107 ; NoVLX-NEXT: kmovw %edi, %k1
10108 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
10109 ; NoVLX-NEXT: kmovw %k0, %eax
10110 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
10111 ; NoVLX-NEXT: vzeroupper
10114 %0 = bitcast <2 x i64> %__a to <8 x i16>
10115 %load = load <2 x i64>, ptr %__b
10116 %1 = bitcast <2 x i64> %load to <8 x i16>
10117 %2 = icmp sge <8 x i16> %0, %1
10118 %3 = bitcast i8 %__u to <8 x i1>
10119 %4 = and <8 x i1> %2, %3
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-15 are zero.
10120 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10121 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed >= compare of <8 x i16>: %__a against %__b. The <8 x i1>
; result is widened to <32 x i1> with zeroinitializer lanes and bitcast to i32.
; The VLX run expects a single vpcmpnltw into a mask register. The NoVLX run
; expects vpcmpgtw with swapped operands, vpternlogq $15 (bitwise NOT when all
; operands are the same register), sign extension to quadwords and a vptestmq
; to form the mask.
10126 define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10127 ; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
10128 ; VLX: # %bb.0: # %entry
10129 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0
10130 ; VLX-NEXT: kmovd %k0, %eax
10133 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
10134 ; NoVLX: # %bb.0: # %entry
10135 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10136 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10137 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10138 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
10139 ; NoVLX-NEXT: kmovw %k0, %eax
10140 ; NoVLX-NEXT: vzeroupper
10143 %0 = bitcast <2 x i64> %__a to <8 x i16>
10144 %1 = bitcast <2 x i64> %__b to <8 x i16>
10145 %2 = icmp sge <8 x i16> %0, %1
; Indices 8-15 (repeated to fill the mask) all select zeroinitializer lanes, so result lanes 8-31 are zero.
10146 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10147 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed >= compare of <8 x i16>: %__a against a <2 x i64> load from
; %__b. The <8 x i1> result is widened to <32 x i1> with zeroinitializer lanes
; and bitcast to i32.
; The VLX run expects a single vpcmpnltw with the load folded into it. The
; NoVLX run expects an explicit vmovdqa load, vpcmpgtw with swapped operands,
; vpternlogq $15 (bitwise NOT when all operands are the same register), sign
; extension to quadwords and a vptestmq to form the mask.
10151 define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
10152 ; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
10153 ; VLX: # %bb.0: # %entry
10154 ; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
10155 ; VLX-NEXT: kmovd %k0, %eax
10158 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
10159 ; NoVLX: # %bb.0: # %entry
10160 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
10161 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10162 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10163 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10164 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
10165 ; NoVLX-NEXT: kmovw %k0, %eax
10166 ; NoVLX-NEXT: vzeroupper
10169 %0 = bitcast <2 x i64> %__a to <8 x i16>
10170 %load = load <2 x i64>, ptr %__b
10171 %1 = bitcast <2 x i64> %load to <8 x i16>
10172 %2 = icmp sge <8 x i16> %0, %1
; Indices 8-15 (repeated to fill the mask) all select zeroinitializer lanes, so result lanes 8-31 are zero.
10173 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10174 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of <8 x i16>: %__a against %__b, with the <8 x i1>
; result ANDed with %__u (bitcast from i8). The masked result is widened to
; <32 x i1> with zeroinitializer lanes and bitcast to i32.
; The VLX run expects %__u in %k1 as the write mask of vpcmpnltw. The NoVLX run
; expects vpcmpgtw with swapped operands, vpternlogq $15 (bitwise NOT when all
; operands are the same register), sign extension to quadwords, and %__u
; applied as the write mask of the final vptestmq.
10178 define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10179 ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
10180 ; VLX: # %bb.0: # %entry
10181 ; VLX-NEXT: kmovd %edi, %k1
10182 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0 {%k1}
10183 ; VLX-NEXT: kmovd %k0, %eax
10186 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
10187 ; NoVLX: # %bb.0: # %entry
10188 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10189 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10190 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10191 ; NoVLX-NEXT: kmovw %edi, %k1
10192 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
10193 ; NoVLX-NEXT: kmovw %k0, %eax
10194 ; NoVLX-NEXT: vzeroupper
10197 %0 = bitcast <2 x i64> %__a to <8 x i16>
10198 %1 = bitcast <2 x i64> %__b to <8 x i16>
10199 %2 = icmp sge <8 x i16> %0, %1
10200 %3 = bitcast i8 %__u to <8 x i1>
10201 %4 = and <8 x i1> %2, %3
; Indices 8-15 (repeated to fill the mask) all select zeroinitializer lanes, so result lanes 8-31 are zero.
10202 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10203 %6 = bitcast <32 x i1> %5 to i32
; Masked signed >= compare of <8 x i16>: %__a against a <2 x i64> load from
; %__b, with the <8 x i1> result ANDed with %__u (bitcast from i8). The masked
; result is widened to <32 x i1> with zeroinitializer lanes and bitcast to i32.
; The VLX run expects %__u in %k1 as the write mask of a vpcmpnltw with the
; load folded into it. The NoVLX run expects an explicit vmovdqa load, vpcmpgtw
; with swapped operands, vpternlogq $15 (bitwise NOT when all operands are the
; same register), sign extension to quadwords, and %__u applied as the write
; mask of the final vptestmq.
10207 define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
10208 ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
10209 ; VLX: # %bb.0: # %entry
10210 ; VLX-NEXT: kmovd %edi, %k1
10211 ; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
10212 ; VLX-NEXT: kmovd %k0, %eax
10215 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
10216 ; NoVLX: # %bb.0: # %entry
10217 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
10218 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10219 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10220 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10221 ; NoVLX-NEXT: kmovw %edi, %k1
10222 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
10223 ; NoVLX-NEXT: kmovw %k0, %eax
10224 ; NoVLX-NEXT: vzeroupper
10227 %0 = bitcast <2 x i64> %__a to <8 x i16>
10228 %load = load <2 x i64>, ptr %__b
10229 %1 = bitcast <2 x i64> %load to <8 x i16>
10230 %2 = icmp sge <8 x i16> %0, %1
10231 %3 = bitcast i8 %__u to <8 x i1>
10232 %4 = and <8 x i1> %2, %3
; Indices 8-15 (repeated to fill the mask) all select zeroinitializer lanes, so result lanes 8-31 are zero.
10233 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10234 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed >= compare of <8 x i16>: %__a against %__b. The <8 x i1>
; result is widened to <64 x i1> with zeroinitializer lanes and bitcast to i64.
; The VLX run expects a single vpcmpnltw and a kmovq of the mask. The NoVLX run
; expects vpcmpgtw with swapped operands, vpternlogq $15 (bitwise NOT when all
; operands are the same register), sign extension to quadwords and a vptestmq
; to form the mask.
10239 define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10240 ; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
10241 ; VLX: # %bb.0: # %entry
10242 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0
10243 ; VLX-NEXT: kmovq %k0, %rax
10246 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
10247 ; NoVLX: # %bb.0: # %entry
10248 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10249 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10250 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10251 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
10252 ; NoVLX-NEXT: kmovw %k0, %eax
10253 ; NoVLX-NEXT: vzeroupper
10256 %0 = bitcast <2 x i64> %__a to <8 x i16>
10257 %1 = bitcast <2 x i64> %__b to <8 x i16>
10258 %2 = icmp sge <8 x i16> %0, %1
; Indices 8-15 (repeated to fill the mask) all select zeroinitializer lanes, so result lanes 8-63 are zero.
10259 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10260 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed >= compare of <8 x i16>: %__a against a <2 x i64> load from
; %__b. The <8 x i1> result is widened to <64 x i1> with zeroinitializer lanes
; and bitcast to i64.
; The VLX run expects a single vpcmpnltw with the load folded into it and a
; kmovq of the mask. The NoVLX run expects an explicit vmovdqa load, vpcmpgtw
; with swapped operands, vpternlogq $15 (bitwise NOT when all operands are the
; same register), sign extension to quadwords and a vptestmq to form the mask.
10264 define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
10265 ; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
10266 ; VLX: # %bb.0: # %entry
10267 ; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
10268 ; VLX-NEXT: kmovq %k0, %rax
10271 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
10272 ; NoVLX: # %bb.0: # %entry
10273 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
10274 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10275 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10276 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10277 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
10278 ; NoVLX-NEXT: kmovw %k0, %eax
10279 ; NoVLX-NEXT: vzeroupper
10282 %0 = bitcast <2 x i64> %__a to <8 x i16>
10283 %load = load <2 x i64>, ptr %__b
10284 %1 = bitcast <2 x i64> %load to <8 x i16>
10285 %2 = icmp sge <8 x i16> %0, %1
; Indices 8-15 (repeated to fill the mask) all select zeroinitializer lanes, so result lanes 8-63 are zero.
10286 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10287 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of <8 x i16>: %__a against %__b, with the <8 x i1>
; result ANDed with %__u (bitcast from i8). The masked result is widened to
; <64 x i1> with zeroinitializer lanes and bitcast to i64.
; The VLX run expects %__u in %k1 as the write mask of vpcmpnltw and a kmovq of
; the mask. The NoVLX run expects vpcmpgtw with swapped operands, vpternlogq
; $15 (bitwise NOT when all operands are the same register), sign extension to
; quadwords, and %__u applied as the write mask of the final vptestmq.
10291 define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10292 ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
10293 ; VLX: # %bb.0: # %entry
10294 ; VLX-NEXT: kmovd %edi, %k1
10295 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0 {%k1}
10296 ; VLX-NEXT: kmovq %k0, %rax
10299 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
10300 ; NoVLX: # %bb.0: # %entry
10301 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10302 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10303 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10304 ; NoVLX-NEXT: kmovw %edi, %k1
10305 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
10306 ; NoVLX-NEXT: kmovw %k0, %eax
10307 ; NoVLX-NEXT: vzeroupper
10310 %0 = bitcast <2 x i64> %__a to <8 x i16>
10311 %1 = bitcast <2 x i64> %__b to <8 x i16>
10312 %2 = icmp sge <8 x i16> %0, %1
10313 %3 = bitcast i8 %__u to <8 x i1>
10314 %4 = and <8 x i1> %2, %3
; Indices 8-15 (repeated to fill the mask) all select zeroinitializer lanes, so result lanes 8-63 are zero.
10315 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10316 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare of <8 x i16>: %__a against a <2 x i64> load from
; %__b, with the <8 x i1> result ANDed with %__u (bitcast from i8). The masked
; result is widened to <64 x i1> with zeroinitializer lanes and bitcast to i64.
; The VLX run expects %__u in %k1 as the write mask of a vpcmpnltw with the
; load folded into it, then a kmovq of the mask. The NoVLX run expects an
; explicit vmovdqa load, vpcmpgtw with swapped operands, vpternlogq $15
; (bitwise NOT when all operands are the same register), sign extension to
; quadwords, and %__u applied as the write mask of the final vptestmq.
10320 define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
10321 ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
10322 ; VLX: # %bb.0: # %entry
10323 ; VLX-NEXT: kmovd %edi, %k1
10324 ; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
10325 ; VLX-NEXT: kmovq %k0, %rax
10328 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
10329 ; NoVLX: # %bb.0: # %entry
10330 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
10331 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10332 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10333 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10334 ; NoVLX-NEXT: kmovw %edi, %k1
10335 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
10336 ; NoVLX-NEXT: kmovw %k0, %eax
10337 ; NoVLX-NEXT: vzeroupper
10340 %0 = bitcast <2 x i64> %__a to <8 x i16>
10341 %load = load <2 x i64>, ptr %__b
10342 %1 = bitcast <2 x i64> %load to <8 x i16>
10343 %2 = icmp sge <8 x i16> %0, %1
10344 %3 = bitcast i8 %__u to <8 x i1>
10345 %4 = and <8 x i1> %2, %3
; Indices 8-15 (repeated to fill the mask) all select zeroinitializer lanes, so result lanes 8-63 are zero.
10346 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10347 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed >= compare of <16 x i16>: %__a against %__b. The <16 x i1>
; result is widened to <32 x i1> with zeroinitializer lanes and bitcast to i32.
; The VLX run expects a single vpcmpnltw on ymm registers. The NoVLX run
; expects vpcmpgtw with swapped operands, vpternlogq $15 (bitwise NOT when all
; operands are the same register), sign extension of the words to doublewords
; and a vptestmd to form the mask.
10352 define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10353 ; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
10354 ; VLX: # %bb.0: # %entry
10355 ; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0
10356 ; VLX-NEXT: kmovd %k0, %eax
10357 ; VLX-NEXT: vzeroupper
10360 ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
10361 ; NoVLX: # %bb.0: # %entry
10362 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10363 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10364 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10365 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10366 ; NoVLX-NEXT: kmovw %k0, %eax
10367 ; NoVLX-NEXT: vzeroupper
10370 %0 = bitcast <4 x i64> %__a to <16 x i16>
10371 %1 = bitcast <4 x i64> %__b to <16 x i16>
10372 %2 = icmp sge <16 x i16> %0, %1
; Indices 16-31 select from the zeroinitializer operand, so result lanes 16-31 are zero.
10373 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10374 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed >= compare of <16 x i16>: %__a against a <4 x i64> load from
; %__b. The <16 x i1> result is widened to <32 x i1> with zeroinitializer lanes
; and bitcast to i32.
; The VLX run expects a single vpcmpnltw with the load folded into it. The
; NoVLX run expects an explicit vmovdqa load, vpcmpgtw with swapped operands,
; vpternlogq $15 (bitwise NOT when all operands are the same register), sign
; extension to doublewords and a vptestmd to form the mask.
10378 define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
10379 ; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem:
10380 ; VLX: # %bb.0: # %entry
10381 ; VLX-NEXT: vpcmpnltw (%rdi), %ymm0, %k0
10382 ; VLX-NEXT: kmovd %k0, %eax
10383 ; VLX-NEXT: vzeroupper
10386 ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem:
10387 ; NoVLX: # %bb.0: # %entry
10388 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
10389 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10390 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10391 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10392 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10393 ; NoVLX-NEXT: kmovw %k0, %eax
10394 ; NoVLX-NEXT: vzeroupper
10397 %0 = bitcast <4 x i64> %__a to <16 x i16>
10398 %load = load <4 x i64>, ptr %__b
10399 %1 = bitcast <4 x i64> %load to <16 x i16>
10400 %2 = icmp sge <16 x i16> %0, %1
; Indices 16-31 select from the zeroinitializer operand, so result lanes 16-31 are zero.
10401 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10402 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of <16 x i16>: %__a against %__b, with the
; <16 x i1> result ANDed with %__u (bitcast from i16). The masked result is
; widened to <32 x i1> with zeroinitializer lanes and bitcast to i32.
; The VLX run expects %__u in %k1 as the write mask of vpcmpnltw. The NoVLX run
; expects vpcmpgtw with swapped operands, vpternlogq $15 (bitwise NOT when all
; operands are the same register), sign extension to doublewords, vptestmd,
; and %__u applied with a scalar andl after the mask is moved to a GPR.
10406 define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10407 ; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
10408 ; VLX: # %bb.0: # %entry
10409 ; VLX-NEXT: kmovd %edi, %k1
10410 ; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0 {%k1}
10411 ; VLX-NEXT: kmovd %k0, %eax
10412 ; VLX-NEXT: vzeroupper
10415 ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
10416 ; NoVLX: # %bb.0: # %entry
10417 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10418 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10419 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10420 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10421 ; NoVLX-NEXT: kmovw %k0, %eax
10422 ; NoVLX-NEXT: andl %edi, %eax
10423 ; NoVLX-NEXT: vzeroupper
10426 %0 = bitcast <4 x i64> %__a to <16 x i16>
10427 %1 = bitcast <4 x i64> %__b to <16 x i16>
10428 %2 = icmp sge <16 x i16> %0, %1
10429 %3 = bitcast i16 %__u to <16 x i1>
10430 %4 = and <16 x i1> %2, %3
; Indices 16-31 select from the zeroinitializer operand, so result lanes 16-31 are zero.
10431 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10432 %6 = bitcast <32 x i1> %5 to i32
; Masked signed >= compare of <16 x i16>: %__a against a <4 x i64> load from
; %__b, with the <16 x i1> result ANDed with %__u (bitcast from i16). The
; masked result is widened to <32 x i1> with zeroinitializer lanes and bitcast
; to i32.
; The VLX run expects %__u in %k1 as the write mask of a vpcmpnltw with the
; load folded into it. The NoVLX run expects an explicit vmovdqa load, vpcmpgtw
; with swapped operands, vpternlogq $15 (bitwise NOT when all operands are the
; same register), sign extension to doublewords, vptestmd, and %__u applied
; with a scalar andl after the mask is moved to a GPR.
10436 define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
10437 ; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem:
10438 ; VLX: # %bb.0: # %entry
10439 ; VLX-NEXT: kmovd %edi, %k1
10440 ; VLX-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1}
10441 ; VLX-NEXT: kmovd %k0, %eax
10442 ; VLX-NEXT: vzeroupper
10445 ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem:
10446 ; NoVLX: # %bb.0: # %entry
10447 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
10448 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10449 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10450 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10451 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10452 ; NoVLX-NEXT: kmovw %k0, %eax
10453 ; NoVLX-NEXT: andl %edi, %eax
10454 ; NoVLX-NEXT: vzeroupper
10457 %0 = bitcast <4 x i64> %__a to <16 x i16>
10458 %load = load <4 x i64>, ptr %__b
10459 %1 = bitcast <4 x i64> %load to <16 x i16>
10460 %2 = icmp sge <16 x i16> %0, %1
10461 %3 = bitcast i16 %__u to <16 x i1>
10462 %4 = and <16 x i1> %2, %3
; Indices 16-31 select from the zeroinitializer operand, so result lanes 16-31 are zero.
10463 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10464 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed >= compare of <16 x i16>: %__a against %__b. The <16 x i1>
; result is widened to <64 x i1> with zeroinitializer lanes and bitcast to i64.
; The VLX run expects a single vpcmpnltw on ymm registers and a kmovq of the
; mask. The NoVLX run expects vpcmpgtw with swapped operands, vpternlogq $15
; (bitwise NOT when all operands are the same register), sign extension to
; doublewords and a vptestmd to form the mask.
10469 define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10470 ; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
10471 ; VLX: # %bb.0: # %entry
10472 ; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0
10473 ; VLX-NEXT: kmovq %k0, %rax
10474 ; VLX-NEXT: vzeroupper
10477 ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
10478 ; NoVLX: # %bb.0: # %entry
10479 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10480 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10481 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10482 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10483 ; NoVLX-NEXT: kmovw %k0, %eax
10484 ; NoVLX-NEXT: vzeroupper
10487 %0 = bitcast <4 x i64> %__a to <16 x i16>
10488 %1 = bitcast <4 x i64> %__b to <16 x i16>
10489 %2 = icmp sge <16 x i16> %0, %1
; Indices 16-31 (repeated to fill the mask) all select zeroinitializer lanes, so result lanes 16-63 are zero.
10490 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10491 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed >= compare of <16 x i16>: %__a against a <4 x i64> load from
; %__b. The <16 x i1> result is widened to <64 x i1> with zeroinitializer lanes
; and bitcast to i64.
; The VLX run expects a single vpcmpnltw with the load folded into it and a
; kmovq of the mask. The NoVLX run expects an explicit vmovdqa load, vpcmpgtw
; with swapped operands, vpternlogq $15 (bitwise NOT when all operands are the
; same register), sign extension to doublewords and a vptestmd to form the
; mask.
10495 define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
10496 ; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
10497 ; VLX: # %bb.0: # %entry
10498 ; VLX-NEXT: vpcmpnltw (%rdi), %ymm0, %k0
10499 ; VLX-NEXT: kmovq %k0, %rax
10500 ; VLX-NEXT: vzeroupper
10503 ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
10504 ; NoVLX: # %bb.0: # %entry
10505 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
10506 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10507 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10508 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10509 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10510 ; NoVLX-NEXT: kmovw %k0, %eax
10511 ; NoVLX-NEXT: vzeroupper
10514 %0 = bitcast <4 x i64> %__a to <16 x i16>
10515 %load = load <4 x i64>, ptr %__b
10516 %1 = bitcast <4 x i64> %load to <16 x i16>
10517 %2 = icmp sge <16 x i16> %0, %1
; Indices 16-31 (repeated to fill the mask) all select zeroinitializer lanes, so result lanes 16-63 are zero.
10518 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10519 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of <16 x i16>: %__a against %__b, with the
; <16 x i1> result ANDed with %__u (bitcast from i16). The masked result is
; widened to <64 x i1> with zeroinitializer lanes and bitcast to i64.
; The VLX run expects %__u in %k1 as the write mask of vpcmpnltw and a kmovq of
; the mask. The NoVLX run expects vpcmpgtw with swapped operands, vpternlogq
; $15 (bitwise NOT when all operands are the same register), sign extension to
; doublewords, vptestmd, and %__u applied with a scalar andl after the mask is
; moved to a GPR.
10523 define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10524 ; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
10525 ; VLX: # %bb.0: # %entry
10526 ; VLX-NEXT: kmovd %edi, %k1
10527 ; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0 {%k1}
10528 ; VLX-NEXT: kmovq %k0, %rax
10529 ; VLX-NEXT: vzeroupper
10532 ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
10533 ; NoVLX: # %bb.0: # %entry
10534 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10535 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10536 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10537 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10538 ; NoVLX-NEXT: kmovw %k0, %eax
10539 ; NoVLX-NEXT: andl %edi, %eax
10540 ; NoVLX-NEXT: vzeroupper
10543 %0 = bitcast <4 x i64> %__a to <16 x i16>
10544 %1 = bitcast <4 x i64> %__b to <16 x i16>
10545 %2 = icmp sge <16 x i16> %0, %1
10546 %3 = bitcast i16 %__u to <16 x i1>
10547 %4 = and <16 x i1> %2, %3
; Indices 16-31 (repeated to fill the mask) all select zeroinitializer lanes, so result lanes 16-63 are zero.
10548 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10549 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare of <16 x i16>: %__a against a <4 x i64> load from
; %__b, with the <16 x i1> result ANDed with %__u (bitcast from i16). The
; masked result is widened to <64 x i1> with zeroinitializer lanes and bitcast
; to i64.
; The VLX run expects %__u in %k1 as the write mask of a vpcmpnltw with the
; load folded into it, then a kmovq of the mask. The NoVLX run expects an
; explicit vmovdqa load, vpcmpgtw with swapped operands, vpternlogq $15
; (bitwise NOT when all operands are the same register), sign extension to
; doublewords, vptestmd, and %__u applied with a scalar andl after the mask is
; moved to a GPR.
10553 define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
10554 ; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
10555 ; VLX: # %bb.0: # %entry
10556 ; VLX-NEXT: kmovd %edi, %k1
10557 ; VLX-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1}
10558 ; VLX-NEXT: kmovq %k0, %rax
10559 ; VLX-NEXT: vzeroupper
10562 ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
10563 ; NoVLX: # %bb.0: # %entry
10564 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
10565 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10566 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10567 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10568 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10569 ; NoVLX-NEXT: kmovw %k0, %eax
10570 ; NoVLX-NEXT: andl %edi, %eax
10571 ; NoVLX-NEXT: vzeroupper
10574 %0 = bitcast <4 x i64> %__a to <16 x i16>
10575 %load = load <4 x i64>, ptr %__b
10576 %1 = bitcast <4 x i64> %load to <16 x i16>
10577 %2 = icmp sge <16 x i16> %0, %1
10578 %3 = bitcast i16 %__u to <16 x i1>
10579 %4 = and <16 x i1> %2, %3
; Indices 16-31 (repeated to fill the mask) all select zeroinitializer lanes, so result lanes 16-63 are zero.
10580 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10581 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed >= compare of <32 x i16>: %__a against %__b. The <32 x i1>
; result is widened to <64 x i1> with zeroinitializer lanes and bitcast to i64.
; The VLX run expects a single vpcmpnltw on zmm registers and a kmovq of the
; mask. The NoVLX run expects the compare to be done per 256-bit half (the
; upper halves extracted with vextracti64x4): each half uses vpcmpgtw with
; swapped operands, vpternlogq $15 (bitwise NOT when all operands are the same
; register), sign extension to doublewords and vptestmd; the two 16-bit masks
; are then merged with shll/orl.
10586 define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
10587 ; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
10588 ; VLX: # %bb.0: # %entry
10589 ; VLX-NEXT: vpcmpnltw %zmm1, %zmm0, %k0
10590 ; VLX-NEXT: kmovq %k0, %rax
10591 ; VLX-NEXT: vzeroupper
10594 ; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
10595 ; NoVLX: # %bb.0: # %entry
10596 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm2
10597 ; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
10598 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
10599 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
10600 ; NoVLX-NEXT: kmovw %k0, %ecx
10601 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
10602 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
10603 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10604 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10605 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10606 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10607 ; NoVLX-NEXT: kmovw %k0, %eax
10608 ; NoVLX-NEXT: shll $16, %eax
10609 ; NoVLX-NEXT: orl %ecx, %eax
10610 ; NoVLX-NEXT: vzeroupper
10613 %0 = bitcast <8 x i64> %__a to <32 x i16>
10614 %1 = bitcast <8 x i64> %__b to <32 x i16>
10615 %2 = icmp sge <32 x i16> %0, %1
; Indices 32-63 select from the zeroinitializer operand, so result lanes 32-63 are zero.
10616 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10617 %4 = bitcast <64 x i1> %3 to i64
; Memory-operand variant of the <32 x i16> signed >= test: the second operand is
; loaded from %__b as <8 x i64>. The 32-lane result is padded with zero lanes to
; 64 and returned as an i64.
; The VLX run expects the load folded into vpcmpnltw. The NoVLX run expects two
; 256-bit vmovdqa loads (offsets 0 and 32) feeding the per-half compare sequence.
10621 define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
10622 ; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
10623 ; VLX: # %bb.0: # %entry
10624 ; VLX-NEXT: vpcmpnltw (%rdi), %zmm0, %k0
10625 ; VLX-NEXT: kmovq %k0, %rax
10626 ; VLX-NEXT: vzeroupper
10629 ; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
10630 ; NoVLX: # %bb.0: # %entry
10631 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
10632 ; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm2
10633 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm1
10634 ; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
10635 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
10636 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
10637 ; NoVLX-NEXT: kmovw %k0, %ecx
10638 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
10639 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
10640 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10641 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10642 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10643 ; NoVLX-NEXT: kmovw %k0, %eax
10644 ; NoVLX-NEXT: shll $16, %eax
10645 ; NoVLX-NEXT: orl %ecx, %eax
10646 ; NoVLX-NEXT: vzeroupper
10649 %0 = bitcast <8 x i64> %__a to <32 x i16>
10650 %load = load <8 x i64>, ptr %__b
10651 %1 = bitcast <8 x i64> %load to <32 x i16>
10652 %2 = icmp sge <32 x i16> %0, %1
10653 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10654 %4 = bitcast <64 x i1> %3 to i64
; Masked variant of the <32 x i16> signed >= test: %__u is bitcast to <32 x i1>
; and ANDed with the compare result before the mask is padded with zero lanes to
; 64 and returned as an i64.
; The VLX run expects %__u moved into %k1 and used as the write-mask of
; vpcmpnltw. The NoVLX run expects each 16-bit half of the result to be ANDed
; with the matching half of %edi (low half directly, high half after shrl $16).
10658 define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
10659 ; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
10660 ; VLX: # %bb.0: # %entry
10661 ; VLX-NEXT: kmovd %edi, %k1
10662 ; VLX-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1}
10663 ; VLX-NEXT: kmovq %k0, %rax
10664 ; VLX-NEXT: vzeroupper
10667 ; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
10668 ; NoVLX: # %bb.0: # %entry
10669 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm2
10670 ; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
10671 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
10672 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
10673 ; NoVLX-NEXT: kmovw %k0, %eax
10674 ; NoVLX-NEXT: andl %edi, %eax
10675 ; NoVLX-NEXT: shrl $16, %edi
10676 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
10677 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
10678 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10679 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10680 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10681 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10682 ; NoVLX-NEXT: kmovw %k0, %ecx
10683 ; NoVLX-NEXT: andl %edi, %ecx
10684 ; NoVLX-NEXT: shll $16, %ecx
10685 ; NoVLX-NEXT: movzwl %ax, %eax
10686 ; NoVLX-NEXT: orl %ecx, %eax
10687 ; NoVLX-NEXT: vzeroupper
10690 %0 = bitcast <8 x i64> %__a to <32 x i16>
10691 %1 = bitcast <8 x i64> %__b to <32 x i16>
10692 %2 = icmp sge <32 x i16> %0, %1
10693 %3 = bitcast i32 %__u to <32 x i1>
10694 %4 = and <32 x i1> %2, %3
10695 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10696 %6 = bitcast <64 x i1> %5 to i64
; Masked, memory-operand variant of the <32 x i16> signed >= test: the second
; operand is loaded from %__b, the compare result is ANDed with %__u (bitcast to
; <32 x i1>), padded with zero lanes to 64 and returned as an i64.
; The VLX run expects a write-masked vpcmpnltw with the load folded in. The NoVLX
; run expects two 256-bit loads from (%rsi) and 32(%rsi) plus per-half andl with
; %edi.
10700 define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
10701 ; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
10702 ; VLX: # %bb.0: # %entry
10703 ; VLX-NEXT: kmovd %edi, %k1
10704 ; VLX-NEXT: vpcmpnltw (%rsi), %zmm0, %k0 {%k1}
10705 ; VLX-NEXT: kmovq %k0, %rax
10706 ; VLX-NEXT: vzeroupper
10709 ; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
10710 ; NoVLX: # %bb.0: # %entry
10711 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
10712 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm1
10713 ; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
10714 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
10715 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
10716 ; NoVLX-NEXT: kmovw %k0, %eax
10717 ; NoVLX-NEXT: andl %edi, %eax
10718 ; NoVLX-NEXT: shrl $16, %edi
10719 ; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm1
10720 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
10721 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10722 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10723 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10724 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10725 ; NoVLX-NEXT: kmovw %k0, %ecx
10726 ; NoVLX-NEXT: andl %edi, %ecx
10727 ; NoVLX-NEXT: shll $16, %ecx
10728 ; NoVLX-NEXT: movzwl %ax, %eax
10729 ; NoVLX-NEXT: orl %ecx, %eax
10730 ; NoVLX-NEXT: vzeroupper
10733 %0 = bitcast <8 x i64> %__a to <32 x i16>
10734 %load = load <8 x i64>, ptr %__b
10735 %1 = bitcast <8 x i64> %load to <32 x i16>
10736 %2 = icmp sge <32 x i16> %0, %1
10737 %3 = bitcast i32 %__u to <32 x i1>
10738 %4 = and <32 x i1> %2, %3
10739 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10740 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of two <4 x i32> register operands. The shufflevector pads
; the 4-lane result with lanes from zeroinitializer (indices 4..7) and the 8-lane
; mask is returned as an i8.
; The VLX run expects a 128-bit vpcmpnltd. The NoVLX run expects the operands to
; be treated as zmm registers, a 512-bit vpcmpnltd, and a kshiftlw/kshiftrw $12
; pair that keeps only the low 4 bits of the 16-bit mask.
10745 define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10746 ; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask:
10747 ; VLX: # %bb.0: # %entry
10748 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0
10749 ; VLX-NEXT: kmovd %k0, %eax
10750 ; VLX-NEXT: # kill: def $al killed $al killed $eax
10753 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask:
10754 ; NoVLX: # %bb.0: # %entry
10755 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
10756 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10757 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
10758 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10759 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10760 ; NoVLX-NEXT: kmovw %k0, %eax
10761 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
10762 ; NoVLX-NEXT: vzeroupper
10765 %0 = bitcast <2 x i64> %__a to <4 x i32>
10766 %1 = bitcast <2 x i64> %__b to <4 x i32>
10767 %2 = icmp sge <4 x i32> %0, %1
10768 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10769 %4 = bitcast <8 x i1> %3 to i8
; Memory-operand variant of the <4 x i32> signed >= test returning an i8 mask:
; the second operand is loaded from %__b as <2 x i64>, and the 4-lane result is
; padded with zero lanes to 8.
; The VLX run expects the load folded into vpcmpnltd. The NoVLX run expects a
; separate 128-bit vmovdqa, a 512-bit compare, and the kshift pair that keeps the
; low 4 mask bits.
10773 define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
10774 ; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem:
10775 ; VLX: # %bb.0: # %entry
10776 ; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
10777 ; VLX-NEXT: kmovd %k0, %eax
10778 ; VLX-NEXT: # kill: def $al killed $al killed $eax
10781 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem:
10782 ; NoVLX: # %bb.0: # %entry
10783 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10784 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
10785 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
10786 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10787 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10788 ; NoVLX-NEXT: kmovw %k0, %eax
10789 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
10790 ; NoVLX-NEXT: vzeroupper
10793 %0 = bitcast <2 x i64> %__a to <4 x i32>
10794 %load = load <2 x i64>, ptr %__b
10795 %1 = bitcast <2 x i64> %load to <4 x i32>
10796 %2 = icmp sge <4 x i32> %0, %1
10797 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10798 %4 = bitcast <8 x i1> %3 to i8
; Masked variant of the <4 x i32> signed >= test returning an i8 mask: %__u is
; bitcast to <8 x i1>, its low 4 lanes are extracted and ANDed with the compare
; result, and the 4-lane mask is padded with zero lanes to 8.
; Both runs expect %__u moved into %k1 and used as the write-mask of vpcmpnltd;
; the NoVLX run uses the 512-bit form followed by the kshift pair that keeps the
; low 4 mask bits.
10802 define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10803 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
10804 ; VLX: # %bb.0: # %entry
10805 ; VLX-NEXT: kmovd %edi, %k1
10806 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
10807 ; VLX-NEXT: kmovd %k0, %eax
10808 ; VLX-NEXT: # kill: def $al killed $al killed $eax
10811 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
10812 ; NoVLX: # %bb.0: # %entry
10813 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
10814 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10815 ; NoVLX-NEXT: kmovw %edi, %k1
10816 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
10817 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10818 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10819 ; NoVLX-NEXT: kmovw %k0, %eax
10820 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
10821 ; NoVLX-NEXT: vzeroupper
10824 %0 = bitcast <2 x i64> %__a to <4 x i32>
10825 %1 = bitcast <2 x i64> %__b to <4 x i32>
10826 %2 = icmp sge <4 x i32> %0, %1
10827 %3 = bitcast i8 %__u to <8 x i1>
10828 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
10829 %4 = and <4 x i1> %2, %extract.i
10830 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10831 %6 = bitcast <8 x i1> %5 to i8
; Masked, memory-operand variant of the <4 x i32> signed >= test returning an i8
; mask: the second operand is loaded from %__b, the result is ANDed with the low
; 4 lanes of %__u, and the mask is padded with zero lanes to 8.
; The VLX run expects a write-masked vpcmpnltd with the load folded in. The NoVLX
; run expects a separate vmovdqa, a write-masked 512-bit compare, and the kshift
; pair that keeps the low 4 mask bits.
10835 define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
10836 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
10837 ; VLX: # %bb.0: # %entry
10838 ; VLX-NEXT: kmovd %edi, %k1
10839 ; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
10840 ; VLX-NEXT: kmovd %k0, %eax
10841 ; VLX-NEXT: # kill: def $al killed $al killed $eax
10844 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
10845 ; NoVLX: # %bb.0: # %entry
10846 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10847 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
10848 ; NoVLX-NEXT: kmovw %edi, %k1
10849 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
10850 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10851 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10852 ; NoVLX-NEXT: kmovw %k0, %eax
10853 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
10854 ; NoVLX-NEXT: vzeroupper
10857 %0 = bitcast <2 x i64> %__a to <4 x i32>
10858 %load = load <2 x i64>, ptr %__b
10859 %1 = bitcast <2 x i64> %load to <4 x i32>
10860 %2 = icmp sge <4 x i32> %0, %1
10861 %3 = bitcast i8 %__u to <8 x i1>
10862 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
10863 %4 = and <4 x i1> %2, %extract.i
10864 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10865 %6 = bitcast <8 x i1> %5 to i8
; Broadcast variant of the <4 x i32> signed >= test returning an i8 mask: a
; single i32 is loaded from %__b and splatted to all 4 lanes (insertelement plus
; an all-zero-index shufflevector) before the compare. The 4-lane result is
; padded with zero lanes to 8.
; The VLX run expects an embedded {1to4} broadcast operand. The NoVLX run expects
; a {1to16} broadcast on the 512-bit compare, then the kshift pair that keeps the
; low 4 mask bits.
10870 define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
10871 ; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b:
10872 ; VLX: # %bb.0: # %entry
10873 ; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0
10874 ; VLX-NEXT: kmovd %k0, %eax
10875 ; VLX-NEXT: # kill: def $al killed $al killed $eax
10878 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b:
10879 ; NoVLX: # %bb.0: # %entry
10880 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10881 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
10882 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10883 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10884 ; NoVLX-NEXT: kmovw %k0, %eax
10885 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
10886 ; NoVLX-NEXT: vzeroupper
10889 %0 = bitcast <2 x i64> %__a to <4 x i32>
10890 %load = load i32, ptr %__b
10891 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
10892 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
10893 %2 = icmp sge <4 x i32> %0, %1
10894 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10895 %4 = bitcast <8 x i1> %3 to i8
; Masked broadcast variant of the <4 x i32> signed >= test returning an i8 mask:
; a single i32 loaded from %__b is splatted to 4 lanes, compared, and the result
; is ANDed with the low 4 lanes of %__u (mask operand first in the `and`). The
; 4-lane mask is padded with zero lanes to 8.
; The VLX run expects a write-masked compare with a {1to4} broadcast; the NoVLX
; run expects a {1to16} broadcast on the 512-bit form plus the kshift pair.
10899 define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
10900 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
10901 ; VLX: # %bb.0: # %entry
10902 ; VLX-NEXT: kmovd %edi, %k1
10903 ; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
10904 ; VLX-NEXT: kmovd %k0, %eax
10905 ; VLX-NEXT: # kill: def $al killed $al killed $eax
10908 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
10909 ; NoVLX: # %bb.0: # %entry
10910 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10911 ; NoVLX-NEXT: kmovw %edi, %k1
10912 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
10913 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10914 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10915 ; NoVLX-NEXT: kmovw %k0, %eax
10916 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
10917 ; NoVLX-NEXT: vzeroupper
10920 %0 = bitcast <2 x i64> %__a to <4 x i32>
10921 %load = load i32, ptr %__b
10922 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
10923 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
10924 %2 = icmp sge <4 x i32> %0, %1
10925 %3 = bitcast i8 %__u to <8 x i1>
10926 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
10927 %4 = and <4 x i1> %extract.i, %2
10928 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10929 %6 = bitcast <8 x i1> %5 to i8
; Signed >= compare of two <4 x i32> register operands, returned as an i16 mask.
; Every shuffle index >= 4 selects a lane of zeroinitializer, so the upper 12
; lanes of the 16-lane mask are zero.
; The VLX run expects a 128-bit vpcmpnltd. The NoVLX run expects a 512-bit
; vpcmpnltd followed by the kshiftlw/kshiftrw $12 pair that keeps the low 4 bits.
10934 define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10935 ; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask:
10936 ; VLX: # %bb.0: # %entry
10937 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0
10938 ; VLX-NEXT: kmovd %k0, %eax
10939 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
10942 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask:
10943 ; NoVLX: # %bb.0: # %entry
10944 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
10945 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10946 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
10947 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10948 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10949 ; NoVLX-NEXT: kmovw %k0, %eax
10950 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
10951 ; NoVLX-NEXT: vzeroupper
10954 %0 = bitcast <2 x i64> %__a to <4 x i32>
10955 %1 = bitcast <2 x i64> %__b to <4 x i32>
10956 %2 = icmp sge <4 x i32> %0, %1
10957 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
10958 %4 = bitcast <16 x i1> %3 to i16
; Memory-operand variant of the <4 x i32> signed >= test returning an i16 mask:
; the second operand is loaded from %__b as <2 x i64>; shuffle indices >= 4 pull
; zero lanes so only the low 4 mask bits can be set.
; The VLX run expects the load folded into vpcmpnltd. The NoVLX run expects a
; separate vmovdqa, a 512-bit compare, and the kshift pair that keeps the low 4
; mask bits.
10962 define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
10963 ; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem:
10964 ; VLX: # %bb.0: # %entry
10965 ; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
10966 ; VLX-NEXT: kmovd %k0, %eax
10967 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
10970 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem:
10971 ; NoVLX: # %bb.0: # %entry
10972 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10973 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
10974 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
10975 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10976 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10977 ; NoVLX-NEXT: kmovw %k0, %eax
10978 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
10979 ; NoVLX-NEXT: vzeroupper
10982 %0 = bitcast <2 x i64> %__a to <4 x i32>
10983 %load = load <2 x i64>, ptr %__b
10984 %1 = bitcast <2 x i64> %load to <4 x i32>
10985 %2 = icmp sge <4 x i32> %0, %1
10986 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
10987 %4 = bitcast <16 x i1> %3 to i16
; Masked variant of the <4 x i32> signed >= test returning an i16 mask: the low
; 4 lanes of %__u (bitcast to <8 x i1>) are ANDed with the compare result, and
; the 4-lane mask is padded with zero lanes to 16.
; Both runs expect %__u moved into %k1 and used as the write-mask of vpcmpnltd;
; the NoVLX run uses the 512-bit form followed by the kshift pair that keeps the
; low 4 mask bits.
10991 define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10992 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
10993 ; VLX: # %bb.0: # %entry
10994 ; VLX-NEXT: kmovd %edi, %k1
10995 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
10996 ; VLX-NEXT: kmovd %k0, %eax
10997 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11000 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
11001 ; NoVLX: # %bb.0: # %entry
11002 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
11003 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11004 ; NoVLX-NEXT: kmovw %edi, %k1
11005 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11006 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11007 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11008 ; NoVLX-NEXT: kmovw %k0, %eax
11009 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11010 ; NoVLX-NEXT: vzeroupper
11013 %0 = bitcast <2 x i64> %__a to <4 x i32>
11014 %1 = bitcast <2 x i64> %__b to <4 x i32>
11015 %2 = icmp sge <4 x i32> %0, %1
11016 %3 = bitcast i8 %__u to <8 x i1>
11017 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11018 %4 = and <4 x i1> %2, %extract.i
11019 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11020 %6 = bitcast <16 x i1> %5 to i16
; Masked, memory-operand variant of the <4 x i32> signed >= test returning an
; i16 mask: the second operand is loaded from %__b, the result is ANDed with the
; low 4 lanes of %__u, and the mask is padded with zero lanes to 16.
; The VLX run expects a write-masked vpcmpnltd with the load folded in. The NoVLX
; run expects a separate vmovdqa, a write-masked 512-bit compare, and the kshift
; pair that keeps the low 4 mask bits.
11024 define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
11025 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
11026 ; VLX: # %bb.0: # %entry
11027 ; VLX-NEXT: kmovd %edi, %k1
11028 ; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
11029 ; VLX-NEXT: kmovd %k0, %eax
11030 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11033 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
11034 ; NoVLX: # %bb.0: # %entry
11035 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11036 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
11037 ; NoVLX-NEXT: kmovw %edi, %k1
11038 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11039 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11040 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11041 ; NoVLX-NEXT: kmovw %k0, %eax
11042 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11043 ; NoVLX-NEXT: vzeroupper
11046 %0 = bitcast <2 x i64> %__a to <4 x i32>
11047 %load = load <2 x i64>, ptr %__b
11048 %1 = bitcast <2 x i64> %load to <4 x i32>
11049 %2 = icmp sge <4 x i32> %0, %1
11050 %3 = bitcast i8 %__u to <8 x i1>
11051 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11052 %4 = and <4 x i1> %2, %extract.i
11053 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11054 %6 = bitcast <16 x i1> %5 to i16
; Broadcast variant of the <4 x i32> signed >= test returning an i16 mask: a
; single i32 loaded from %__b is splatted to all 4 lanes before the compare, and
; the 4-lane result is padded with zero lanes to 16.
; The VLX run expects an embedded {1to4} broadcast operand. The NoVLX run expects
; a {1to16} broadcast on the 512-bit compare, then the kshift pair that keeps the
; low 4 mask bits.
11059 define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
11060 ; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b:
11061 ; VLX: # %bb.0: # %entry
11062 ; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0
11063 ; VLX-NEXT: kmovd %k0, %eax
11064 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11067 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b:
11068 ; NoVLX: # %bb.0: # %entry
11069 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11070 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
11071 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11072 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11073 ; NoVLX-NEXT: kmovw %k0, %eax
11074 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11075 ; NoVLX-NEXT: vzeroupper
11078 %0 = bitcast <2 x i64> %__a to <4 x i32>
11079 %load = load i32, ptr %__b
11080 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11081 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11082 %2 = icmp sge <4 x i32> %0, %1
11083 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11084 %4 = bitcast <16 x i1> %3 to i16
; Masked broadcast variant of the <4 x i32> signed >= test returning an i16
; mask: a single i32 loaded from %__b is splatted to 4 lanes, compared, and the
; result is ANDed with the low 4 lanes of %__u (mask operand first in the `and`).
; The 4-lane mask is padded with zero lanes to 16.
; The VLX run expects a write-masked compare with a {1to4} broadcast; the NoVLX
; run expects a {1to16} broadcast on the 512-bit form plus the kshift pair.
11088 define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
11089 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
11090 ; VLX: # %bb.0: # %entry
11091 ; VLX-NEXT: kmovd %edi, %k1
11092 ; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
11093 ; VLX-NEXT: kmovd %k0, %eax
11094 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11097 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
11098 ; NoVLX: # %bb.0: # %entry
11099 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11100 ; NoVLX-NEXT: kmovw %edi, %k1
11101 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
11102 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11103 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11104 ; NoVLX-NEXT: kmovw %k0, %eax
11105 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11106 ; NoVLX-NEXT: vzeroupper
11109 %0 = bitcast <2 x i64> %__a to <4 x i32>
11110 %load = load i32, ptr %__b
11111 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11112 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11113 %2 = icmp sge <4 x i32> %0, %1
11114 %3 = bitcast i8 %__u to <8 x i1>
11115 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11116 %4 = and <4 x i1> %extract.i, %2
11117 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11118 %6 = bitcast <16 x i1> %5 to i16
; Signed >= compare of two <4 x i32> register operands, returned as an i32 mask.
; Every shuffle index >= 4 selects a lane of zeroinitializer, so the upper 28
; lanes of the 32-lane mask are zero.
; The VLX run expects a 128-bit vpcmpnltd. The NoVLX run expects a 512-bit
; vpcmpnltd followed by the kshiftlw/kshiftrw $12 pair that keeps the low 4 bits.
11123 define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11124 ; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
11125 ; VLX: # %bb.0: # %entry
11126 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0
11127 ; VLX-NEXT: kmovd %k0, %eax
11130 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
11131 ; NoVLX: # %bb.0: # %entry
11132 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
11133 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11134 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11135 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11136 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11137 ; NoVLX-NEXT: kmovw %k0, %eax
11138 ; NoVLX-NEXT: vzeroupper
11141 %0 = bitcast <2 x i64> %__a to <4 x i32>
11142 %1 = bitcast <2 x i64> %__b to <4 x i32>
11143 %2 = icmp sge <4 x i32> %0, %1
11144 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11145 %4 = bitcast <32 x i1> %3 to i32
; Memory-operand variant of the <4 x i32> signed >= test returning an i32 mask:
; the second operand is loaded from %__b as <2 x i64>; shuffle indices >= 4 pull
; zero lanes so only the low 4 mask bits can be set.
; The VLX run expects the load folded into vpcmpnltd. The NoVLX run expects a
; separate vmovdqa, a 512-bit compare, and the kshift pair that keeps the low 4
; mask bits.
11149 define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
11150 ; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
11151 ; VLX: # %bb.0: # %entry
11152 ; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
11153 ; VLX-NEXT: kmovd %k0, %eax
11156 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
11157 ; NoVLX: # %bb.0: # %entry
11158 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11159 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
11160 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11161 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11162 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11163 ; NoVLX-NEXT: kmovw %k0, %eax
11164 ; NoVLX-NEXT: vzeroupper
11167 %0 = bitcast <2 x i64> %__a to <4 x i32>
11168 %load = load <2 x i64>, ptr %__b
11169 %1 = bitcast <2 x i64> %load to <4 x i32>
11170 %2 = icmp sge <4 x i32> %0, %1
11171 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11172 %4 = bitcast <32 x i1> %3 to i32
; Masked variant of the <4 x i32> signed >= test returning an i32 mask: the low
; 4 lanes of %__u (bitcast to <8 x i1>) are ANDed with the compare result, and
; the 4-lane mask is padded with zero lanes to 32.
; Both runs expect %__u moved into %k1 and used as the write-mask of vpcmpnltd;
; the NoVLX run uses the 512-bit form followed by the kshift pair that keeps the
; low 4 mask bits.
11176 define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11177 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
11178 ; VLX: # %bb.0: # %entry
11179 ; VLX-NEXT: kmovd %edi, %k1
11180 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
11181 ; VLX-NEXT: kmovd %k0, %eax
11184 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
11185 ; NoVLX: # %bb.0: # %entry
11186 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
11187 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11188 ; NoVLX-NEXT: kmovw %edi, %k1
11189 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11190 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11191 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11192 ; NoVLX-NEXT: kmovw %k0, %eax
11193 ; NoVLX-NEXT: vzeroupper
11196 %0 = bitcast <2 x i64> %__a to <4 x i32>
11197 %1 = bitcast <2 x i64> %__b to <4 x i32>
11198 %2 = icmp sge <4 x i32> %0, %1
11199 %3 = bitcast i8 %__u to <8 x i1>
11200 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11201 %4 = and <4 x i1> %2, %extract.i
11202 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11203 %6 = bitcast <32 x i1> %5 to i32
; Masked, memory-operand variant of the <4 x i32> signed >= test returning an
; i32 mask: the second operand is loaded from %__b, the result is ANDed with the
; low 4 lanes of %__u, and the mask is padded with zero lanes to 32.
; The VLX run expects a write-masked vpcmpnltd with the load folded in. The NoVLX
; run expects a separate vmovdqa, a write-masked 512-bit compare, and the kshift
; pair that keeps the low 4 mask bits.
11207 define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
11208 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
11209 ; VLX: # %bb.0: # %entry
11210 ; VLX-NEXT: kmovd %edi, %k1
11211 ; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
11212 ; VLX-NEXT: kmovd %k0, %eax
11215 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
11216 ; NoVLX: # %bb.0: # %entry
11217 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11218 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
11219 ; NoVLX-NEXT: kmovw %edi, %k1
11220 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11221 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11222 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11223 ; NoVLX-NEXT: kmovw %k0, %eax
11224 ; NoVLX-NEXT: vzeroupper
11227 %0 = bitcast <2 x i64> %__a to <4 x i32>
11228 %load = load <2 x i64>, ptr %__b
11229 %1 = bitcast <2 x i64> %load to <4 x i32>
11230 %2 = icmp sge <4 x i32> %0, %1
11231 %3 = bitcast i8 %__u to <8 x i1>
11232 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11233 %4 = and <4 x i1> %2, %extract.i
11234 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11235 %6 = bitcast <32 x i1> %5 to i32
; Broadcast variant of the <4 x i32> signed >= test returning an i32 mask: a
; single i32 loaded from %__b is splatted to all 4 lanes before the compare, and
; the 4-lane result is padded with zero lanes to 32.
; The VLX run expects an embedded {1to4} broadcast operand. The NoVLX run expects
; a {1to16} broadcast on the 512-bit compare, then the kshift pair that keeps the
; low 4 mask bits.
11240 define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
11241 ; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
11242 ; VLX: # %bb.0: # %entry
11243 ; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0
11244 ; VLX-NEXT: kmovd %k0, %eax
11247 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
11248 ; NoVLX: # %bb.0: # %entry
11249 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11250 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
11251 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11252 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11253 ; NoVLX-NEXT: kmovw %k0, %eax
11254 ; NoVLX-NEXT: vzeroupper
11257 %0 = bitcast <2 x i64> %__a to <4 x i32>
11258 %load = load i32, ptr %__b
11259 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11260 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11261 %2 = icmp sge <4 x i32> %0, %1
11262 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11263 %4 = bitcast <32 x i1> %3 to i32
; Masked broadcast variant of the <4 x i32> signed >= test returning an i32
; mask: a single i32 loaded from %__b is splatted to 4 lanes, compared, and the
; result is ANDed with the low 4 lanes of %__u (mask operand first in the `and`).
; The 4-lane mask is padded with zero lanes to 32.
; The VLX run expects a write-masked compare with a {1to4} broadcast; the NoVLX
; run expects a {1to16} broadcast on the 512-bit form plus the kshift pair.
11267 define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
11268 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
11269 ; VLX: # %bb.0: # %entry
11270 ; VLX-NEXT: kmovd %edi, %k1
11271 ; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
11272 ; VLX-NEXT: kmovd %k0, %eax
11275 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
11276 ; NoVLX: # %bb.0: # %entry
11277 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11278 ; NoVLX-NEXT: kmovw %edi, %k1
11279 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
11280 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11281 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11282 ; NoVLX-NEXT: kmovw %k0, %eax
11283 ; NoVLX-NEXT: vzeroupper
11286 %0 = bitcast <2 x i64> %__a to <4 x i32>
11287 %load = load i32, ptr %__b
11288 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11289 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11290 %2 = icmp sge <4 x i32> %0, %1
11291 %3 = bitcast i8 %__u to <8 x i1>
11292 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11293 %4 = and <4 x i1> %extract.i, %2
11294 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11295 %6 = bitcast <32 x i1> %5 to i32
; Signed >= compare of two <4 x i32> register operands, returned as an i64 mask.
; Every shuffle index >= 4 selects a lane of zeroinitializer, so the upper 60
; lanes of the 64-lane mask are zero.
; The VLX run expects a 128-bit vpcmpnltd read back with kmovq. The NoVLX run
; expects a 512-bit vpcmpnltd, the kshiftlw/kshiftrw $12 pair that keeps the low
; 4 bits, and a 16-bit kmovw into %eax.
11300 define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11301 ; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask:
11302 ; VLX: # %bb.0: # %entry
11303 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0
11304 ; VLX-NEXT: kmovq %k0, %rax
11307 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask:
11308 ; NoVLX: # %bb.0: # %entry
11309 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
11310 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11311 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11312 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11313 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11314 ; NoVLX-NEXT: kmovw %k0, %eax
11315 ; NoVLX-NEXT: vzeroupper
11318 %0 = bitcast <2 x i64> %__a to <4 x i32>
11319 %1 = bitcast <2 x i64> %__b to <4 x i32>
11320 %2 = icmp sge <4 x i32> %0, %1
11321 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11322 %4 = bitcast <64 x i1> %3 to i64
; Memory-operand variant: signed >= between %__a (as <4 x i32>) and a full
; <2 x i64> vector loaded from %__b. The <4 x i1> result is widened to 64
; lanes and bitcast to i64.
; Expected: the +avx512vl run folds the load into the xmm compare; the
; avx512f-only run loads into xmm1 first, compares on zmm, and keeps only the
; low 4 mask bits via the kshiftlw/kshiftrw $12 pair.
11326 define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
11327 ; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem:
11328 ; VLX: # %bb.0: # %entry
11329 ; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
11330 ; VLX-NEXT: kmovq %k0, %rax
11333 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem:
11334 ; NoVLX: # %bb.0: # %entry
11335 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11336 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
11337 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11338 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11339 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11340 ; NoVLX-NEXT: kmovw %k0, %eax
11341 ; NoVLX-NEXT: vzeroupper
11344 %0 = bitcast <2 x i64> %__a to <4 x i32>
11345 %load = load <2 x i64>, ptr %__b
11346 %1 = bitcast <2 x i64> %load to <4 x i32>
11347 %2 = icmp sge <4 x i32> %0, %1
; Shuffle indices >= 4 select lanes of the zeroinitializer operand, so lanes 4-63 are zero.
11348 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11349 %4 = bitcast <64 x i1> %3 to i64
; Masked variant: signed >= on %__a and %__b (as <4 x i32>), ANDed with the
; low 4 bits of the i8 mask %__u. The <4 x i1> result is widened to 64 lanes
; and bitcast to i64.
; Expected: both runs move %edi into %k1 and use it as the compare's write
; mask; the avx512f-only run additionally compares on zmm and keeps only the
; low 4 mask bits via the kshiftlw/kshiftrw $12 pair.
11353 define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11354 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask:
11355 ; VLX: # %bb.0: # %entry
11356 ; VLX-NEXT: kmovd %edi, %k1
11357 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
11358 ; VLX-NEXT: kmovq %k0, %rax
11361 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask:
11362 ; NoVLX: # %bb.0: # %entry
11363 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
11364 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11365 ; NoVLX-NEXT: kmovw %edi, %k1
11366 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11367 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11368 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11369 ; NoVLX-NEXT: kmovw %k0, %eax
11370 ; NoVLX-NEXT: vzeroupper
11373 %0 = bitcast <2 x i64> %__a to <4 x i32>
11374 %1 = bitcast <2 x i64> %__b to <4 x i32>
11375 %2 = icmp sge <4 x i32> %0, %1
; Only lanes 0-3 of the <8 x i1> view of %__u are extracted and used as the mask.
11376 %3 = bitcast i8 %__u to <8 x i1>
11377 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11378 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices >= 4 select lanes of the zeroinitializer operand, so lanes 4-63 are zero.
11379 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11380 %6 = bitcast <64 x i1> %5 to i64
; Masked memory-operand variant: signed >= between %__a (as <4 x i32>) and a
; full vector loaded from %__b, ANDed with the low 4 bits of %__u. The
; <4 x i1> result is widened to 64 lanes and bitcast to i64.
; Expected: the +avx512vl run folds the load into the masked xmm compare; the
; avx512f-only run loads into xmm1, does a masked zmm compare, and keeps only
; the low 4 mask bits via the kshiftlw/kshiftrw $12 pair.
11384 define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
11385 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem:
11386 ; VLX: # %bb.0: # %entry
11387 ; VLX-NEXT: kmovd %edi, %k1
11388 ; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
11389 ; VLX-NEXT: kmovq %k0, %rax
11392 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem:
11393 ; NoVLX: # %bb.0: # %entry
11394 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11395 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
11396 ; NoVLX-NEXT: kmovw %edi, %k1
11397 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11398 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11399 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11400 ; NoVLX-NEXT: kmovw %k0, %eax
11401 ; NoVLX-NEXT: vzeroupper
11404 %0 = bitcast <2 x i64> %__a to <4 x i32>
11405 %load = load <2 x i64>, ptr %__b
11406 %1 = bitcast <2 x i64> %load to <4 x i32>
11407 %2 = icmp sge <4 x i32> %0, %1
; Only lanes 0-3 of the <8 x i1> view of %__u are extracted and used as the mask.
11408 %3 = bitcast i8 %__u to <8 x i1>
11409 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11410 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices >= 4 select lanes of the zeroinitializer operand, so lanes 4-63 are zero.
11411 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11412 %6 = bitcast <64 x i1> %5 to i64
; Broadcast variant: signed >= between %__a (as <4 x i32>) and a single i32
; loaded from %__b and splatted to all 4 lanes. The <4 x i1> result is
; widened to 64 lanes and bitcast to i64.
; Expected: the +avx512vl run uses a {1to4} embedded broadcast on xmm; the
; avx512f-only run uses a {1to16} broadcast on zmm and keeps only the low 4
; mask bits via the kshiftlw/kshiftrw $12 pair.
11417 define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
11418 ; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
11419 ; VLX: # %bb.0: # %entry
11420 ; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0
11421 ; VLX-NEXT: kmovq %k0, %rax
11424 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
11425 ; NoVLX: # %bb.0: # %entry
11426 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11427 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
11428 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11429 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11430 ; NoVLX-NEXT: kmovw %k0, %eax
11431 ; NoVLX-NEXT: vzeroupper
11434 %0 = bitcast <2 x i64> %__a to <4 x i32>
11435 %load = load i32, ptr %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero index mask.
11436 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11437 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11438 %2 = icmp sge <4 x i32> %0, %1
; Shuffle indices >= 4 select lanes of the zeroinitializer operand, so lanes 4-63 are zero.
11439 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11440 %4 = bitcast <64 x i1> %3 to i64
; Masked broadcast variant: signed >= between %__a (as <4 x i32>) and a
; splatted i32 loaded from %__b, ANDed with the low 4 bits of %__u (mask is
; the first AND operand here). The <4 x i1> result is widened to 64 lanes and
; bitcast to i64.
; Expected: the +avx512vl run uses a masked {1to4} xmm compare; the
; avx512f-only run uses a masked {1to16} zmm compare and keeps only the low 4
; mask bits via the kshiftlw/kshiftrw $12 pair.
11444 define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
11445 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
11446 ; VLX: # %bb.0: # %entry
11447 ; VLX-NEXT: kmovd %edi, %k1
11448 ; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
11449 ; VLX-NEXT: kmovq %k0, %rax
11452 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
11453 ; NoVLX: # %bb.0: # %entry
11454 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11455 ; NoVLX-NEXT: kmovw %edi, %k1
11456 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
11457 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11458 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11459 ; NoVLX-NEXT: kmovw %k0, %eax
11460 ; NoVLX-NEXT: vzeroupper
11463 %0 = bitcast <2 x i64> %__a to <4 x i32>
11464 %load = load i32, ptr %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero index mask.
11465 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11466 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11467 %2 = icmp sge <4 x i32> %0, %1
; Only lanes 0-3 of the <8 x i1> view of %__u are extracted and used as the mask.
11468 %3 = bitcast i8 %__u to <8 x i1>
11469 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11470 %4 = and <4 x i1> %extract.i, %2
; Shuffle indices >= 4 select lanes of the zeroinitializer operand, so lanes 4-63 are zero.
11471 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11472 %6 = bitcast <64 x i1> %5 to i64
; Signed >= (icmp sge) on two <4 x i64> arguments reinterpreted as <8 x i32>.
; The <8 x i1> result is widened to 16 lanes and bitcast to i16.
; Expected: the +avx512vl run compares directly on ymm registers; the
; avx512f-only run compares on zmm and keeps only the low 8 mask bits via the
; kshiftlw/kshiftrw $8 pair.
11477 define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11478 ; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask:
11479 ; VLX: # %bb.0: # %entry
11480 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0
11481 ; VLX-NEXT: kmovd %k0, %eax
11482 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11483 ; VLX-NEXT: vzeroupper
11486 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask:
11487 ; NoVLX: # %bb.0: # %entry
11488 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
11489 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11490 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11491 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11492 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11493 ; NoVLX-NEXT: kmovw %k0, %eax
11494 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11495 ; NoVLX-NEXT: vzeroupper
11498 %0 = bitcast <4 x i64> %__a to <8 x i32>
11499 %1 = bitcast <4 x i64> %__b to <8 x i32>
11500 %2 = icmp sge <8 x i32> %0, %1
; Shuffle indices >= 8 select lanes of the zeroinitializer operand, so lanes 8-15 are zero.
11501 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11502 %4 = bitcast <16 x i1> %3 to i16
; Memory-operand variant: signed >= between %__a (as <8 x i32>) and a full
; <4 x i64> vector loaded from %__b. The <8 x i1> result is widened to 16
; lanes and bitcast to i16.
; Expected: the +avx512vl run folds the load into the ymm compare; the
; avx512f-only run loads into ymm1 first, compares on zmm, and keeps only the
; low 8 mask bits via the kshiftlw/kshiftrw $8 pair.
11506 define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
11507 ; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem:
11508 ; VLX: # %bb.0: # %entry
11509 ; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
11510 ; VLX-NEXT: kmovd %k0, %eax
11511 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11512 ; VLX-NEXT: vzeroupper
11515 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem:
11516 ; NoVLX: # %bb.0: # %entry
11517 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11518 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
11519 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11520 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11521 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11522 ; NoVLX-NEXT: kmovw %k0, %eax
11523 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11524 ; NoVLX-NEXT: vzeroupper
11527 %0 = bitcast <4 x i64> %__a to <8 x i32>
11528 %load = load <4 x i64>, ptr %__b
11529 %1 = bitcast <4 x i64> %load to <8 x i32>
11530 %2 = icmp sge <8 x i32> %0, %1
; Shuffle indices >= 8 select lanes of the zeroinitializer operand, so lanes 8-15 are zero.
11531 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11532 %4 = bitcast <16 x i1> %3 to i16
; Masked variant: signed >= on %__a and %__b (as <8 x i32>), ANDed with all 8
; bits of the i8 mask %__u. The <8 x i1> result is widened to 16 lanes and
; bitcast to i16.
; Expected: both runs move %edi into %k1 and use it as the compare's write
; mask; the avx512f-only run additionally compares on zmm and keeps only the
; low 8 mask bits via the kshiftlw/kshiftrw $8 pair.
11536 define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11537 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask:
11538 ; VLX: # %bb.0: # %entry
11539 ; VLX-NEXT: kmovd %edi, %k1
11540 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 {%k1}
11541 ; VLX-NEXT: kmovd %k0, %eax
11542 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11543 ; VLX-NEXT: vzeroupper
11546 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask:
11547 ; NoVLX: # %bb.0: # %entry
11548 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
11549 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11550 ; NoVLX-NEXT: kmovw %edi, %k1
11551 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11552 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11553 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11554 ; NoVLX-NEXT: kmovw %k0, %eax
11555 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11556 ; NoVLX-NEXT: vzeroupper
11559 %0 = bitcast <4 x i64> %__a to <8 x i32>
11560 %1 = bitcast <4 x i64> %__b to <8 x i32>
11561 %2 = icmp sge <8 x i32> %0, %1
; %__u is viewed as one mask bit per compared lane.
11562 %3 = bitcast i8 %__u to <8 x i1>
11563 %4 = and <8 x i1> %2, %3
; Shuffle indices >= 8 select lanes of the zeroinitializer operand, so lanes 8-15 are zero.
11564 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11565 %6 = bitcast <16 x i1> %5 to i16
; Masked memory-operand variant: signed >= between %__a (as <8 x i32>) and a
; full vector loaded from %__b, ANDed with all 8 bits of %__u. The <8 x i1>
; result is widened to 16 lanes and bitcast to i16.
; Expected: the +avx512vl run folds the load into the masked ymm compare; the
; avx512f-only run loads into ymm1, does a masked zmm compare, and keeps only
; the low 8 mask bits via the kshiftlw/kshiftrw $8 pair.
11569 define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
11570 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem:
11571 ; VLX: # %bb.0: # %entry
11572 ; VLX-NEXT: kmovd %edi, %k1
11573 ; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
11574 ; VLX-NEXT: kmovd %k0, %eax
11575 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11576 ; VLX-NEXT: vzeroupper
11579 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem:
11580 ; NoVLX: # %bb.0: # %entry
11581 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11582 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
11583 ; NoVLX-NEXT: kmovw %edi, %k1
11584 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11585 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11586 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11587 ; NoVLX-NEXT: kmovw %k0, %eax
11588 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11589 ; NoVLX-NEXT: vzeroupper
11592 %0 = bitcast <4 x i64> %__a to <8 x i32>
11593 %load = load <4 x i64>, ptr %__b
11594 %1 = bitcast <4 x i64> %load to <8 x i32>
11595 %2 = icmp sge <8 x i32> %0, %1
; %__u is viewed as one mask bit per compared lane.
11596 %3 = bitcast i8 %__u to <8 x i1>
11597 %4 = and <8 x i1> %2, %3
; Shuffle indices >= 8 select lanes of the zeroinitializer operand, so lanes 8-15 are zero.
11598 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11599 %6 = bitcast <16 x i1> %5 to i16
; Broadcast variant: signed >= between %__a (as <8 x i32>) and a single i32
; loaded from %__b and splatted to all 8 lanes. The <8 x i1> result is
; widened to 16 lanes and bitcast to i16.
; Expected: the +avx512vl run uses a {1to8} embedded broadcast on ymm; the
; avx512f-only run uses a {1to16} broadcast on zmm and keeps only the low 8
; mask bits via the kshiftlw/kshiftrw $8 pair.
11604 define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
11605 ; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
11606 ; VLX: # %bb.0: # %entry
11607 ; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0
11608 ; VLX-NEXT: kmovd %k0, %eax
11609 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11610 ; VLX-NEXT: vzeroupper
11613 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
11614 ; NoVLX: # %bb.0: # %entry
11615 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11616 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
11617 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11618 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11619 ; NoVLX-NEXT: kmovw %k0, %eax
11620 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11621 ; NoVLX-NEXT: vzeroupper
11624 %0 = bitcast <4 x i64> %__a to <8 x i32>
11625 %load = load i32, ptr %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero index mask.
11626 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
11627 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
11628 %2 = icmp sge <8 x i32> %0, %1
; Shuffle indices >= 8 select lanes of the zeroinitializer operand, so lanes 8-15 are zero.
11629 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11630 %4 = bitcast <16 x i1> %3 to i16
; Masked broadcast variant: signed >= between %__a (as <8 x i32>) and a
; splatted i32 loaded from %__b, ANDed with all 8 bits of %__u (mask is the
; first AND operand here). The <8 x i1> result is widened to 16 lanes and
; bitcast to i16.
; Expected: the +avx512vl run uses a masked {1to8} ymm compare; the
; avx512f-only run uses a masked {1to16} zmm compare and keeps only the low 8
; mask bits via the kshiftlw/kshiftrw $8 pair.
11634 define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
11635 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
11636 ; VLX: # %bb.0: # %entry
11637 ; VLX-NEXT: kmovd %edi, %k1
11638 ; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
11639 ; VLX-NEXT: kmovd %k0, %eax
11640 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11641 ; VLX-NEXT: vzeroupper
11644 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
11645 ; NoVLX: # %bb.0: # %entry
11646 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11647 ; NoVLX-NEXT: kmovw %edi, %k1
11648 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
11649 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11650 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11651 ; NoVLX-NEXT: kmovw %k0, %eax
11652 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11653 ; NoVLX-NEXT: vzeroupper
11656 %0 = bitcast <4 x i64> %__a to <8 x i32>
11657 %load = load i32, ptr %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero index mask.
11658 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
11659 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
11660 %2 = icmp sge <8 x i32> %0, %1
; %__u is viewed as one mask bit per compared lane.
11661 %3 = bitcast i8 %__u to <8 x i1>
11662 %4 = and <8 x i1> %3, %2
; Shuffle indices >= 8 select lanes of the zeroinitializer operand, so lanes 8-15 are zero.
11663 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11664 %6 = bitcast <16 x i1> %5 to i16
; Signed >= (icmp sge) on two <4 x i64> arguments reinterpreted as <8 x i32>.
; The <8 x i1> result is widened to 32 lanes and bitcast to i32.
; Expected: the +avx512vl run compares directly on ymm registers; the
; avx512f-only run compares on zmm and keeps only the low 8 mask bits via the
; kshiftlw/kshiftrw $8 pair.
11669 define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11670 ; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
11671 ; VLX: # %bb.0: # %entry
11672 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0
11673 ; VLX-NEXT: kmovd %k0, %eax
11674 ; VLX-NEXT: vzeroupper
11677 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
11678 ; NoVLX: # %bb.0: # %entry
11679 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
11680 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11681 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11682 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11683 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11684 ; NoVLX-NEXT: kmovw %k0, %eax
11685 ; NoVLX-NEXT: vzeroupper
11688 %0 = bitcast <4 x i64> %__a to <8 x i32>
11689 %1 = bitcast <4 x i64> %__b to <8 x i32>
11690 %2 = icmp sge <8 x i32> %0, %1
; Shuffle indices >= 8 select lanes of the zeroinitializer operand, so lanes 8-31 are zero.
11691 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11692 %4 = bitcast <32 x i1> %3 to i32
; Memory-operand variant: signed >= between %__a (as <8 x i32>) and a full
; <4 x i64> vector loaded from %__b. The <8 x i1> result is widened to 32
; lanes and bitcast to i32.
; Expected: the +avx512vl run folds the load into the ymm compare; the
; avx512f-only run loads into ymm1 first, compares on zmm, and keeps only the
; low 8 mask bits via the kshiftlw/kshiftrw $8 pair.
11696 define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
11697 ; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
11698 ; VLX: # %bb.0: # %entry
11699 ; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
11700 ; VLX-NEXT: kmovd %k0, %eax
11701 ; VLX-NEXT: vzeroupper
11704 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
11705 ; NoVLX: # %bb.0: # %entry
11706 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11707 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
11708 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11709 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11710 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11711 ; NoVLX-NEXT: kmovw %k0, %eax
11712 ; NoVLX-NEXT: vzeroupper
11715 %0 = bitcast <4 x i64> %__a to <8 x i32>
11716 %load = load <4 x i64>, ptr %__b
11717 %1 = bitcast <4 x i64> %load to <8 x i32>
11718 %2 = icmp sge <8 x i32> %0, %1
; Shuffle indices >= 8 select lanes of the zeroinitializer operand, so lanes 8-31 are zero.
11719 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11720 %4 = bitcast <32 x i1> %3 to i32
; Masked variant: signed >= on %__a and %__b (as <8 x i32>), ANDed with all 8
; bits of the i8 mask %__u. The <8 x i1> result is widened to 32 lanes and
; bitcast to i32.
; Expected: both runs move %edi into %k1 and use it as the compare's write
; mask; the avx512f-only run additionally compares on zmm and keeps only the
; low 8 mask bits via the kshiftlw/kshiftrw $8 pair.
11724 define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11725 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
11726 ; VLX: # %bb.0: # %entry
11727 ; VLX-NEXT: kmovd %edi, %k1
11728 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 {%k1}
11729 ; VLX-NEXT: kmovd %k0, %eax
11730 ; VLX-NEXT: vzeroupper
11733 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
11734 ; NoVLX: # %bb.0: # %entry
11735 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
11736 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11737 ; NoVLX-NEXT: kmovw %edi, %k1
11738 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11739 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11740 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11741 ; NoVLX-NEXT: kmovw %k0, %eax
11742 ; NoVLX-NEXT: vzeroupper
11745 %0 = bitcast <4 x i64> %__a to <8 x i32>
11746 %1 = bitcast <4 x i64> %__b to <8 x i32>
11747 %2 = icmp sge <8 x i32> %0, %1
; %__u is viewed as one mask bit per compared lane.
11748 %3 = bitcast i8 %__u to <8 x i1>
11749 %4 = and <8 x i1> %2, %3
; Shuffle indices >= 8 select lanes of the zeroinitializer operand, so lanes 8-31 are zero.
11750 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11751 %6 = bitcast <32 x i1> %5 to i32
; Masked memory-operand variant: signed >= between %__a (as <8 x i32>) and a
; full vector loaded from %__b, ANDed with all 8 bits of %__u. The <8 x i1>
; result is widened to 32 lanes and bitcast to i32.
; Expected: the +avx512vl run folds the load into the masked ymm compare; the
; avx512f-only run loads into ymm1, does a masked zmm compare, and keeps only
; the low 8 mask bits via the kshiftlw/kshiftrw $8 pair.
11755 define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
11756 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
11757 ; VLX: # %bb.0: # %entry
11758 ; VLX-NEXT: kmovd %edi, %k1
11759 ; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
11760 ; VLX-NEXT: kmovd %k0, %eax
11761 ; VLX-NEXT: vzeroupper
11764 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
11765 ; NoVLX: # %bb.0: # %entry
11766 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11767 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
11768 ; NoVLX-NEXT: kmovw %edi, %k1
11769 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11770 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11771 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11772 ; NoVLX-NEXT: kmovw %k0, %eax
11773 ; NoVLX-NEXT: vzeroupper
11776 %0 = bitcast <4 x i64> %__a to <8 x i32>
11777 %load = load <4 x i64>, ptr %__b
11778 %1 = bitcast <4 x i64> %load to <8 x i32>
11779 %2 = icmp sge <8 x i32> %0, %1
; %__u is viewed as one mask bit per compared lane.
11780 %3 = bitcast i8 %__u to <8 x i1>
11781 %4 = and <8 x i1> %2, %3
; Shuffle indices >= 8 select lanes of the zeroinitializer operand, so lanes 8-31 are zero.
11782 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11783 %6 = bitcast <32 x i1> %5 to i32
; Broadcast variant: signed >= between %__a (as <8 x i32>) and a single i32
; loaded from %__b and splatted to all 8 lanes. The <8 x i1> result is
; widened to 32 lanes and bitcast to i32.
; Expected: the +avx512vl run uses a {1to8} embedded broadcast on ymm; the
; avx512f-only run uses a {1to16} broadcast on zmm and keeps only the low 8
; mask bits via the kshiftlw/kshiftrw $8 pair.
11788 define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
11789 ; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
11790 ; VLX: # %bb.0: # %entry
11791 ; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0
11792 ; VLX-NEXT: kmovd %k0, %eax
11793 ; VLX-NEXT: vzeroupper
11796 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
11797 ; NoVLX: # %bb.0: # %entry
11798 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11799 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
11800 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11801 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11802 ; NoVLX-NEXT: kmovw %k0, %eax
11803 ; NoVLX-NEXT: vzeroupper
11806 %0 = bitcast <4 x i64> %__a to <8 x i32>
11807 %load = load i32, ptr %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero index mask.
11808 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
11809 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
11810 %2 = icmp sge <8 x i32> %0, %1
; Shuffle indices >= 8 select lanes of the zeroinitializer operand, so lanes 8-31 are zero.
11811 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11812 %4 = bitcast <32 x i1> %3 to i32
; Masked broadcast variant: signed >= between %__a (as <8 x i32>) and a
; splatted i32 loaded from %__b, ANDed with all 8 bits of %__u (mask is the
; first AND operand here). The <8 x i1> result is widened to 32 lanes and
; bitcast to i32.
; Expected: the +avx512vl run uses a masked {1to8} ymm compare; the
; avx512f-only run uses a masked {1to16} zmm compare and keeps only the low 8
; mask bits via the kshiftlw/kshiftrw $8 pair.
11816 define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
11817 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
11818 ; VLX: # %bb.0: # %entry
11819 ; VLX-NEXT: kmovd %edi, %k1
11820 ; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
11821 ; VLX-NEXT: kmovd %k0, %eax
11822 ; VLX-NEXT: vzeroupper
11825 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
11826 ; NoVLX: # %bb.0: # %entry
11827 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11828 ; NoVLX-NEXT: kmovw %edi, %k1
11829 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
11830 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11831 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11832 ; NoVLX-NEXT: kmovw %k0, %eax
11833 ; NoVLX-NEXT: vzeroupper
11836 %0 = bitcast <4 x i64> %__a to <8 x i32>
11837 %load = load i32, ptr %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero index mask.
11838 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
11839 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
11840 %2 = icmp sge <8 x i32> %0, %1
; %__u is viewed as one mask bit per compared lane.
11841 %3 = bitcast i8 %__u to <8 x i1>
11842 %4 = and <8 x i1> %3, %2
; Shuffle indices >= 8 select lanes of the zeroinitializer operand, so lanes 8-31 are zero.
11843 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11844 %6 = bitcast <32 x i1> %5 to i32
; Signed >= (icmp sge) on two <4 x i64> arguments reinterpreted as <8 x i32>.
; The <8 x i1> result is widened to 64 lanes and bitcast to i64.
; Expected: the +avx512vl run compares directly on ymm registers and reads the
; mask with kmovq; the avx512f-only run compares on zmm and keeps only the low
; 8 mask bits via the kshiftlw/kshiftrw $8 pair.
11849 define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11850 ; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
11851 ; VLX: # %bb.0: # %entry
11852 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0
11853 ; VLX-NEXT: kmovq %k0, %rax
11854 ; VLX-NEXT: vzeroupper
11857 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
11858 ; NoVLX: # %bb.0: # %entry
11859 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
11860 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11861 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11862 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11863 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11864 ; NoVLX-NEXT: kmovw %k0, %eax
11865 ; NoVLX-NEXT: vzeroupper
11868 %0 = bitcast <4 x i64> %__a to <8 x i32>
11869 %1 = bitcast <4 x i64> %__b to <8 x i32>
11870 %2 = icmp sge <8 x i32> %0, %1
; Shuffle indices >= 8 select lanes of the zeroinitializer operand, so lanes 8-63 are zero.
11871 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11872 %4 = bitcast <64 x i1> %3 to i64
; Memory-operand variant: signed >= between %__a (as <8 x i32>) and a full
; <4 x i64> vector loaded from %__b. The <8 x i1> result is widened to 64
; lanes and bitcast to i64.
; Expected: the +avx512vl run folds the load into the ymm compare; the
; avx512f-only run loads into ymm1 first, compares on zmm, and keeps only the
; low 8 mask bits via the kshiftlw/kshiftrw $8 pair.
11876 define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
11877 ; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
11878 ; VLX: # %bb.0: # %entry
11879 ; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
11880 ; VLX-NEXT: kmovq %k0, %rax
11881 ; VLX-NEXT: vzeroupper
11884 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
11885 ; NoVLX: # %bb.0: # %entry
11886 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11887 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
11888 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11889 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11890 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11891 ; NoVLX-NEXT: kmovw %k0, %eax
11892 ; NoVLX-NEXT: vzeroupper
11895 %0 = bitcast <4 x i64> %__a to <8 x i32>
11896 %load = load <4 x i64>, ptr %__b
11897 %1 = bitcast <4 x i64> %load to <8 x i32>
11898 %2 = icmp sge <8 x i32> %0, %1
; Shuffle indices >= 8 select lanes of the zeroinitializer operand, so lanes 8-63 are zero.
11899 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11900 %4 = bitcast <64 x i1> %3 to i64
; Masked variant: signed >= on %__a and %__b (as <8 x i32>), ANDed with all 8
; bits of the i8 mask %__u. The <8 x i1> result is widened to 64 lanes and
; bitcast to i64.
; Expected: both runs move %edi into %k1 and use it as the compare's write
; mask; the avx512f-only run additionally compares on zmm and keeps only the
; low 8 mask bits via the kshiftlw/kshiftrw $8 pair.
11904 define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11905 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
11906 ; VLX: # %bb.0: # %entry
11907 ; VLX-NEXT: kmovd %edi, %k1
11908 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 {%k1}
11909 ; VLX-NEXT: kmovq %k0, %rax
11910 ; VLX-NEXT: vzeroupper
11913 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
11914 ; NoVLX: # %bb.0: # %entry
11915 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
11916 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11917 ; NoVLX-NEXT: kmovw %edi, %k1
11918 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11919 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11920 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11921 ; NoVLX-NEXT: kmovw %k0, %eax
11922 ; NoVLX-NEXT: vzeroupper
11925 %0 = bitcast <4 x i64> %__a to <8 x i32>
11926 %1 = bitcast <4 x i64> %__b to <8 x i32>
11927 %2 = icmp sge <8 x i32> %0, %1
; %__u is viewed as one mask bit per compared lane.
11928 %3 = bitcast i8 %__u to <8 x i1>
11929 %4 = and <8 x i1> %2, %3
; Shuffle indices >= 8 select lanes of the zeroinitializer operand, so lanes 8-63 are zero.
11930 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11931 %6 = bitcast <64 x i1> %5 to i64
; Masked memory-operand variant: signed >= between %__a (as <8 x i32>) and a
; full vector loaded from %__b, ANDed with all 8 bits of %__u. The <8 x i1>
; result is widened to 64 lanes and bitcast to i64.
; Expected: the +avx512vl run folds the load into the masked ymm compare; the
; avx512f-only run loads into ymm1, does a masked zmm compare, and keeps only
; the low 8 mask bits via the kshiftlw/kshiftrw $8 pair.
11935 define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
11936 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
11937 ; VLX: # %bb.0: # %entry
11938 ; VLX-NEXT: kmovd %edi, %k1
11939 ; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
11940 ; VLX-NEXT: kmovq %k0, %rax
11941 ; VLX-NEXT: vzeroupper
11944 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
11945 ; NoVLX: # %bb.0: # %entry
11946 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11947 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
11948 ; NoVLX-NEXT: kmovw %edi, %k1
11949 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11950 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11951 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11952 ; NoVLX-NEXT: kmovw %k0, %eax
11953 ; NoVLX-NEXT: vzeroupper
11956 %0 = bitcast <4 x i64> %__a to <8 x i32>
11957 %load = load <4 x i64>, ptr %__b
11958 %1 = bitcast <4 x i64> %load to <8 x i32>
11959 %2 = icmp sge <8 x i32> %0, %1
; %__u is viewed as one mask bit per compared lane.
11960 %3 = bitcast i8 %__u to <8 x i1>
11961 %4 = and <8 x i1> %2, %3
; Shuffle indices >= 8 select lanes of the zeroinitializer operand, so lanes 8-63 are zero.
11962 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11963 %6 = bitcast <64 x i1> %5 to i64
; Broadcast variant: signed >= between %__a (as <8 x i32>) and a single i32
; loaded from %__b and splatted to all 8 lanes. The <8 x i1> result is
; widened to 64 lanes and bitcast to i64.
; Expected: the +avx512vl run uses a {1to8} embedded broadcast on ymm; the
; avx512f-only run uses a {1to16} broadcast on zmm and keeps only the low 8
; mask bits via the kshiftlw/kshiftrw $8 pair.
11968 define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
11969 ; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
11970 ; VLX: # %bb.0: # %entry
11971 ; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0
11972 ; VLX-NEXT: kmovq %k0, %rax
11973 ; VLX-NEXT: vzeroupper
11976 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
11977 ; NoVLX: # %bb.0: # %entry
11978 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11979 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
11980 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11981 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11982 ; NoVLX-NEXT: kmovw %k0, %eax
11983 ; NoVLX-NEXT: vzeroupper
11986 %0 = bitcast <4 x i64> %__a to <8 x i32>
11987 %load = load i32, ptr %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero index mask.
11988 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
11989 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
11990 %2 = icmp sge <8 x i32> %0, %1
; Shuffle indices >= 8 select lanes of the zeroinitializer operand, so lanes 8-63 are zero.
11991 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11992 %4 = bitcast <64 x i1> %3 to i64
; Masked broadcast variant: signed >= between %__a (as <8 x i32>) and a
; splatted i32 loaded from %__b, ANDed with all 8 bits of %__u (mask is the
; first AND operand here). The <8 x i1> result is widened to 64 lanes and
; bitcast to i64.
; Expected: the +avx512vl run uses a masked {1to8} ymm compare; the
; avx512f-only run uses a masked {1to16} zmm compare and keeps only the low 8
; mask bits via the kshiftlw/kshiftrw $8 pair.
11996 define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
11997 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
11998 ; VLX: # %bb.0: # %entry
11999 ; VLX-NEXT: kmovd %edi, %k1
12000 ; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
12001 ; VLX-NEXT: kmovq %k0, %rax
12002 ; VLX-NEXT: vzeroupper
12005 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
12006 ; NoVLX: # %bb.0: # %entry
12007 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
12008 ; NoVLX-NEXT: kmovw %edi, %k1
12009 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
12010 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
12011 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
12012 ; NoVLX-NEXT: kmovw %k0, %eax
12013 ; NoVLX-NEXT: vzeroupper
12016 %0 = bitcast <4 x i64> %__a to <8 x i32>
12017 %load = load i32, ptr %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero index mask.
12018 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
12019 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12020 %2 = icmp sge <8 x i32> %0, %1
; %__u is viewed as one mask bit per compared lane.
12021 %3 = bitcast i8 %__u to <8 x i1>
12022 %4 = and <8 x i1> %3, %2
; Shuffle indices >= 8 select lanes of the zeroinitializer operand, so lanes 8-63 are zero.
12023 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
12024 %6 = bitcast <64 x i1> %5 to i64
; Register-register signed >= compare (icmp sge) of the <16 x i32> views of
; %__a and %__b. The <16 x i1> result is widened to <32 x i1> (lanes 16 and up
; select from zeroinitializer) and bitcast to i32.
; Both check prefixes expect a single zmm vpcmpnltd; they differ only in the
; mask-to-GPR move (kmovd for VLX, kmovw for NoVLX).
12029 define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
12030 ; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
12031 ; VLX: # %bb.0: # %entry
12032 ; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12033 ; VLX-NEXT: kmovd %k0, %eax
12034 ; VLX-NEXT: vzeroupper
12037 ; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
12038 ; NoVLX: # %bb.0: # %entry
12039 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12040 ; NoVLX-NEXT: kmovw %k0, %eax
12041 ; NoVLX-NEXT: vzeroupper
12044 %0 = bitcast <8 x i64> %__a to <16 x i32>
12045 %1 = bitcast <8 x i64> %__b to <16 x i32>
12046 %2 = icmp sge <16 x i32> %0, %1
12047 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12048 %4 = bitcast <32 x i1> %3 to i32
; Signed >= compare (icmp sge) of the <16 x i32> view of %__a against a full
; <8 x i64> vector loaded from %__b (viewed as <16 x i32>). The <16 x i1>
; result is widened to <32 x i1> (lanes 16 and up select from zeroinitializer)
; and bitcast to i32.
; Both check prefixes expect the load folded into the compare as a (%rdi)
; memory operand.
12052 define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
12053 ; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
12054 ; VLX: # %bb.0: # %entry
12055 ; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
12056 ; VLX-NEXT: kmovd %k0, %eax
12057 ; VLX-NEXT: vzeroupper
12060 ; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
12061 ; NoVLX: # %bb.0: # %entry
12062 ; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
12063 ; NoVLX-NEXT: kmovw %k0, %eax
12064 ; NoVLX-NEXT: vzeroupper
12067 %0 = bitcast <8 x i64> %__a to <16 x i32>
12068 %load = load <8 x i64>, ptr %__b
12069 %1 = bitcast <8 x i64> %load to <16 x i32>
12070 %2 = icmp sge <16 x i32> %0, %1
12071 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12072 %4 = bitcast <32 x i1> %3 to i32
; Masked register-register signed >= compare: icmp sge on the <16 x i32> views
; of %__a and %__b, ANDed with %__u reinterpreted as <16 x i1>. The masked
; <16 x i1> is widened to <32 x i1> (lanes 16 and up select from
; zeroinitializer) and bitcast to i32.
; The VLX checks expect %__u applied as the {%k1} write mask of the compare;
; the NoVLX checks instead expect an unmasked compare with the mask applied
; afterwards in a GPR (andl %edi, %eax).
12076 define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
12077 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
12078 ; VLX: # %bb.0: # %entry
12079 ; VLX-NEXT: kmovd %edi, %k1
12080 ; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
12081 ; VLX-NEXT: kmovd %k0, %eax
12082 ; VLX-NEXT: vzeroupper
12085 ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
12086 ; NoVLX: # %bb.0: # %entry
12087 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12088 ; NoVLX-NEXT: kmovw %k0, %eax
12089 ; NoVLX-NEXT: andl %edi, %eax
12090 ; NoVLX-NEXT: vzeroupper
12093 %0 = bitcast <8 x i64> %__a to <16 x i32>
12094 %1 = bitcast <8 x i64> %__b to <16 x i32>
12095 %2 = icmp sge <16 x i32> %0, %1
12096 %3 = bitcast i16 %__u to <16 x i1>
12097 %4 = and <16 x i1> %2, %3
12098 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12099 %6 = bitcast <32 x i1> %5 to i32
; Masked signed >= compare with a memory operand: icmp sge on the <16 x i32>
; view of %__a against a full <8 x i64> vector loaded from %__b, ANDed with
; %__u reinterpreted as <16 x i1>. The masked <16 x i1> is widened to
; <32 x i1> (lanes 16 and up select from zeroinitializer) and bitcast to i32.
; The VLX checks expect the load folded and %__u used as the {%k1} write
; mask; the NoVLX checks expect an unmasked folded compare followed by
; andl %edi, %eax.
12103 define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
12104 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
12105 ; VLX: # %bb.0: # %entry
12106 ; VLX-NEXT: kmovd %edi, %k1
12107 ; VLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
12108 ; VLX-NEXT: kmovd %k0, %eax
12109 ; VLX-NEXT: vzeroupper
12112 ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
12113 ; NoVLX: # %bb.0: # %entry
12114 ; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0
12115 ; NoVLX-NEXT: kmovw %k0, %eax
12116 ; NoVLX-NEXT: andl %edi, %eax
12117 ; NoVLX-NEXT: vzeroupper
12120 %0 = bitcast <8 x i64> %__a to <16 x i32>
12121 %load = load <8 x i64>, ptr %__b
12122 %1 = bitcast <8 x i64> %load to <16 x i32>
12123 %2 = icmp sge <16 x i32> %0, %1
12124 %3 = bitcast i16 %__u to <16 x i1>
12125 %4 = and <16 x i1> %2, %3
12126 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12127 %6 = bitcast <32 x i1> %5 to i32
; Broadcast signed >= compare: icmp sge on the <16 x i32> view of %__a against
; a scalar i32 loaded from %__b and splatted to all 16 lanes. The <16 x i1>
; result is widened to <32 x i1> (lanes 16 and up select from zeroinitializer)
; and bitcast to i32.
; Both check prefixes expect the splat folded into the compare as a {1to16}
; embedded broadcast.
12132 define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
12133 ; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
12134 ; VLX: # %bb.0: # %entry
12135 ; VLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
12136 ; VLX-NEXT: kmovd %k0, %eax
12137 ; VLX-NEXT: vzeroupper
12140 ; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
12141 ; NoVLX: # %bb.0: # %entry
12142 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
12143 ; NoVLX-NEXT: kmovw %k0, %eax
12144 ; NoVLX-NEXT: vzeroupper
12147 %0 = bitcast <8 x i64> %__a to <16 x i32>
12148 %load = load i32, ptr %__b
12149 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
12150 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12151 %2 = icmp sge <16 x i32> %0, %1
12152 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12153 %4 = bitcast <32 x i1> %3 to i32
; Masked broadcast signed >= compare: icmp sge on the <16 x i32> view of %__a
; against a splat of the i32 loaded from %__b, ANDed with %__u reinterpreted
; as <16 x i1>. The masked <16 x i1> is widened to <32 x i1> (lanes 16 and up
; select from zeroinitializer) and bitcast to i32.
; The VLX checks expect a {1to16} compare with %__u as the {%k1} write mask;
; the NoVLX checks expect an unmasked {1to16} compare followed by
; andl %edi, %eax.
12157 define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
12158 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
12159 ; VLX: # %bb.0: # %entry
12160 ; VLX-NEXT: kmovd %edi, %k1
12161 ; VLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
12162 ; VLX-NEXT: kmovd %k0, %eax
12163 ; VLX-NEXT: vzeroupper
12166 ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
12167 ; NoVLX: # %bb.0: # %entry
12168 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0
12169 ; NoVLX-NEXT: kmovw %k0, %eax
12170 ; NoVLX-NEXT: andl %edi, %eax
12171 ; NoVLX-NEXT: vzeroupper
12174 %0 = bitcast <8 x i64> %__a to <16 x i32>
12175 %load = load i32, ptr %__b
12176 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
12177 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12178 %2 = icmp sge <16 x i32> %0, %1
12179 %3 = bitcast i16 %__u to <16 x i1>
12180 %4 = and <16 x i1> %3, %2
12181 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12182 %6 = bitcast <32 x i1> %5 to i32
; Register-register signed >= compare (icmp sge) of the <16 x i32> views of
; %__a and %__b. The <16 x i1> result is widened to <64 x i1> (lanes 16 and up
; select from zeroinitializer) and bitcast to i64.
; Both check prefixes expect a single zmm vpcmpnltd; the VLX checks move the
; mask out with kmovq into %rax, the NoVLX checks with kmovw into %eax.
12187 define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
12188 ; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
12189 ; VLX: # %bb.0: # %entry
12190 ; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12191 ; VLX-NEXT: kmovq %k0, %rax
12192 ; VLX-NEXT: vzeroupper
12195 ; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
12196 ; NoVLX: # %bb.0: # %entry
12197 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12198 ; NoVLX-NEXT: kmovw %k0, %eax
12199 ; NoVLX-NEXT: vzeroupper
12202 %0 = bitcast <8 x i64> %__a to <16 x i32>
12203 %1 = bitcast <8 x i64> %__b to <16 x i32>
12204 %2 = icmp sge <16 x i32> %0, %1
12205 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12206 %4 = bitcast <64 x i1> %3 to i64
; Signed >= compare (icmp sge) of the <16 x i32> view of %__a against a full
; <8 x i64> vector loaded from %__b (viewed as <16 x i32>). The <16 x i1>
; result is widened to <64 x i1> (lanes 16 and up select from zeroinitializer)
; and bitcast to i64.
; Both check prefixes expect the load folded into the compare as a (%rdi)
; memory operand.
12210 define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
12211 ; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
12212 ; VLX: # %bb.0: # %entry
12213 ; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
12214 ; VLX-NEXT: kmovq %k0, %rax
12215 ; VLX-NEXT: vzeroupper
12218 ; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
12219 ; NoVLX: # %bb.0: # %entry
12220 ; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
12221 ; NoVLX-NEXT: kmovw %k0, %eax
12222 ; NoVLX-NEXT: vzeroupper
12225 %0 = bitcast <8 x i64> %__a to <16 x i32>
12226 %load = load <8 x i64>, ptr %__b
12227 %1 = bitcast <8 x i64> %load to <16 x i32>
12228 %2 = icmp sge <16 x i32> %0, %1
12229 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12230 %4 = bitcast <64 x i1> %3 to i64
; Masked register-register signed >= compare: icmp sge on the <16 x i32> views
; of %__a and %__b, ANDed with %__u reinterpreted as <16 x i1>. The masked
; <16 x i1> is widened to <64 x i1> (lanes 16 and up select from
; zeroinitializer) and bitcast to i64.
; The VLX checks expect %__u applied as the {%k1} write mask of the compare;
; the NoVLX checks expect an unmasked compare with the mask applied
; afterwards in a GPR (andl %edi, %eax).
12234 define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
12235 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
12236 ; VLX: # %bb.0: # %entry
12237 ; VLX-NEXT: kmovd %edi, %k1
12238 ; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
12239 ; VLX-NEXT: kmovq %k0, %rax
12240 ; VLX-NEXT: vzeroupper
12243 ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
12244 ; NoVLX: # %bb.0: # %entry
12245 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12246 ; NoVLX-NEXT: kmovw %k0, %eax
12247 ; NoVLX-NEXT: andl %edi, %eax
12248 ; NoVLX-NEXT: vzeroupper
12251 %0 = bitcast <8 x i64> %__a to <16 x i32>
12252 %1 = bitcast <8 x i64> %__b to <16 x i32>
12253 %2 = icmp sge <16 x i32> %0, %1
12254 %3 = bitcast i16 %__u to <16 x i1>
12255 %4 = and <16 x i1> %2, %3
12256 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12257 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare with a memory operand: icmp sge on the <16 x i32>
; view of %__a against a full <8 x i64> vector loaded from %__b, ANDed with
; %__u reinterpreted as <16 x i1>. The masked <16 x i1> is widened to
; <64 x i1> (lanes 16 and up select from zeroinitializer) and bitcast to i64.
; The VLX checks expect the load folded and %__u used as the {%k1} write
; mask; the NoVLX checks expect an unmasked folded compare followed by
; andl %edi, %eax.
12261 define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
12262 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
12263 ; VLX: # %bb.0: # %entry
12264 ; VLX-NEXT: kmovd %edi, %k1
12265 ; VLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
12266 ; VLX-NEXT: kmovq %k0, %rax
12267 ; VLX-NEXT: vzeroupper
12270 ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
12271 ; NoVLX: # %bb.0: # %entry
12272 ; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0
12273 ; NoVLX-NEXT: kmovw %k0, %eax
12274 ; NoVLX-NEXT: andl %edi, %eax
12275 ; NoVLX-NEXT: vzeroupper
12278 %0 = bitcast <8 x i64> %__a to <16 x i32>
12279 %load = load <8 x i64>, ptr %__b
12280 %1 = bitcast <8 x i64> %load to <16 x i32>
12281 %2 = icmp sge <16 x i32> %0, %1
12282 %3 = bitcast i16 %__u to <16 x i1>
12283 %4 = and <16 x i1> %2, %3
12284 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12285 %6 = bitcast <64 x i1> %5 to i64
; Broadcast signed >= compare: icmp sge on the <16 x i32> view of %__a against
; a scalar i32 loaded from %__b and splatted to all 16 lanes. The <16 x i1>
; result is widened to <64 x i1> (lanes 16 and up select from zeroinitializer)
; and bitcast to i64.
; Both check prefixes expect the splat folded into the compare as a {1to16}
; embedded broadcast.
12290 define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
12291 ; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
12292 ; VLX: # %bb.0: # %entry
12293 ; VLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
12294 ; VLX-NEXT: kmovq %k0, %rax
12295 ; VLX-NEXT: vzeroupper
12298 ; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
12299 ; NoVLX: # %bb.0: # %entry
12300 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
12301 ; NoVLX-NEXT: kmovw %k0, %eax
12302 ; NoVLX-NEXT: vzeroupper
12305 %0 = bitcast <8 x i64> %__a to <16 x i32>
12306 %load = load i32, ptr %__b
12307 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
12308 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12309 %2 = icmp sge <16 x i32> %0, %1
12310 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12311 %4 = bitcast <64 x i1> %3 to i64
; Masked broadcast signed >= compare: icmp sge on the <16 x i32> view of %__a
; against a splat of the i32 loaded from %__b, ANDed with %__u reinterpreted
; as <16 x i1>. The masked <16 x i1> is widened to <64 x i1> (lanes 16 and up
; select from zeroinitializer) and bitcast to i64.
; The VLX checks expect a {1to16} compare with %__u as the {%k1} write mask;
; the NoVLX checks expect an unmasked {1to16} compare followed by
; andl %edi, %eax.
12315 define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
12316 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
12317 ; VLX: # %bb.0: # %entry
12318 ; VLX-NEXT: kmovd %edi, %k1
12319 ; VLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
12320 ; VLX-NEXT: kmovq %k0, %rax
12321 ; VLX-NEXT: vzeroupper
12324 ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
12325 ; NoVLX: # %bb.0: # %entry
12326 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0
12327 ; NoVLX-NEXT: kmovw %k0, %eax
12328 ; NoVLX-NEXT: andl %edi, %eax
12329 ; NoVLX-NEXT: vzeroupper
12332 %0 = bitcast <8 x i64> %__a to <16 x i32>
12333 %load = load i32, ptr %__b
12334 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
12335 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12336 %2 = icmp sge <16 x i32> %0, %1
12337 %3 = bitcast i16 %__u to <16 x i1>
12338 %4 = and <16 x i1> %3, %2
12339 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12340 %6 = bitcast <64 x i1> %5 to i64
; Register-register signed >= compare (icmp sge) of two <2 x i64> vectors
; (the bitcasts to the same type are no-ops). The <2 x i1> result is widened
; to <4 x i1> (lanes 2 and 3 select from zeroinitializer) and bitcast to i4.
; The VLX checks expect an xmm vpcmpnltq and kmovb; the NoVLX checks expect
; the operands widened to zmm and the result reduced to its low two bits with
; andl $3.
12345 define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12346 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
12347 ; VLX: # %bb.0: # %entry
12348 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0
12349 ; VLX-NEXT: kmovb %k0, %eax
12352 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
12353 ; NoVLX: # %bb.0: # %entry
12354 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12355 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12356 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12357 ; NoVLX-NEXT: kmovw %k0, %eax
12358 ; NoVLX-NEXT: andl $3, %eax
12359 ; NoVLX-NEXT: vzeroupper
12362 %0 = bitcast <2 x i64> %__a to <2 x i64>
12363 %1 = bitcast <2 x i64> %__b to <2 x i64>
12364 %2 = icmp sge <2 x i64> %0, %1
12365 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12366 %4 = bitcast <4 x i1> %3 to i4
; Signed >= compare (icmp sge) of %__a against a <2 x i64> vector loaded from
; %__b. The <2 x i1> result is widened to <4 x i1> (lanes 2 and 3 select from
; zeroinitializer) and bitcast to i4.
; The VLX checks expect the load folded into an xmm compare; the NoVLX checks
; expect a separate vmovdqa into %xmm1, a zmm compare, and andl $3 to keep the
; low two bits.
12370 define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
12371 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
12372 ; VLX: # %bb.0: # %entry
12373 ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
12374 ; VLX-NEXT: kmovb %k0, %eax
12377 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
12378 ; NoVLX: # %bb.0: # %entry
12379 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12380 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
12381 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12382 ; NoVLX-NEXT: kmovw %k0, %eax
12383 ; NoVLX-NEXT: andl $3, %eax
12384 ; NoVLX-NEXT: vzeroupper
12387 %0 = bitcast <2 x i64> %__a to <2 x i64>
12388 %load = load <2 x i64>, ptr %__b
12389 %1 = bitcast <2 x i64> %load to <2 x i64>
12390 %2 = icmp sge <2 x i64> %0, %1
12391 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12392 %4 = bitcast <4 x i1> %3 to i4
; Masked register-register signed >= compare of two <2 x i64> vectors. %__u is
; reinterpreted as <8 x i1>, its lanes 0 and 1 are extracted, and they are
; ANDed with the icmp sge result. The masked <2 x i1> is widened to <4 x i1>
; (lanes 2 and 3 select from zeroinitializer) and bitcast to i4.
; Both check prefixes expect %__u moved into %k1 and used as the compare's
; write mask; the NoVLX checks also expect zmm-widened operands and andl $3.
12396 define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12397 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask:
12398 ; VLX: # %bb.0: # %entry
12399 ; VLX-NEXT: kmovd %edi, %k1
12400 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
12401 ; VLX-NEXT: kmovb %k0, %eax
12404 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask:
12405 ; NoVLX: # %bb.0: # %entry
12406 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12407 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12408 ; NoVLX-NEXT: kmovw %edi, %k1
12409 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12410 ; NoVLX-NEXT: kmovw %k0, %eax
12411 ; NoVLX-NEXT: andl $3, %eax
12412 ; NoVLX-NEXT: vzeroupper
12415 %0 = bitcast <2 x i64> %__a to <2 x i64>
12416 %1 = bitcast <2 x i64> %__b to <2 x i64>
12417 %2 = icmp sge <2 x i64> %0, %1
12418 %3 = bitcast i8 %__u to <8 x i1>
12419 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12420 %4 = and <2 x i1> %2, %extract.i
12421 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12422 %6 = bitcast <4 x i1> %5 to i4
; Masked signed >= compare of %__a against a <2 x i64> vector loaded from
; %__b. Lanes 0 and 1 of %__u (viewed as <8 x i1>) are ANDed with the icmp sge
; result; the masked <2 x i1> is widened to <4 x i1> (lanes 2 and 3 select
; from zeroinitializer) and bitcast to i4.
; The VLX checks expect the load folded into a {%k1}-masked xmm compare; the
; NoVLX checks expect a separate vmovdqa, a {%k1}-masked zmm compare, and
; andl $3.
12426 define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
12427 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem:
12428 ; VLX: # %bb.0: # %entry
12429 ; VLX-NEXT: kmovd %edi, %k1
12430 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
12431 ; VLX-NEXT: kmovb %k0, %eax
12434 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem:
12435 ; NoVLX: # %bb.0: # %entry
12436 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12437 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
12438 ; NoVLX-NEXT: kmovw %edi, %k1
12439 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12440 ; NoVLX-NEXT: kmovw %k0, %eax
12441 ; NoVLX-NEXT: andl $3, %eax
12442 ; NoVLX-NEXT: vzeroupper
12445 %0 = bitcast <2 x i64> %__a to <2 x i64>
12446 %load = load <2 x i64>, ptr %__b
12447 %1 = bitcast <2 x i64> %load to <2 x i64>
12448 %2 = icmp sge <2 x i64> %0, %1
12449 %3 = bitcast i8 %__u to <8 x i1>
12450 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12451 %4 = and <2 x i1> %2, %extract.i
12452 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12453 %6 = bitcast <4 x i1> %5 to i4
; Broadcast signed >= compare: icmp sge on %__a against a scalar i64 loaded
; from %__b and splatted to both lanes. The <2 x i1> result is widened to
; <4 x i1> (lanes 2 and 3 select from zeroinitializer) and bitcast to i4.
; The VLX checks expect a {1to2} broadcast compare on xmm; the NoVLX checks
; expect a {1to8} broadcast compare on zmm followed by andl $3.
12458 define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
12459 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
12460 ; VLX: # %bb.0: # %entry
12461 ; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
12462 ; VLX-NEXT: kmovb %k0, %eax
12465 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
12466 ; NoVLX: # %bb.0: # %entry
12467 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12468 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
12469 ; NoVLX-NEXT: kmovw %k0, %eax
12470 ; NoVLX-NEXT: andl $3, %eax
12471 ; NoVLX-NEXT: vzeroupper
12474 %0 = bitcast <2 x i64> %__a to <2 x i64>
12475 %load = load i64, ptr %__b
12476 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12477 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12478 %2 = icmp sge <2 x i64> %0, %1
12479 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12480 %4 = bitcast <4 x i1> %3 to i4
; Masked broadcast signed >= compare: icmp sge on %__a against a splat of the
; i64 loaded from %__b, ANDed with lanes 0 and 1 of %__u (viewed as
; <8 x i1>). The masked <2 x i1> is widened to <4 x i1> (lanes 2 and 3 select
; from zeroinitializer) and bitcast to i4.
; The VLX checks expect a {%k1}-masked {1to2} compare on xmm; the NoVLX
; checks expect a {%k1}-masked {1to8} compare on zmm followed by andl $3.
12484 define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
12485 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
12486 ; VLX: # %bb.0: # %entry
12487 ; VLX-NEXT: kmovd %edi, %k1
12488 ; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
12489 ; VLX-NEXT: kmovb %k0, %eax
12492 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
12493 ; NoVLX: # %bb.0: # %entry
12494 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12495 ; NoVLX-NEXT: kmovw %edi, %k1
12496 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
12497 ; NoVLX-NEXT: kmovw %k0, %eax
12498 ; NoVLX-NEXT: andl $3, %eax
12499 ; NoVLX-NEXT: vzeroupper
12502 %0 = bitcast <2 x i64> %__a to <2 x i64>
12503 %load = load i64, ptr %__b
12504 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12505 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12506 %2 = icmp sge <2 x i64> %0, %1
12507 %3 = bitcast i8 %__u to <8 x i1>
12508 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12509 %4 = and <2 x i1> %extract.i, %2
12510 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12511 %6 = bitcast <4 x i1> %5 to i4
; Register-register signed >= compare (icmp sge) of two <2 x i64> vectors.
; The <2 x i1> result is widened to <8 x i1> (every lane past the first two
; selects index 2 or 3, i.e. the zeroinitializer operand) and bitcast to i8.
; The VLX checks expect an xmm vpcmpnltq and kmovd; the NoVLX checks expect
; zmm-widened operands and a kshiftlw/kshiftrw $14 pair that keeps only the
; low two mask bits.
12516 define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12517 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask:
12518 ; VLX: # %bb.0: # %entry
12519 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0
12520 ; VLX-NEXT: kmovd %k0, %eax
12521 ; VLX-NEXT: # kill: def $al killed $al killed $eax
12524 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask:
12525 ; NoVLX: # %bb.0: # %entry
12526 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12527 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12528 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12529 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12530 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12531 ; NoVLX-NEXT: kmovw %k0, %eax
12532 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
12533 ; NoVLX-NEXT: vzeroupper
12536 %0 = bitcast <2 x i64> %__a to <2 x i64>
12537 %1 = bitcast <2 x i64> %__b to <2 x i64>
12538 %2 = icmp sge <2 x i64> %0, %1
12539 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12540 %4 = bitcast <8 x i1> %3 to i8
; Signed >= compare (icmp sge) of %__a against a <2 x i64> vector loaded from
; %__b. The <2 x i1> result is widened to <8 x i1> (lanes past the first two
; select from zeroinitializer) and bitcast to i8.
; The VLX checks expect the load folded into an xmm compare; the NoVLX checks
; expect a separate vmovdqa, a zmm compare, and a kshiftlw/kshiftrw $14 pair.
12544 define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
12545 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem:
12546 ; VLX: # %bb.0: # %entry
12547 ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
12548 ; VLX-NEXT: kmovd %k0, %eax
12549 ; VLX-NEXT: # kill: def $al killed $al killed $eax
12552 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem:
12553 ; NoVLX: # %bb.0: # %entry
12554 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12555 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
12556 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12557 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12558 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12559 ; NoVLX-NEXT: kmovw %k0, %eax
12560 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
12561 ; NoVLX-NEXT: vzeroupper
12564 %0 = bitcast <2 x i64> %__a to <2 x i64>
12565 %load = load <2 x i64>, ptr %__b
12566 %1 = bitcast <2 x i64> %load to <2 x i64>
12567 %2 = icmp sge <2 x i64> %0, %1
12568 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12569 %4 = bitcast <8 x i1> %3 to i8
; Masked register-register signed >= compare of two <2 x i64> vectors. Lanes
; 0 and 1 of %__u (viewed as <8 x i1>) are ANDed with the icmp sge result; the
; masked <2 x i1> is widened to <8 x i1> (lanes past the first two select from
; zeroinitializer) and bitcast to i8.
; Both check prefixes expect %__u moved into %k1 and used as the compare's
; write mask; the NoVLX checks also expect zmm-widened operands and a
; kshiftlw/kshiftrw $14 pair.
12573 define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12574 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask:
12575 ; VLX: # %bb.0: # %entry
12576 ; VLX-NEXT: kmovd %edi, %k1
12577 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
12578 ; VLX-NEXT: kmovd %k0, %eax
12579 ; VLX-NEXT: # kill: def $al killed $al killed $eax
12582 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask:
12583 ; NoVLX: # %bb.0: # %entry
12584 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12585 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12586 ; NoVLX-NEXT: kmovw %edi, %k1
12587 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12588 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12589 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12590 ; NoVLX-NEXT: kmovw %k0, %eax
12591 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
12592 ; NoVLX-NEXT: vzeroupper
12595 %0 = bitcast <2 x i64> %__a to <2 x i64>
12596 %1 = bitcast <2 x i64> %__b to <2 x i64>
12597 %2 = icmp sge <2 x i64> %0, %1
12598 %3 = bitcast i8 %__u to <8 x i1>
12599 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12600 %4 = and <2 x i1> %2, %extract.i
12601 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12602 %6 = bitcast <8 x i1> %5 to i8
; Masked signed >= compare of %__a against a <2 x i64> vector loaded from
; %__b. Lanes 0 and 1 of %__u (viewed as <8 x i1>) are ANDed with the icmp sge
; result; the masked <2 x i1> is widened to <8 x i1> (lanes past the first two
; select from zeroinitializer) and bitcast to i8.
; The VLX checks expect the load folded into a {%k1}-masked xmm compare; the
; NoVLX checks expect a separate vmovdqa, a {%k1}-masked zmm compare, and a
; kshiftlw/kshiftrw $14 pair.
12606 define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
12607 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem:
12608 ; VLX: # %bb.0: # %entry
12609 ; VLX-NEXT: kmovd %edi, %k1
12610 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
12611 ; VLX-NEXT: kmovd %k0, %eax
12612 ; VLX-NEXT: # kill: def $al killed $al killed $eax
12615 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem:
12616 ; NoVLX: # %bb.0: # %entry
12617 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12618 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
12619 ; NoVLX-NEXT: kmovw %edi, %k1
12620 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12621 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12622 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12623 ; NoVLX-NEXT: kmovw %k0, %eax
12624 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
12625 ; NoVLX-NEXT: vzeroupper
12628 %0 = bitcast <2 x i64> %__a to <2 x i64>
12629 %load = load <2 x i64>, ptr %__b
12630 %1 = bitcast <2 x i64> %load to <2 x i64>
12631 %2 = icmp sge <2 x i64> %0, %1
12632 %3 = bitcast i8 %__u to <8 x i1>
12633 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12634 %4 = and <2 x i1> %2, %extract.i
12635 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12636 %6 = bitcast <8 x i1> %5 to i8
; Broadcast signed >= compare: icmp sge on %__a against a scalar i64 loaded
; from %__b and splatted to both lanes. The <2 x i1> result is widened to
; <8 x i1> (lanes past the first two select from zeroinitializer) and bitcast
; to i8.
; The VLX checks expect a {1to2} broadcast compare on xmm; the NoVLX checks
; expect a {1to8} broadcast compare on zmm followed by a kshiftlw/kshiftrw
; $14 pair.
12641 define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
12642 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
12643 ; VLX: # %bb.0: # %entry
12644 ; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
12645 ; VLX-NEXT: kmovd %k0, %eax
12646 ; VLX-NEXT: # kill: def $al killed $al killed $eax
12649 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
12650 ; NoVLX: # %bb.0: # %entry
12651 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12652 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
12653 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12654 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12655 ; NoVLX-NEXT: kmovw %k0, %eax
12656 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
12657 ; NoVLX-NEXT: vzeroupper
12660 %0 = bitcast <2 x i64> %__a to <2 x i64>
12661 %load = load i64, ptr %__b
12662 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12663 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12664 %2 = icmp sge <2 x i64> %0, %1
12665 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12666 %4 = bitcast <8 x i1> %3 to i8
; Masked broadcast signed >= compare: icmp sge on %__a against a splat of the
; i64 loaded from %__b, ANDed with lanes 0 and 1 of %__u (viewed as
; <8 x i1>). The masked <2 x i1> is widened to <8 x i1> (lanes past the first
; two select from zeroinitializer) and bitcast to i8.
; The VLX checks expect a {%k1}-masked {1to2} compare on xmm; the NoVLX
; checks expect a {%k1}-masked {1to8} compare on zmm followed by a
; kshiftlw/kshiftrw $14 pair.
12670 define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
12671 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
12672 ; VLX: # %bb.0: # %entry
12673 ; VLX-NEXT: kmovd %edi, %k1
12674 ; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
12675 ; VLX-NEXT: kmovd %k0, %eax
12676 ; VLX-NEXT: # kill: def $al killed $al killed $eax
12679 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
12680 ; NoVLX: # %bb.0: # %entry
12681 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12682 ; NoVLX-NEXT: kmovw %edi, %k1
12683 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
12684 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12685 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12686 ; NoVLX-NEXT: kmovw %k0, %eax
12687 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
12688 ; NoVLX-NEXT: vzeroupper
12691 %0 = bitcast <2 x i64> %__a to <2 x i64>
12692 %load = load i64, ptr %__b
12693 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12694 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12695 %2 = icmp sge <2 x i64> %0, %1
12696 %3 = bitcast i8 %__u to <8 x i1>
12697 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12698 %4 = and <2 x i1> %extract.i, %2
12699 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12700 %6 = bitcast <8 x i1> %5 to i8
; Register-register signed >= compare (icmp sge) of two <2 x i64> vectors.
; The <2 x i1> result is widened to <16 x i1> (lanes past the first two select
; index 2 or 3, i.e. the zeroinitializer operand) and bitcast to i16.
; The VLX checks expect an xmm vpcmpnltq and kmovd; the NoVLX checks expect
; zmm-widened operands and a kshiftlw/kshiftrw $14 pair that keeps only the
; low two mask bits.
12705 define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12706 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask:
12707 ; VLX: # %bb.0: # %entry
12708 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0
12709 ; VLX-NEXT: kmovd %k0, %eax
12710 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
12713 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask:
12714 ; NoVLX: # %bb.0: # %entry
12715 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12716 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12717 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12718 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12719 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12720 ; NoVLX-NEXT: kmovw %k0, %eax
12721 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
12722 ; NoVLX-NEXT: vzeroupper
12725 %0 = bitcast <2 x i64> %__a to <2 x i64>
12726 %1 = bitcast <2 x i64> %__b to <2 x i64>
12727 %2 = icmp sge <2 x i64> %0, %1
12728 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12729 %4 = bitcast <16 x i1> %3 to i16
; Same signed >= compare as the register form, but the second operand is a
; full <2 x i64> loaded from %__b. The <2 x i1> result is zero-extended to
; <16 x i1> via shufflevector with zeroinitializer and bitcast to i16.
; With AVX512VL the checks expect the load folded into vpcmpnltq as a (%rdi)
; memory operand. Without it the checks expect a separate vmovdqa into %xmm1,
; a zmm compare, and a kshiftlw/kshiftrw $14 pair keeping the low 2 mask bits.
12733 define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
12734 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem:
12735 ; VLX: # %bb.0: # %entry
12736 ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
12737 ; VLX-NEXT: kmovd %k0, %eax
12738 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
12741 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem:
12742 ; NoVLX: # %bb.0: # %entry
12743 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12744 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
12745 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12746 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12747 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12748 ; NoVLX-NEXT: kmovw %k0, %eax
12749 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
12750 ; NoVLX-NEXT: vzeroupper
12753 %0 = bitcast <2 x i64> %__a to <2 x i64>
12754 %load = load <2 x i64>, ptr %__b
12755 %1 = bitcast <2 x i64> %load to <2 x i64>
12756 %2 = icmp sge <2 x i64> %0, %1
12757 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12758 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of two <2 x i64> register arguments. %__u is
; bitcast to <8 x i1>, its lanes 0 and 1 are extracted and ANDed with the
; compare result; the <2 x i1> value is then widened to <16 x i1> with zero
; lanes (shufflevector against zeroinitializer) and bitcast to i16.
; Both check prefixes expect %edi to be moved into %k1 and applied as the
; {%k1} write-mask of vpcmpnltq. With AVX512VL the compare is on xmm; without
; it the compare is on zmm and a kshiftlw/kshiftrw $14 pair keeps the low
; 2 mask bits.
12762 define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12763 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask:
12764 ; VLX: # %bb.0: # %entry
12765 ; VLX-NEXT: kmovd %edi, %k1
12766 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
12767 ; VLX-NEXT: kmovd %k0, %eax
12768 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
12771 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask:
12772 ; NoVLX: # %bb.0: # %entry
12773 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12774 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12775 ; NoVLX-NEXT: kmovw %edi, %k1
12776 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12777 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12778 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12779 ; NoVLX-NEXT: kmovw %k0, %eax
12780 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
12781 ; NoVLX-NEXT: vzeroupper
12784 %0 = bitcast <2 x i64> %__a to <2 x i64>
12785 %1 = bitcast <2 x i64> %__b to <2 x i64>
12786 %2 = icmp sge <2 x i64> %0, %1
12787 %3 = bitcast i8 %__u to <8 x i1>
12788 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12789 %4 = and <2 x i1> %2, %extract.i
12790 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12791 %6 = bitcast <16 x i1> %5 to i16
; Masked signed >= compare of %__a against a full <2 x i64> loaded from %__b.
; Lanes 0 and 1 of %__u (viewed as <8 x i1>) are ANDed with the compare
; result, which is then widened to <16 x i1> with zero lanes and bitcast to
; i16.
; With AVX512VL the checks expect the load folded into a {%k1}-masked xmm
; vpcmpnltq with a (%rsi) operand. Without it the checks expect a vmovdqa
; into %xmm1, a {%k1}-masked zmm compare, and a kshiftlw/kshiftrw $14 pair
; keeping the low 2 mask bits.
12795 define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
12796 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem:
12797 ; VLX: # %bb.0: # %entry
12798 ; VLX-NEXT: kmovd %edi, %k1
12799 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
12800 ; VLX-NEXT: kmovd %k0, %eax
12801 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
12804 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem:
12805 ; NoVLX: # %bb.0: # %entry
12806 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12807 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
12808 ; NoVLX-NEXT: kmovw %edi, %k1
12809 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12810 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12811 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12812 ; NoVLX-NEXT: kmovw %k0, %eax
12813 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
12814 ; NoVLX-NEXT: vzeroupper
12817 %0 = bitcast <2 x i64> %__a to <2 x i64>
12818 %load = load <2 x i64>, ptr %__b
12819 %1 = bitcast <2 x i64> %load to <2 x i64>
12820 %2 = icmp sge <2 x i64> %0, %1
12821 %3 = bitcast i8 %__u to <8 x i1>
12822 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12823 %4 = and <2 x i1> %2, %extract.i
12824 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12825 %6 = bitcast <16 x i1> %5 to i16
; Broadcast form of the signed >= compare: a single i64 is loaded from %__b
; and splatted to both lanes (insertelement plus an all-zero-index
; shufflevector) before the compare with %__a. The <2 x i1> result is widened
; to <16 x i1> with zero lanes and bitcast to i16.
; With AVX512VL the checks expect an embedded-broadcast operand
; (%rdi){1to2} on an xmm vpcmpnltq. Without it the checks expect
; (%rdi){1to8} on a zmm compare plus a kshiftlw/kshiftrw $14 pair keeping the
; low 2 mask bits.
12830 define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
12831 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
12832 ; VLX: # %bb.0: # %entry
12833 ; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
12834 ; VLX-NEXT: kmovd %k0, %eax
12835 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
12838 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
12839 ; NoVLX: # %bb.0: # %entry
12840 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12841 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
12842 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12843 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12844 ; NoVLX-NEXT: kmovw %k0, %eax
12845 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
12846 ; NoVLX-NEXT: vzeroupper
12849 %0 = bitcast <2 x i64> %__a to <2 x i64>
12850 %load = load i64, ptr %__b
12851 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12852 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12853 %2 = icmp sge <2 x i64> %0, %1
12854 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12855 %4 = bitcast <16 x i1> %3 to i16
; Masked broadcast form: an i64 loaded from %__b is splatted to both lanes and
; compared (signed >=) with %__a. Lanes 0 and 1 of %__u (viewed as <8 x i1>)
; are ANDed with the compare result, which is then widened to <16 x i1> with
; zero lanes and bitcast to i16.
; With AVX512VL the checks expect a {%k1}-masked xmm vpcmpnltq with a
; (%rsi){1to2} broadcast operand. Without it the checks expect (%rsi){1to8}
; on a {%k1}-masked zmm compare plus a kshiftlw/kshiftrw $14 pair keeping the
; low 2 mask bits.
12859 define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
12860 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
12861 ; VLX: # %bb.0: # %entry
12862 ; VLX-NEXT: kmovd %edi, %k1
12863 ; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
12864 ; VLX-NEXT: kmovd %k0, %eax
12865 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
12868 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
12869 ; NoVLX: # %bb.0: # %entry
12870 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12871 ; NoVLX-NEXT: kmovw %edi, %k1
12872 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
12873 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12874 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12875 ; NoVLX-NEXT: kmovw %k0, %eax
12876 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
12877 ; NoVLX-NEXT: vzeroupper
12880 %0 = bitcast <2 x i64> %__a to <2 x i64>
12881 %load = load i64, ptr %__b
12882 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12883 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12884 %2 = icmp sge <2 x i64> %0, %1
12885 %3 = bitcast i8 %__u to <8 x i1>
12886 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12887 %4 = and <2 x i1> %extract.i, %2
12888 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12889 %6 = bitcast <16 x i1> %5 to i16
; Signed >= compare of two <2 x i64> register arguments, with the <2 x i1>
; result widened to <32 x i1> (shufflevector against zeroinitializer; mask
; indices 2 and 3 select the zero lanes) and bitcast to i32.
; With AVX512VL the checks expect an xmm vpcmpnltq and a kmovd of %k0 into
; %eax. Without it the checks expect a zmm compare, a kshiftlw/kshiftrw $14
; pair keeping the low 2 mask bits, and a kmovw into %eax.
12894 define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12895 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask:
12896 ; VLX: # %bb.0: # %entry
12897 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0
12898 ; VLX-NEXT: kmovd %k0, %eax
12901 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask:
12902 ; NoVLX: # %bb.0: # %entry
12903 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12904 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12905 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12906 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12907 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12908 ; NoVLX-NEXT: kmovw %k0, %eax
12909 ; NoVLX-NEXT: vzeroupper
12912 %0 = bitcast <2 x i64> %__a to <2 x i64>
12913 %1 = bitcast <2 x i64> %__b to <2 x i64>
12914 %2 = icmp sge <2 x i64> %0, %1
12915 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12916 %4 = bitcast <32 x i1> %3 to i32
; Signed >= compare of %__a against a full <2 x i64> loaded from %__b, with
; the <2 x i1> result widened to <32 x i1> with zero lanes and bitcast to i32.
; With AVX512VL the checks expect the load folded into an xmm vpcmpnltq as a
; (%rdi) operand. Without it the checks expect a vmovdqa into %xmm1, a zmm
; compare, and a kshiftlw/kshiftrw $14 pair keeping the low 2 mask bits.
12920 define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
12921 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem:
12922 ; VLX: # %bb.0: # %entry
12923 ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
12924 ; VLX-NEXT: kmovd %k0, %eax
12927 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem:
12928 ; NoVLX: # %bb.0: # %entry
12929 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12930 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
12931 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12932 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12933 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12934 ; NoVLX-NEXT: kmovw %k0, %eax
12935 ; NoVLX-NEXT: vzeroupper
12938 %0 = bitcast <2 x i64> %__a to <2 x i64>
12939 %load = load <2 x i64>, ptr %__b
12940 %1 = bitcast <2 x i64> %load to <2 x i64>
12941 %2 = icmp sge <2 x i64> %0, %1
12942 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12943 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of two <2 x i64> register arguments. Lanes 0 and 1
; of %__u (viewed as <8 x i1>) are ANDed with the compare result, which is
; then widened to <32 x i1> with zero lanes and bitcast to i32.
; Both check prefixes expect %edi moved into %k1 and used as the {%k1}
; write-mask of vpcmpnltq. With AVX512VL the compare is on xmm; without it the
; compare is on zmm and a kshiftlw/kshiftrw $14 pair keeps the low 2 mask
; bits.
12947 define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12948 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask:
12949 ; VLX: # %bb.0: # %entry
12950 ; VLX-NEXT: kmovd %edi, %k1
12951 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
12952 ; VLX-NEXT: kmovd %k0, %eax
12955 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask:
12956 ; NoVLX: # %bb.0: # %entry
12957 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12958 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12959 ; NoVLX-NEXT: kmovw %edi, %k1
12960 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12961 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12962 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12963 ; NoVLX-NEXT: kmovw %k0, %eax
12964 ; NoVLX-NEXT: vzeroupper
12967 %0 = bitcast <2 x i64> %__a to <2 x i64>
12968 %1 = bitcast <2 x i64> %__b to <2 x i64>
12969 %2 = icmp sge <2 x i64> %0, %1
12970 %3 = bitcast i8 %__u to <8 x i1>
12971 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12972 %4 = and <2 x i1> %2, %extract.i
12973 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12974 %6 = bitcast <32 x i1> %5 to i32
; Masked signed >= compare of %__a against a full <2 x i64> loaded from %__b.
; Lanes 0 and 1 of %__u (viewed as <8 x i1>) are ANDed with the compare
; result, which is then widened to <32 x i1> with zero lanes and bitcast to
; i32.
; With AVX512VL the checks expect the load folded into a {%k1}-masked xmm
; vpcmpnltq as a (%rsi) operand. Without it the checks expect a vmovdqa into
; %xmm1, a {%k1}-masked zmm compare, and a kshiftlw/kshiftrw $14 pair keeping
; the low 2 mask bits.
12978 define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
12979 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem:
12980 ; VLX: # %bb.0: # %entry
12981 ; VLX-NEXT: kmovd %edi, %k1
12982 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
12983 ; VLX-NEXT: kmovd %k0, %eax
12986 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem:
12987 ; NoVLX: # %bb.0: # %entry
12988 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12989 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
12990 ; NoVLX-NEXT: kmovw %edi, %k1
12991 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12992 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12993 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12994 ; NoVLX-NEXT: kmovw %k0, %eax
12995 ; NoVLX-NEXT: vzeroupper
12998 %0 = bitcast <2 x i64> %__a to <2 x i64>
12999 %load = load <2 x i64>, ptr %__b
13000 %1 = bitcast <2 x i64> %load to <2 x i64>
13001 %2 = icmp sge <2 x i64> %0, %1
13002 %3 = bitcast i8 %__u to <8 x i1>
13003 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13004 %4 = and <2 x i1> %2, %extract.i
13005 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13006 %6 = bitcast <32 x i1> %5 to i32
; Broadcast form: an i64 loaded from %__b is splatted to both lanes
; (insertelement plus an all-zero-index shufflevector) and compared (signed
; >=) with %__a. The <2 x i1> result is widened to <32 x i1> with zero lanes
; and bitcast to i32.
; With AVX512VL the checks expect a (%rdi){1to2} broadcast operand on an xmm
; vpcmpnltq. Without it the checks expect (%rdi){1to8} on a zmm compare plus
; a kshiftlw/kshiftrw $14 pair keeping the low 2 mask bits.
13011 define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
13012 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
13013 ; VLX: # %bb.0: # %entry
13014 ; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
13015 ; VLX-NEXT: kmovd %k0, %eax
13018 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
13019 ; NoVLX: # %bb.0: # %entry
13020 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13021 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
13022 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13023 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13024 ; NoVLX-NEXT: kmovw %k0, %eax
13025 ; NoVLX-NEXT: vzeroupper
13028 %0 = bitcast <2 x i64> %__a to <2 x i64>
13029 %load = load i64, ptr %__b
13030 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13031 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13032 %2 = icmp sge <2 x i64> %0, %1
13033 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13034 %4 = bitcast <32 x i1> %3 to i32
; Masked broadcast form: an i64 loaded from %__b is splatted to both lanes and
; compared (signed >=) with %__a. Lanes 0 and 1 of %__u (viewed as <8 x i1>)
; are ANDed with the compare result, which is then widened to <32 x i1> with
; zero lanes and bitcast to i32.
; With AVX512VL the checks expect a {%k1}-masked xmm vpcmpnltq with a
; (%rsi){1to2} operand. Without it the checks expect (%rsi){1to8} on a
; {%k1}-masked zmm compare plus a kshiftlw/kshiftrw $14 pair keeping the low
; 2 mask bits.
13038 define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
13039 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
13040 ; VLX: # %bb.0: # %entry
13041 ; VLX-NEXT: kmovd %edi, %k1
13042 ; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
13043 ; VLX-NEXT: kmovd %k0, %eax
13046 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
13047 ; NoVLX: # %bb.0: # %entry
13048 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13049 ; NoVLX-NEXT: kmovw %edi, %k1
13050 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
13051 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13052 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13053 ; NoVLX-NEXT: kmovw %k0, %eax
13054 ; NoVLX-NEXT: vzeroupper
13057 %0 = bitcast <2 x i64> %__a to <2 x i64>
13058 %load = load i64, ptr %__b
13059 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13060 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13061 %2 = icmp sge <2 x i64> %0, %1
13062 %3 = bitcast i8 %__u to <8 x i1>
13063 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13064 %4 = and <2 x i1> %extract.i, %2
13065 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13066 %6 = bitcast <32 x i1> %5 to i32
13071 define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
13072 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
13073 ; VLX: # %bb.0: # %entry
13074 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0
13075 ; VLX-NEXT: kmovq %k0, %rax
13078 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
13079 ; NoVLX: # %bb.0: # %entry
13080 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
13081 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13082 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13083 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13084 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13085 ; NoVLX-NEXT: kmovw %k0, %eax
13086 ; NoVLX-NEXT: vzeroupper
13089 %0 = bitcast <2 x i64> %__a to <2 x i64>
13090 %1 = bitcast <2 x i64> %__b to <2 x i64>
13091 %2 = icmp sge <2 x i64> %0, %1
13092 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13093 %4 = bitcast <64 x i1> %3 to i64
13097 define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
13098 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
13099 ; VLX: # %bb.0: # %entry
13100 ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
13101 ; VLX-NEXT: kmovq %k0, %rax
13104 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
13105 ; NoVLX: # %bb.0: # %entry
13106 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13107 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
13108 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13109 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13110 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13111 ; NoVLX-NEXT: kmovw %k0, %eax
13112 ; NoVLX-NEXT: vzeroupper
13115 %0 = bitcast <2 x i64> %__a to <2 x i64>
13116 %load = load <2 x i64>, ptr %__b
13117 %1 = bitcast <2 x i64> %load to <2 x i64>
13118 %2 = icmp sge <2 x i64> %0, %1
13119 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13120 %4 = bitcast <64 x i1> %3 to i64
13124 define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
13125 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
13126 ; VLX: # %bb.0: # %entry
13127 ; VLX-NEXT: kmovd %edi, %k1
13128 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
13129 ; VLX-NEXT: kmovq %k0, %rax
13132 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
13133 ; NoVLX: # %bb.0: # %entry
13134 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
13135 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13136 ; NoVLX-NEXT: kmovw %edi, %k1
13137 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13138 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13139 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13140 ; NoVLX-NEXT: kmovw %k0, %eax
13141 ; NoVLX-NEXT: vzeroupper
13144 %0 = bitcast <2 x i64> %__a to <2 x i64>
13145 %1 = bitcast <2 x i64> %__b to <2 x i64>
13146 %2 = icmp sge <2 x i64> %0, %1
13147 %3 = bitcast i8 %__u to <8 x i1>
13148 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13149 %4 = and <2 x i1> %2, %extract.i
13150 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13151 %6 = bitcast <64 x i1> %5 to i64
13155 define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
13156 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
13157 ; VLX: # %bb.0: # %entry
13158 ; VLX-NEXT: kmovd %edi, %k1
13159 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
13160 ; VLX-NEXT: kmovq %k0, %rax
13163 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
13164 ; NoVLX: # %bb.0: # %entry
13165 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13166 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
13167 ; NoVLX-NEXT: kmovw %edi, %k1
13168 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13169 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13170 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13171 ; NoVLX-NEXT: kmovw %k0, %eax
13172 ; NoVLX-NEXT: vzeroupper
13175 %0 = bitcast <2 x i64> %__a to <2 x i64>
13176 %load = load <2 x i64>, ptr %__b
13177 %1 = bitcast <2 x i64> %load to <2 x i64>
13178 %2 = icmp sge <2 x i64> %0, %1
13179 %3 = bitcast i8 %__u to <8 x i1>
13180 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13181 %4 = and <2 x i1> %2, %extract.i
13182 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13183 %6 = bitcast <64 x i1> %5 to i64
13188 define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
13189 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
13190 ; VLX: # %bb.0: # %entry
13191 ; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
13192 ; VLX-NEXT: kmovq %k0, %rax
13195 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
13196 ; NoVLX: # %bb.0: # %entry
13197 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13198 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
13199 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13200 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13201 ; NoVLX-NEXT: kmovw %k0, %eax
13202 ; NoVLX-NEXT: vzeroupper
13205 %0 = bitcast <2 x i64> %__a to <2 x i64>
13206 %load = load i64, ptr %__b
13207 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13208 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13209 %2 = icmp sge <2 x i64> %0, %1
13210 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13211 %4 = bitcast <64 x i1> %3 to i64
13215 define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
13216 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
13217 ; VLX: # %bb.0: # %entry
13218 ; VLX-NEXT: kmovd %edi, %k1
13219 ; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
13220 ; VLX-NEXT: kmovq %k0, %rax
13223 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
13224 ; NoVLX: # %bb.0: # %entry
13225 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13226 ; NoVLX-NEXT: kmovw %edi, %k1
13227 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
13228 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13229 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13230 ; NoVLX-NEXT: kmovw %k0, %eax
13231 ; NoVLX-NEXT: vzeroupper
13234 %0 = bitcast <2 x i64> %__a to <2 x i64>
13235 %load = load i64, ptr %__b
13236 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13237 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13238 %2 = icmp sge <2 x i64> %0, %1
13239 %3 = bitcast i8 %__u to <8 x i1>
13240 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13241 %4 = and <2 x i1> %extract.i, %2
13242 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13243 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of two <4 x i64> register arguments. The <4 x i1> result
; is widened to <8 x i1> by a shufflevector against zeroinitializer (mask
; indices 4..7 select the zero lanes) and bitcast to i8.
; With AVX512VL the checks expect a ymm vpcmpnltq into %k0. Without it the
; checks expect the compare on zmm registers followed by a kshiftlw/kshiftrw
; $12 pair, which keeps only the low 4 bits of the mask.
13248 define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13249 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask:
13250 ; VLX: # %bb.0: # %entry
13251 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0
13252 ; VLX-NEXT: kmovd %k0, %eax
13253 ; VLX-NEXT: # kill: def $al killed $al killed $eax
13254 ; VLX-NEXT: vzeroupper
13257 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask:
13258 ; NoVLX: # %bb.0: # %entry
13259 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13260 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13261 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13262 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13263 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13264 ; NoVLX-NEXT: kmovw %k0, %eax
13265 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
13266 ; NoVLX-NEXT: vzeroupper
13269 %0 = bitcast <4 x i64> %__a to <4 x i64>
13270 %1 = bitcast <4 x i64> %__b to <4 x i64>
13271 %2 = icmp sge <4 x i64> %0, %1
13272 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13273 %4 = bitcast <8 x i1> %3 to i8
; Signed >= compare of %__a against a full <4 x i64> loaded from %__b, with
; the <4 x i1> result widened to <8 x i1> with zero lanes and bitcast to i8.
; With AVX512VL the checks expect the load folded into a ymm vpcmpnltq as a
; (%rdi) operand. Without it the checks expect a vmovdqa into %ymm1, a zmm
; compare, and a kshiftlw/kshiftrw $12 pair keeping the low 4 mask bits.
13277 define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
13278 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem:
13279 ; VLX: # %bb.0: # %entry
13280 ; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
13281 ; VLX-NEXT: kmovd %k0, %eax
13282 ; VLX-NEXT: # kill: def $al killed $al killed $eax
13283 ; VLX-NEXT: vzeroupper
13286 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem:
13287 ; NoVLX: # %bb.0: # %entry
13288 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13289 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
13290 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13291 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13292 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13293 ; NoVLX-NEXT: kmovw %k0, %eax
13294 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
13295 ; NoVLX-NEXT: vzeroupper
13298 %0 = bitcast <4 x i64> %__a to <4 x i64>
13299 %load = load <4 x i64>, ptr %__b
13300 %1 = bitcast <4 x i64> %load to <4 x i64>
13301 %2 = icmp sge <4 x i64> %0, %1
13302 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13303 %4 = bitcast <8 x i1> %3 to i8
; Masked signed >= compare of two <4 x i64> register arguments. %__u is
; bitcast to <8 x i1>, its lanes 0..3 are extracted and ANDed with the compare
; result; the <4 x i1> value is then widened to <8 x i1> with zero lanes
; (shufflevector against zeroinitializer) and bitcast to i8.
; Both check prefixes expect %edi moved into %k1 and used as the {%k1}
; write-mask of vpcmpnltq. With AVX512VL the compare is on ymm; without it the
; compare is on zmm and a kshiftlw/kshiftrw $12 pair keeps the low 4 mask
; bits.
13307 define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13308 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask:
13309 ; VLX: # %bb.0: # %entry
13310 ; VLX-NEXT: kmovd %edi, %k1
13311 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
13312 ; VLX-NEXT: kmovd %k0, %eax
13313 ; VLX-NEXT: # kill: def $al killed $al killed $eax
13314 ; VLX-NEXT: vzeroupper
13317 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask:
13318 ; NoVLX: # %bb.0: # %entry
13319 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13320 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13321 ; NoVLX-NEXT: kmovw %edi, %k1
13322 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13323 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13324 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13325 ; NoVLX-NEXT: kmovw %k0, %eax
13326 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
13327 ; NoVLX-NEXT: vzeroupper
13330 %0 = bitcast <4 x i64> %__a to <4 x i64>
13331 %1 = bitcast <4 x i64> %__b to <4 x i64>
13332 %2 = icmp sge <4 x i64> %0, %1
13333 %3 = bitcast i8 %__u to <8 x i1>
13334 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13335 %4 = and <4 x i1> %2, %extract.i
13336 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13337 %6 = bitcast <8 x i1> %5 to i8
; Masked signed >= compare of %__a against a full <4 x i64> loaded from %__b.
; Lanes 0..3 of %__u (viewed as <8 x i1>) are ANDed with the compare result,
; which is then widened to <8 x i1> with zero lanes and bitcast to i8.
; With AVX512VL the checks expect the load folded into a {%k1}-masked ymm
; vpcmpnltq as a (%rsi) operand. Without it the checks expect a vmovdqa into
; %ymm1, a {%k1}-masked zmm compare, and a kshiftlw/kshiftrw $12 pair keeping
; the low 4 mask bits.
13341 define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
13342 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem:
13343 ; VLX: # %bb.0: # %entry
13344 ; VLX-NEXT: kmovd %edi, %k1
13345 ; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
13346 ; VLX-NEXT: kmovd %k0, %eax
13347 ; VLX-NEXT: # kill: def $al killed $al killed $eax
13348 ; VLX-NEXT: vzeroupper
13351 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem:
13352 ; NoVLX: # %bb.0: # %entry
13353 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13354 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
13355 ; NoVLX-NEXT: kmovw %edi, %k1
13356 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13357 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13358 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13359 ; NoVLX-NEXT: kmovw %k0, %eax
13360 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
13361 ; NoVLX-NEXT: vzeroupper
13364 %0 = bitcast <4 x i64> %__a to <4 x i64>
13365 %load = load <4 x i64>, ptr %__b
13366 %1 = bitcast <4 x i64> %load to <4 x i64>
13367 %2 = icmp sge <4 x i64> %0, %1
13368 %3 = bitcast i8 %__u to <8 x i1>
13369 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13370 %4 = and <4 x i1> %2, %extract.i
13371 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13372 %6 = bitcast <8 x i1> %5 to i8
; Broadcast form: an i64 loaded from %__b is splatted to all four lanes
; (insertelement plus an all-zero-index shufflevector) and compared (signed
; >=) with %__a. The <4 x i1> result is widened to <8 x i1> with zero lanes
; and bitcast to i8.
; With AVX512VL the checks expect a (%rdi){1to4} broadcast operand on a ymm
; vpcmpnltq. Without it the checks expect (%rdi){1to8} on a zmm compare plus
; a kshiftlw/kshiftrw $12 pair keeping the low 4 mask bits.
13377 define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
13378 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
13379 ; VLX: # %bb.0: # %entry
13380 ; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0
13381 ; VLX-NEXT: kmovd %k0, %eax
13382 ; VLX-NEXT: # kill: def $al killed $al killed $eax
13383 ; VLX-NEXT: vzeroupper
13386 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
13387 ; NoVLX: # %bb.0: # %entry
13388 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13389 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
13390 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13391 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13392 ; NoVLX-NEXT: kmovw %k0, %eax
13393 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
13394 ; NoVLX-NEXT: vzeroupper
13397 %0 = bitcast <4 x i64> %__a to <4 x i64>
13398 %load = load i64, ptr %__b
13399 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13400 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13401 %2 = icmp sge <4 x i64> %0, %1
13402 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13403 %4 = bitcast <8 x i1> %3 to i8
; Masked broadcast form: an i64 loaded from %__b is splatted to all four lanes
; and compared (signed >=) with %__a. Lanes 0..3 of %__u (viewed as <8 x i1>)
; are ANDed with the compare result, which is then widened to <8 x i1> with
; zero lanes and bitcast to i8.
; With AVX512VL the checks expect a {%k1}-masked ymm vpcmpnltq with a
; (%rsi){1to4} operand. Without it the checks expect (%rsi){1to8} on a
; {%k1}-masked zmm compare plus a kshiftlw/kshiftrw $12 pair keeping the low
; 4 mask bits.
13407 define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
13408 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
13409 ; VLX: # %bb.0: # %entry
13410 ; VLX-NEXT: kmovd %edi, %k1
13411 ; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
13412 ; VLX-NEXT: kmovd %k0, %eax
13413 ; VLX-NEXT: # kill: def $al killed $al killed $eax
13414 ; VLX-NEXT: vzeroupper
13417 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
13418 ; NoVLX: # %bb.0: # %entry
13419 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13420 ; NoVLX-NEXT: kmovw %edi, %k1
13421 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
13422 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13423 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13424 ; NoVLX-NEXT: kmovw %k0, %eax
13425 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
13426 ; NoVLX-NEXT: vzeroupper
13429 %0 = bitcast <4 x i64> %__a to <4 x i64>
13430 %load = load i64, ptr %__b
13431 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13432 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13433 %2 = icmp sge <4 x i64> %0, %1
13434 %3 = bitcast i8 %__u to <8 x i1>
13435 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13436 %4 = and <4 x i1> %extract.i, %2
13437 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13438 %6 = bitcast <8 x i1> %5 to i8
; Signed >= compare of the four i64 lanes of %__a against %__b. The <4 x i1>
; result is widened to 16 bits (shuffle indices 4-7 select lanes of the
; zeroinitializer operand) and bitcast to i16.
; Expected codegen: with AVX512VL a single ymm vpcmpnltq into a mask register;
; without it both operands are treated as zmm and a kshiftlw/kshiftrw by 12
; pair keeps only the low four mask bits.
13443 define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13444 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask:
13445 ; VLX: # %bb.0: # %entry
13446 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0
13447 ; VLX-NEXT: kmovd %k0, %eax
13448 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13449 ; VLX-NEXT: vzeroupper
13452 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask:
13453 ; NoVLX: # %bb.0: # %entry
13454 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13455 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13456 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13457 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13458 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13459 ; NoVLX-NEXT: kmovw %k0, %eax
13460 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13461 ; NoVLX-NEXT: vzeroupper
13464 %0 = bitcast <4 x i64> %__a to <4 x i64>
13465 %1 = bitcast <4 x i64> %__b to <4 x i64>
13466 %2 = icmp sge <4 x i64> %0, %1
13467 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13468 %4 = bitcast <16 x i1> %3 to i16
; Signed >= compare of the four i64 lanes of %__a against a <4 x i64> loaded
; from %__b. The <4 x i1> result is widened to 16 bits with zero lanes and
; bitcast to i16.
; Expected codegen: with AVX512VL the load is folded into a ymm vpcmpnltq;
; without it the vector is loaded with vmovdqa first, compared as zmm, and a
; kshiftlw/kshiftrw by 12 pair keeps only the low four mask bits.
13472 define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
13473 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem:
13474 ; VLX: # %bb.0: # %entry
13475 ; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
13476 ; VLX-NEXT: kmovd %k0, %eax
13477 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13478 ; VLX-NEXT: vzeroupper
13481 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem:
13482 ; NoVLX: # %bb.0: # %entry
13483 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13484 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
13485 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13486 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13487 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13488 ; NoVLX-NEXT: kmovw %k0, %eax
13489 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13490 ; NoVLX-NEXT: vzeroupper
13493 %0 = bitcast <4 x i64> %__a to <4 x i64>
13494 %load = load <4 x i64>, ptr %__b
13495 %1 = bitcast <4 x i64> %load to <4 x i64>
13496 %2 = icmp sge <4 x i64> %0, %1
13497 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13498 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of the four i64 lanes of %__a against %__b. The
; <4 x i1> result is ANDed with the low four bits of %__u, widened to 16 bits
; with zero lanes, and bitcast to i16.
; Expected codegen: with AVX512VL a ymm vpcmpnltq under write-mask k1; without
; it the masked compare is done on zmm and a kshiftlw/kshiftrw by 12 pair keeps
; only the low four mask bits.
13502 define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13503 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask:
13504 ; VLX: # %bb.0: # %entry
13505 ; VLX-NEXT: kmovd %edi, %k1
13506 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
13507 ; VLX-NEXT: kmovd %k0, %eax
13508 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13509 ; VLX-NEXT: vzeroupper
13512 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask:
13513 ; NoVLX: # %bb.0: # %entry
13514 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13515 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13516 ; NoVLX-NEXT: kmovw %edi, %k1
13517 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13518 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13519 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13520 ; NoVLX-NEXT: kmovw %k0, %eax
13521 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13522 ; NoVLX-NEXT: vzeroupper
13525 %0 = bitcast <4 x i64> %__a to <4 x i64>
13526 %1 = bitcast <4 x i64> %__b to <4 x i64>
13527 %2 = icmp sge <4 x i64> %0, %1
13528 %3 = bitcast i8 %__u to <8 x i1>
13529 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13530 %4 = and <4 x i1> %2, %extract.i
13531 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13532 %6 = bitcast <16 x i1> %5 to i16
; Masked signed >= compare of the four i64 lanes of %__a against a <4 x i64>
; loaded from %__b. The <4 x i1> result is ANDed with the low four bits of
; %__u, widened to 16 bits with zero lanes, and bitcast to i16.
; Expected codegen: with AVX512VL the load is folded into a masked ymm
; vpcmpnltq; without it the vector is loaded with vmovdqa, compared as zmm
; under k1, and a kshiftlw/kshiftrw by 12 pair keeps only the low four bits.
13536 define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
13537 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem:
13538 ; VLX: # %bb.0: # %entry
13539 ; VLX-NEXT: kmovd %edi, %k1
13540 ; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
13541 ; VLX-NEXT: kmovd %k0, %eax
13542 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13543 ; VLX-NEXT: vzeroupper
13546 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem:
13547 ; NoVLX: # %bb.0: # %entry
13548 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13549 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
13550 ; NoVLX-NEXT: kmovw %edi, %k1
13551 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13552 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13553 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13554 ; NoVLX-NEXT: kmovw %k0, %eax
13555 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13556 ; NoVLX-NEXT: vzeroupper
13559 %0 = bitcast <4 x i64> %__a to <4 x i64>
13560 %load = load <4 x i64>, ptr %__b
13561 %1 = bitcast <4 x i64> %load to <4 x i64>
13562 %2 = icmp sge <4 x i64> %0, %1
13563 %3 = bitcast i8 %__u to <8 x i1>
13564 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13565 %4 = and <4 x i1> %2, %extract.i
13566 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13567 %6 = bitcast <16 x i1> %5 to i16
; Signed >= compare of the four i64 lanes of %__a against a single i64 loaded
; from %__b and splatted to all four lanes. The <4 x i1> result is widened to
; 16 bits with zero lanes and bitcast to i16.
; Expected codegen: with AVX512VL a ymm vpcmpnltq with a {1to4} broadcast
; operand; without it a zmm compare with {1to8} followed by a
; kshiftlw/kshiftrw by 12 pair that keeps only the low four mask bits.
13572 define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
13573 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
13574 ; VLX: # %bb.0: # %entry
13575 ; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0
13576 ; VLX-NEXT: kmovd %k0, %eax
13577 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13578 ; VLX-NEXT: vzeroupper
13581 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
13582 ; NoVLX: # %bb.0: # %entry
13583 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13584 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
13585 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13586 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13587 ; NoVLX-NEXT: kmovw %k0, %eax
13588 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13589 ; NoVLX-NEXT: vzeroupper
13592 %0 = bitcast <4 x i64> %__a to <4 x i64>
13593 %load = load i64, ptr %__b
13594 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13595 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13596 %2 = icmp sge <4 x i64> %0, %1
13597 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13598 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of the four i64 lanes of %__a against a single i64
; loaded from %__b and splatted to all four lanes. The <4 x i1> result is ANDed
; with the low four bits of %__u, widened to 16 bits with zero lanes, and
; bitcast to i16.
; Expected codegen: with AVX512VL a masked ymm vpcmpnltq with a {1to4}
; broadcast operand; without it a masked zmm compare with {1to8} followed by a
; kshiftlw/kshiftrw by 12 pair that keeps only the low four mask bits.
13602 define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
13603 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
13604 ; VLX: # %bb.0: # %entry
13605 ; VLX-NEXT: kmovd %edi, %k1
13606 ; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
13607 ; VLX-NEXT: kmovd %k0, %eax
13608 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13609 ; VLX-NEXT: vzeroupper
13612 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
13613 ; NoVLX: # %bb.0: # %entry
13614 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13615 ; NoVLX-NEXT: kmovw %edi, %k1
13616 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
13617 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13618 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13619 ; NoVLX-NEXT: kmovw %k0, %eax
13620 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13621 ; NoVLX-NEXT: vzeroupper
13624 %0 = bitcast <4 x i64> %__a to <4 x i64>
13625 %load = load i64, ptr %__b
13626 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13627 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13628 %2 = icmp sge <4 x i64> %0, %1
13629 %3 = bitcast i8 %__u to <8 x i1>
13630 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13631 %4 = and <4 x i1> %extract.i, %2
13632 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13633 %6 = bitcast <16 x i1> %5 to i16
; Signed >= compare of the four i64 lanes of %__a against %__b. The <4 x i1>
; result is widened to 32 bits (shuffle indices 4-7 select lanes of the
; zeroinitializer operand) and bitcast to i32.
; Expected codegen: with AVX512VL a single ymm vpcmpnltq into a mask register;
; without it both operands are treated as zmm and a kshiftlw/kshiftrw by 12
; pair keeps only the low four mask bits.
13638 define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13639 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
13640 ; VLX: # %bb.0: # %entry
13641 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0
13642 ; VLX-NEXT: kmovd %k0, %eax
13643 ; VLX-NEXT: vzeroupper
13646 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
13647 ; NoVLX: # %bb.0: # %entry
13648 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13649 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13650 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13651 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13652 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13653 ; NoVLX-NEXT: kmovw %k0, %eax
13654 ; NoVLX-NEXT: vzeroupper
13657 %0 = bitcast <4 x i64> %__a to <4 x i64>
13658 %1 = bitcast <4 x i64> %__b to <4 x i64>
13659 %2 = icmp sge <4 x i64> %0, %1
13660 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13661 %4 = bitcast <32 x i1> %3 to i32
; Signed >= compare of the four i64 lanes of %__a against a <4 x i64> loaded
; from %__b. The <4 x i1> result is widened to 32 bits with zero lanes and
; bitcast to i32.
; Expected codegen: with AVX512VL the load is folded into a ymm vpcmpnltq;
; without it the vector is loaded with vmovdqa first, compared as zmm, and a
; kshiftlw/kshiftrw by 12 pair keeps only the low four mask bits.
13665 define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
13666 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
13667 ; VLX: # %bb.0: # %entry
13668 ; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
13669 ; VLX-NEXT: kmovd %k0, %eax
13670 ; VLX-NEXT: vzeroupper
13673 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
13674 ; NoVLX: # %bb.0: # %entry
13675 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13676 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
13677 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13678 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13679 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13680 ; NoVLX-NEXT: kmovw %k0, %eax
13681 ; NoVLX-NEXT: vzeroupper
13684 %0 = bitcast <4 x i64> %__a to <4 x i64>
13685 %load = load <4 x i64>, ptr %__b
13686 %1 = bitcast <4 x i64> %load to <4 x i64>
13687 %2 = icmp sge <4 x i64> %0, %1
13688 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13689 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of the four i64 lanes of %__a against %__b. The
; <4 x i1> result is ANDed with the low four bits of %__u, widened to 32 bits
; with zero lanes, and bitcast to i32.
; Expected codegen: with AVX512VL a ymm vpcmpnltq under write-mask k1; without
; it the masked compare is done on zmm and a kshiftlw/kshiftrw by 12 pair keeps
; only the low four mask bits.
13693 define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13694 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
13695 ; VLX: # %bb.0: # %entry
13696 ; VLX-NEXT: kmovd %edi, %k1
13697 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
13698 ; VLX-NEXT: kmovd %k0, %eax
13699 ; VLX-NEXT: vzeroupper
13702 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
13703 ; NoVLX: # %bb.0: # %entry
13704 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13705 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13706 ; NoVLX-NEXT: kmovw %edi, %k1
13707 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13708 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13709 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13710 ; NoVLX-NEXT: kmovw %k0, %eax
13711 ; NoVLX-NEXT: vzeroupper
13714 %0 = bitcast <4 x i64> %__a to <4 x i64>
13715 %1 = bitcast <4 x i64> %__b to <4 x i64>
13716 %2 = icmp sge <4 x i64> %0, %1
13717 %3 = bitcast i8 %__u to <8 x i1>
13718 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13719 %4 = and <4 x i1> %2, %extract.i
13720 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13721 %6 = bitcast <32 x i1> %5 to i32
; Masked signed >= compare of the four i64 lanes of %__a against a <4 x i64>
; loaded from %__b. The <4 x i1> result is ANDed with the low four bits of
; %__u, widened to 32 bits with zero lanes, and bitcast to i32.
; Expected codegen: with AVX512VL the load is folded into a masked ymm
; vpcmpnltq; without it the vector is loaded with vmovdqa, compared as zmm
; under k1, and a kshiftlw/kshiftrw by 12 pair keeps only the low four bits.
13725 define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
13726 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
13727 ; VLX: # %bb.0: # %entry
13728 ; VLX-NEXT: kmovd %edi, %k1
13729 ; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
13730 ; VLX-NEXT: kmovd %k0, %eax
13731 ; VLX-NEXT: vzeroupper
13734 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
13735 ; NoVLX: # %bb.0: # %entry
13736 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13737 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
13738 ; NoVLX-NEXT: kmovw %edi, %k1
13739 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13740 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13741 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13742 ; NoVLX-NEXT: kmovw %k0, %eax
13743 ; NoVLX-NEXT: vzeroupper
13746 %0 = bitcast <4 x i64> %__a to <4 x i64>
13747 %load = load <4 x i64>, ptr %__b
13748 %1 = bitcast <4 x i64> %load to <4 x i64>
13749 %2 = icmp sge <4 x i64> %0, %1
13750 %3 = bitcast i8 %__u to <8 x i1>
13751 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13752 %4 = and <4 x i1> %2, %extract.i
13753 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13754 %6 = bitcast <32 x i1> %5 to i32
; Signed >= compare of the four i64 lanes of %__a against a single i64 loaded
; from %__b and splatted to all four lanes. The <4 x i1> result is widened to
; 32 bits with zero lanes and bitcast to i32.
; Expected codegen: with AVX512VL a ymm vpcmpnltq with a {1to4} broadcast
; operand; without it a zmm compare with {1to8} followed by a
; kshiftlw/kshiftrw by 12 pair that keeps only the low four mask bits.
13759 define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
13760 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
13761 ; VLX: # %bb.0: # %entry
13762 ; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0
13763 ; VLX-NEXT: kmovd %k0, %eax
13764 ; VLX-NEXT: vzeroupper
13767 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
13768 ; NoVLX: # %bb.0: # %entry
13769 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13770 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
13771 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13772 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13773 ; NoVLX-NEXT: kmovw %k0, %eax
13774 ; NoVLX-NEXT: vzeroupper
13777 %0 = bitcast <4 x i64> %__a to <4 x i64>
13778 %load = load i64, ptr %__b
13779 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13780 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13781 %2 = icmp sge <4 x i64> %0, %1
13782 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13783 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of the four i64 lanes of %__a against a single i64
; loaded from %__b and splatted to all four lanes. The <4 x i1> result is ANDed
; with the low four bits of %__u, widened to 32 bits with zero lanes, and
; bitcast to i32.
; Expected codegen: with AVX512VL a masked ymm vpcmpnltq with a {1to4}
; broadcast operand; without it a masked zmm compare with {1to8} followed by a
; kshiftlw/kshiftrw by 12 pair that keeps only the low four mask bits.
13787 define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
13788 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
13789 ; VLX: # %bb.0: # %entry
13790 ; VLX-NEXT: kmovd %edi, %k1
13791 ; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
13792 ; VLX-NEXT: kmovd %k0, %eax
13793 ; VLX-NEXT: vzeroupper
13796 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
13797 ; NoVLX: # %bb.0: # %entry
13798 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13799 ; NoVLX-NEXT: kmovw %edi, %k1
13800 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
13801 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13802 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13803 ; NoVLX-NEXT: kmovw %k0, %eax
13804 ; NoVLX-NEXT: vzeroupper
13807 %0 = bitcast <4 x i64> %__a to <4 x i64>
13808 %load = load i64, ptr %__b
13809 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13810 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13811 %2 = icmp sge <4 x i64> %0, %1
13812 %3 = bitcast i8 %__u to <8 x i1>
13813 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13814 %4 = and <4 x i1> %extract.i, %2
13815 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13816 %6 = bitcast <32 x i1> %5 to i32
; Signed >= compare of the four i64 lanes of %__a against %__b. The <4 x i1>
; result is widened to 64 bits (shuffle indices 4-7 select lanes of the
; zeroinitializer operand) and bitcast to i64.
; Expected codegen: with AVX512VL a single ymm vpcmpnltq whose mask is read
; with kmovq; without it both operands are treated as zmm, a
; kshiftlw/kshiftrw by 12 pair keeps only the low four mask bits, and the
; mask is read with kmovw.
13821 define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13822 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
13823 ; VLX: # %bb.0: # %entry
13824 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0
13825 ; VLX-NEXT: kmovq %k0, %rax
13826 ; VLX-NEXT: vzeroupper
13829 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
13830 ; NoVLX: # %bb.0: # %entry
13831 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13832 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13833 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13834 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13835 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13836 ; NoVLX-NEXT: kmovw %k0, %eax
13837 ; NoVLX-NEXT: vzeroupper
13840 %0 = bitcast <4 x i64> %__a to <4 x i64>
13841 %1 = bitcast <4 x i64> %__b to <4 x i64>
13842 %2 = icmp sge <4 x i64> %0, %1
13843 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13844 %4 = bitcast <64 x i1> %3 to i64
; Signed >= compare of the four i64 lanes of %__a against a <4 x i64> loaded
; from %__b. The <4 x i1> result is widened to 64 bits with zero lanes and
; bitcast to i64.
; Expected codegen: with AVX512VL the load is folded into a ymm vpcmpnltq and
; the mask is read with kmovq; without it the vector is loaded with vmovdqa,
; compared as zmm, a kshiftlw/kshiftrw by 12 pair keeps only the low four mask
; bits, and the mask is read with kmovw.
13848 define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
13849 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
13850 ; VLX: # %bb.0: # %entry
13851 ; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
13852 ; VLX-NEXT: kmovq %k0, %rax
13853 ; VLX-NEXT: vzeroupper
13856 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
13857 ; NoVLX: # %bb.0: # %entry
13858 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13859 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
13860 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13861 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13862 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13863 ; NoVLX-NEXT: kmovw %k0, %eax
13864 ; NoVLX-NEXT: vzeroupper
13867 %0 = bitcast <4 x i64> %__a to <4 x i64>
13868 %load = load <4 x i64>, ptr %__b
13869 %1 = bitcast <4 x i64> %load to <4 x i64>
13870 %2 = icmp sge <4 x i64> %0, %1
13871 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13872 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of the four i64 lanes of %__a against %__b. The
; <4 x i1> result is ANDed with the low four bits of %__u, widened to 64 bits
; with zero lanes, and bitcast to i64.
; Expected codegen: with AVX512VL a ymm vpcmpnltq under write-mask k1, read
; back with kmovq; without it the masked compare is done on zmm, a
; kshiftlw/kshiftrw by 12 pair keeps only the low four mask bits, and the mask
; is read with kmovw.
13876 define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13877 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
13878 ; VLX: # %bb.0: # %entry
13879 ; VLX-NEXT: kmovd %edi, %k1
13880 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
13881 ; VLX-NEXT: kmovq %k0, %rax
13882 ; VLX-NEXT: vzeroupper
13885 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
13886 ; NoVLX: # %bb.0: # %entry
13887 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13888 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13889 ; NoVLX-NEXT: kmovw %edi, %k1
13890 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13891 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13892 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13893 ; NoVLX-NEXT: kmovw %k0, %eax
13894 ; NoVLX-NEXT: vzeroupper
13897 %0 = bitcast <4 x i64> %__a to <4 x i64>
13898 %1 = bitcast <4 x i64> %__b to <4 x i64>
13899 %2 = icmp sge <4 x i64> %0, %1
13900 %3 = bitcast i8 %__u to <8 x i1>
13901 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13902 %4 = and <4 x i1> %2, %extract.i
13903 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13904 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare of the four i64 lanes of %__a against a <4 x i64>
; loaded from %__b. The <4 x i1> result is ANDed with the low four bits of
; %__u, widened to 64 bits with zero lanes, and bitcast to i64.
; Expected codegen: with AVX512VL the load is folded into a masked ymm
; vpcmpnltq and the mask is read with kmovq; without it the vector is loaded
; with vmovdqa, compared as zmm under k1, a kshiftlw/kshiftrw by 12 pair keeps
; only the low four mask bits, and the mask is read with kmovw.
13908 define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
13909 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
13910 ; VLX: # %bb.0: # %entry
13911 ; VLX-NEXT: kmovd %edi, %k1
13912 ; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
13913 ; VLX-NEXT: kmovq %k0, %rax
13914 ; VLX-NEXT: vzeroupper
13917 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
13918 ; NoVLX: # %bb.0: # %entry
13919 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13920 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
13921 ; NoVLX-NEXT: kmovw %edi, %k1
13922 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13923 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13924 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13925 ; NoVLX-NEXT: kmovw %k0, %eax
13926 ; NoVLX-NEXT: vzeroupper
13929 %0 = bitcast <4 x i64> %__a to <4 x i64>
13930 %load = load <4 x i64>, ptr %__b
13931 %1 = bitcast <4 x i64> %load to <4 x i64>
13932 %2 = icmp sge <4 x i64> %0, %1
13933 %3 = bitcast i8 %__u to <8 x i1>
13934 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13935 %4 = and <4 x i1> %2, %extract.i
13936 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13937 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of the four i64 lanes of %__a against a single i64 loaded
; from %__b and splatted to all four lanes. The <4 x i1> result is widened to
; 64 bits with zero lanes and bitcast to i64.
; Expected codegen: with AVX512VL a ymm vpcmpnltq with a {1to4} broadcast
; operand, read back with kmovq; without it a zmm compare with {1to8}, a
; kshiftlw/kshiftrw by 12 pair that keeps only the low four mask bits, and a
; kmovw read of the mask.
13942 define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
13943 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
13944 ; VLX: # %bb.0: # %entry
13945 ; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0
13946 ; VLX-NEXT: kmovq %k0, %rax
13947 ; VLX-NEXT: vzeroupper
13950 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
13951 ; NoVLX: # %bb.0: # %entry
13952 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13953 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
13954 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13955 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13956 ; NoVLX-NEXT: kmovw %k0, %eax
13957 ; NoVLX-NEXT: vzeroupper
13960 %0 = bitcast <4 x i64> %__a to <4 x i64>
13961 %load = load i64, ptr %__b
13962 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13963 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13964 %2 = icmp sge <4 x i64> %0, %1
13965 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13966 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of the four i64 lanes of %__a against a single i64
; loaded from %__b and splatted to all four lanes. The <4 x i1> result is ANDed
; with the low four bits of %__u, widened to 64 bits with zero lanes, and
; bitcast to i64.
; Expected codegen: with AVX512VL a masked ymm vpcmpnltq with a {1to4}
; broadcast operand, read back with kmovq; without it a masked zmm compare
; with {1to8}, a kshiftlw/kshiftrw by 12 pair that keeps only the low four
; mask bits, and a kmovw read of the mask.
13970 define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
13971 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
13972 ; VLX: # %bb.0: # %entry
13973 ; VLX-NEXT: kmovd %edi, %k1
13974 ; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
13975 ; VLX-NEXT: kmovq %k0, %rax
13976 ; VLX-NEXT: vzeroupper
13979 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
13980 ; NoVLX: # %bb.0: # %entry
13981 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13982 ; NoVLX-NEXT: kmovw %edi, %k1
13983 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
13984 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13985 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13986 ; NoVLX-NEXT: kmovw %k0, %eax
13987 ; NoVLX-NEXT: vzeroupper
13990 %0 = bitcast <4 x i64> %__a to <4 x i64>
13991 %load = load i64, ptr %__b
13992 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13993 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13994 %2 = icmp sge <4 x i64> %0, %1
13995 %3 = bitcast i8 %__u to <8 x i1>
13996 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13997 %4 = and <4 x i1> %extract.i, %2
13998 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13999 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of the eight i64 lanes of %__a against %__b. The <8 x i1>
; result is widened to 16 bits (shuffle indices 8-15 select lanes of the
; zeroinitializer operand) and bitcast to i16.
; Expected codegen: both configurations use a single zmm vpcmpnltq with no
; mask shifts; they differ only in reading the mask with kmovd versus kmovw.
14004 define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14005 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask:
14006 ; VLX: # %bb.0: # %entry
14007 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14008 ; VLX-NEXT: kmovd %k0, %eax
14009 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14010 ; VLX-NEXT: vzeroupper
14013 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask:
14014 ; NoVLX: # %bb.0: # %entry
14015 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14016 ; NoVLX-NEXT: kmovw %k0, %eax
14017 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14018 ; NoVLX-NEXT: vzeroupper
14021 %0 = bitcast <8 x i64> %__a to <8 x i64>
14022 %1 = bitcast <8 x i64> %__b to <8 x i64>
14023 %2 = icmp sge <8 x i64> %0, %1
14024 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14025 %4 = bitcast <16 x i1> %3 to i16
; Signed >= compare of the eight i64 lanes of %__a against an <8 x i64> loaded
; from %__b. The <8 x i1> result is widened to 16 bits with zero lanes and
; bitcast to i16.
; Expected codegen: both configurations fold the load into a zmm vpcmpnltq;
; they differ only in reading the mask with kmovd versus kmovw.
14029 define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
14030 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem:
14031 ; VLX: # %bb.0: # %entry
14032 ; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
14033 ; VLX-NEXT: kmovd %k0, %eax
14034 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14035 ; VLX-NEXT: vzeroupper
14038 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem:
14039 ; NoVLX: # %bb.0: # %entry
14040 ; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
14041 ; NoVLX-NEXT: kmovw %k0, %eax
14042 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14043 ; NoVLX-NEXT: vzeroupper
14046 %0 = bitcast <8 x i64> %__a to <8 x i64>
14047 %load = load <8 x i64>, ptr %__b
14048 %1 = bitcast <8 x i64> %load to <8 x i64>
14049 %2 = icmp sge <8 x i64> %0, %1
14050 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14051 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of the eight i64 lanes of %__a against %__b. The
; <8 x i1> result is ANDed with the eight bits of %__u, widened to 16 bits with
; zero lanes, and bitcast to i16.
; Expected codegen: both configurations move %__u into k1 and issue a zmm
; vpcmpnltq under that write-mask; they differ only in kmovd versus kmovw.
14055 define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14056 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask:
14057 ; VLX: # %bb.0: # %entry
14058 ; VLX-NEXT: kmovd %edi, %k1
14059 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14060 ; VLX-NEXT: kmovd %k0, %eax
14061 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14062 ; VLX-NEXT: vzeroupper
14065 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask:
14066 ; NoVLX: # %bb.0: # %entry
14067 ; NoVLX-NEXT: kmovw %edi, %k1
14068 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14069 ; NoVLX-NEXT: kmovw %k0, %eax
14070 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14071 ; NoVLX-NEXT: vzeroupper
14074 %0 = bitcast <8 x i64> %__a to <8 x i64>
14075 %1 = bitcast <8 x i64> %__b to <8 x i64>
14076 %2 = icmp sge <8 x i64> %0, %1
14077 %3 = bitcast i8 %__u to <8 x i1>
14078 %4 = and <8 x i1> %2, %3
14079 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14080 %6 = bitcast <16 x i1> %5 to i16
; Masked signed >= compare of the eight i64 lanes of %__a against an <8 x i64>
; loaded from %__b. The <8 x i1> result is ANDed with the eight bits of %__u,
; widened to 16 bits with zero lanes, and bitcast to i16.
; Expected codegen: both configurations fold the load into a zmm vpcmpnltq
; under write-mask k1; they differ only in kmovd versus kmovw.
14084 define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
14085 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem:
14086 ; VLX: # %bb.0: # %entry
14087 ; VLX-NEXT: kmovd %edi, %k1
14088 ; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14089 ; VLX-NEXT: kmovd %k0, %eax
14090 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14091 ; VLX-NEXT: vzeroupper
14094 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem:
14095 ; NoVLX: # %bb.0: # %entry
14096 ; NoVLX-NEXT: kmovw %edi, %k1
14097 ; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14098 ; NoVLX-NEXT: kmovw %k0, %eax
14099 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14100 ; NoVLX-NEXT: vzeroupper
14103 %0 = bitcast <8 x i64> %__a to <8 x i64>
14104 %load = load <8 x i64>, ptr %__b
14105 %1 = bitcast <8 x i64> %load to <8 x i64>
14106 %2 = icmp sge <8 x i64> %0, %1
14107 %3 = bitcast i8 %__u to <8 x i1>
14108 %4 = and <8 x i1> %2, %3
14109 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14110 %6 = bitcast <16 x i1> %5 to i16
; Signed >= compare of the eight i64 lanes of %__a against a single i64 loaded
; from %__b and splatted to all eight lanes. The <8 x i1> result is widened to
; 16 bits with zero lanes and bitcast to i16.
; Expected codegen: both configurations use a zmm vpcmpnltq with a {1to8}
; broadcast operand; they differ only in kmovd versus kmovw.
14115 define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
14116 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
14117 ; VLX: # %bb.0: # %entry
14118 ; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14119 ; VLX-NEXT: kmovd %k0, %eax
14120 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14121 ; VLX-NEXT: vzeroupper
14124 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
14125 ; NoVLX: # %bb.0: # %entry
14126 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14127 ; NoVLX-NEXT: kmovw %k0, %eax
14128 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14129 ; NoVLX-NEXT: vzeroupper
14132 %0 = bitcast <8 x i64> %__a to <8 x i64>
14133 %load = load i64, ptr %__b
14134 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14135 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14136 %2 = icmp sge <8 x i64> %0, %1
14137 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14138 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of <8 x i64> %__a against a single i64 loaded from
; %__b and splatted to all 8 lanes; the result is ANDed with the i8 mask %__u
; and widened to 16 lanes.
; Both expected sequences use one vpcmpnltq with a {1to8} broadcast operand and
; a {%k1} write-mask.
14142 define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
14143 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
14144 ; VLX: # %bb.0: # %entry
14145 ; VLX-NEXT: kmovd %edi, %k1
14146 ; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14147 ; VLX-NEXT: kmovd %k0, %eax
14148 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14149 ; VLX-NEXT: vzeroupper
14152 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
14153 ; NoVLX: # %bb.0: # %entry
14154 ; NoVLX-NEXT: kmovw %edi, %k1
14155 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14156 ; NoVLX-NEXT: kmovw %k0, %eax
14157 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14158 ; NoVLX-NEXT: vzeroupper
14161 %0 = bitcast <8 x i64> %__a to <8 x i64>
14162 %load = load i64, ptr %__b
; insertelement into lane 0 followed by an all-zero shuffle mask is the splat idiom.
14163 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14164 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14165 %2 = icmp sge <8 x i64> %0, %1
14166 %3 = bitcast i8 %__u to <8 x i1>
14167 %4 = and <8 x i1> %3, %2
; Shuffle indices 0-7 pick the masked compare lanes; 8-15 pick zeroinitializer, zero-filling lanes 8-15.
14168 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14169 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed >= compare of two <8 x i64> register operands, with the
; 8-lane result widened to 32 lanes.
; Both expected sequences are a single vpcmpnltq followed by a k-register to
; GPR move (kmovd in the VLX run, kmovw in the NoVLX run).
14174 define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14175 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask:
14176 ; VLX: # %bb.0: # %entry
14177 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14178 ; VLX-NEXT: kmovd %k0, %eax
14179 ; VLX-NEXT: vzeroupper
14182 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask:
14183 ; NoVLX: # %bb.0: # %entry
14184 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14185 ; NoVLX-NEXT: kmovw %k0, %eax
14186 ; NoVLX-NEXT: vzeroupper
14189 %0 = bitcast <8 x i64> %__a to <8 x i64>
14190 %1 = bitcast <8 x i64> %__b to <8 x i64>
14191 %2 = icmp sge <8 x i64> %0, %1
; Shuffle indices 0-7 pick the compare lanes; every index >= 8 picks zeroinitializer, zero-filling lanes 8-31.
14192 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14193 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed >= compare of <8 x i64> %__a against a full vector loaded
; from %__b, with the 8-lane result widened to 32 lanes.
; Both expected sequences fold the load into the memory operand of vpcmpnltq.
14197 define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
14198 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem:
14199 ; VLX: # %bb.0: # %entry
14200 ; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
14201 ; VLX-NEXT: kmovd %k0, %eax
14202 ; VLX-NEXT: vzeroupper
14205 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem:
14206 ; NoVLX: # %bb.0: # %entry
14207 ; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
14208 ; NoVLX-NEXT: kmovw %k0, %eax
14209 ; NoVLX-NEXT: vzeroupper
14212 %0 = bitcast <8 x i64> %__a to <8 x i64>
14213 %load = load <8 x i64>, ptr %__b
14214 %1 = bitcast <8 x i64> %load to <8 x i64>
14215 %2 = icmp sge <8 x i64> %0, %1
; Shuffle indices 0-7 pick the compare lanes; every index >= 8 picks zeroinitializer, zero-filling lanes 8-31.
14216 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14217 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of two <8 x i64> register operands; the result is
; ANDed with the i8 mask %__u and widened to 32 lanes.
; Both expected sequences move %__u into %k1 and use it as the write-mask of a
; single vpcmpnltq.
14221 define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14222 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask:
14223 ; VLX: # %bb.0: # %entry
14224 ; VLX-NEXT: kmovd %edi, %k1
14225 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14226 ; VLX-NEXT: kmovd %k0, %eax
14227 ; VLX-NEXT: vzeroupper
14230 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask:
14231 ; NoVLX: # %bb.0: # %entry
14232 ; NoVLX-NEXT: kmovw %edi, %k1
14233 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14234 ; NoVLX-NEXT: kmovw %k0, %eax
14235 ; NoVLX-NEXT: vzeroupper
14238 %0 = bitcast <8 x i64> %__a to <8 x i64>
14239 %1 = bitcast <8 x i64> %__b to <8 x i64>
14240 %2 = icmp sge <8 x i64> %0, %1
14241 %3 = bitcast i8 %__u to <8 x i1>
14242 %4 = and <8 x i1> %2, %3
; Shuffle indices 0-7 pick the masked compare lanes; every index >= 8 picks zeroinitializer, zero-filling lanes 8-31.
14243 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14244 %6 = bitcast <32 x i1> %5 to i32
; Masked signed >= compare of <8 x i64> %__a against a full vector loaded from
; %__b; the result is ANDed with the i8 mask %__u and widened to 32 lanes.
; Both expected sequences fold the load and the mask into one vpcmpnltq with a
; {%k1} write-mask.
14248 define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
14249 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem:
14250 ; VLX: # %bb.0: # %entry
14251 ; VLX-NEXT: kmovd %edi, %k1
14252 ; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14253 ; VLX-NEXT: kmovd %k0, %eax
14254 ; VLX-NEXT: vzeroupper
14257 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem:
14258 ; NoVLX: # %bb.0: # %entry
14259 ; NoVLX-NEXT: kmovw %edi, %k1
14260 ; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14261 ; NoVLX-NEXT: kmovw %k0, %eax
14262 ; NoVLX-NEXT: vzeroupper
14265 %0 = bitcast <8 x i64> %__a to <8 x i64>
14266 %load = load <8 x i64>, ptr %__b
14267 %1 = bitcast <8 x i64> %load to <8 x i64>
14268 %2 = icmp sge <8 x i64> %0, %1
14269 %3 = bitcast i8 %__u to <8 x i1>
14270 %4 = and <8 x i1> %2, %3
; Shuffle indices 0-7 pick the masked compare lanes; every index >= 8 picks zeroinitializer, zero-filling lanes 8-31.
14271 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14272 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed >= compare of <8 x i64> %__a against a single i64 loaded from
; %__b and splatted to all 8 lanes, with the result widened to 32 lanes.
; Both expected sequences fold the scalar load + splat into the {1to8}
; embedded-broadcast memory operand of vpcmpnltq.
14277 define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
14278 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
14279 ; VLX: # %bb.0: # %entry
14280 ; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14281 ; VLX-NEXT: kmovd %k0, %eax
14282 ; VLX-NEXT: vzeroupper
14285 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
14286 ; NoVLX: # %bb.0: # %entry
14287 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14288 ; NoVLX-NEXT: kmovw %k0, %eax
14289 ; NoVLX-NEXT: vzeroupper
14292 %0 = bitcast <8 x i64> %__a to <8 x i64>
14293 %load = load i64, ptr %__b
; insertelement into lane 0 followed by an all-zero shuffle mask is the splat idiom.
14294 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14295 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14296 %2 = icmp sge <8 x i64> %0, %1
; Shuffle indices 0-7 pick the compare lanes; every index >= 8 picks zeroinitializer, zero-filling lanes 8-31.
14297 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14298 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of <8 x i64> %__a against a single i64 loaded from
; %__b and splatted to all 8 lanes; the result is ANDed with the i8 mask %__u
; and widened to 32 lanes.
; Both expected sequences use one vpcmpnltq with a {1to8} broadcast operand and
; a {%k1} write-mask.
14302 define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
14303 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
14304 ; VLX: # %bb.0: # %entry
14305 ; VLX-NEXT: kmovd %edi, %k1
14306 ; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14307 ; VLX-NEXT: kmovd %k0, %eax
14308 ; VLX-NEXT: vzeroupper
14311 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
14312 ; NoVLX: # %bb.0: # %entry
14313 ; NoVLX-NEXT: kmovw %edi, %k1
14314 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14315 ; NoVLX-NEXT: kmovw %k0, %eax
14316 ; NoVLX-NEXT: vzeroupper
14319 %0 = bitcast <8 x i64> %__a to <8 x i64>
14320 %load = load i64, ptr %__b
; insertelement into lane 0 followed by an all-zero shuffle mask is the splat idiom.
14321 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14322 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14323 %2 = icmp sge <8 x i64> %0, %1
14324 %3 = bitcast i8 %__u to <8 x i1>
14325 %4 = and <8 x i1> %3, %2
; Shuffle indices 0-7 pick the masked compare lanes; every index >= 8 picks zeroinitializer, zero-filling lanes 8-31.
14326 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14327 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed >= compare of two <8 x i64> register operands, with the
; 8-lane result widened to 64 lanes.
; Both expected sequences are a single vpcmpnltq followed by a k-register to
; GPR move (kmovq to %rax in the VLX run, kmovw to %eax in the NoVLX run).
14332 define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14333 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask:
14334 ; VLX: # %bb.0: # %entry
14335 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14336 ; VLX-NEXT: kmovq %k0, %rax
14337 ; VLX-NEXT: vzeroupper
14340 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask:
14341 ; NoVLX: # %bb.0: # %entry
14342 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14343 ; NoVLX-NEXT: kmovw %k0, %eax
14344 ; NoVLX-NEXT: vzeroupper
14347 %0 = bitcast <8 x i64> %__a to <8 x i64>
14348 %1 = bitcast <8 x i64> %__b to <8 x i64>
14349 %2 = icmp sge <8 x i64> %0, %1
; Shuffle indices 0-7 pick the compare lanes; every index >= 8 picks zeroinitializer, zero-filling lanes 8-63.
14350 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14351 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed >= compare of <8 x i64> %__a against a full vector loaded
; from %__b, with the 8-lane result widened to 64 lanes.
; Both expected sequences fold the load into the memory operand of vpcmpnltq.
14355 define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
14356 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem:
14357 ; VLX: # %bb.0: # %entry
14358 ; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
14359 ; VLX-NEXT: kmovq %k0, %rax
14360 ; VLX-NEXT: vzeroupper
14363 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem:
14364 ; NoVLX: # %bb.0: # %entry
14365 ; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
14366 ; NoVLX-NEXT: kmovw %k0, %eax
14367 ; NoVLX-NEXT: vzeroupper
14370 %0 = bitcast <8 x i64> %__a to <8 x i64>
14371 %load = load <8 x i64>, ptr %__b
14372 %1 = bitcast <8 x i64> %load to <8 x i64>
14373 %2 = icmp sge <8 x i64> %0, %1
; Shuffle indices 0-7 pick the compare lanes; every index >= 8 picks zeroinitializer, zero-filling lanes 8-63.
14374 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14375 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of two <8 x i64> register operands; the result is
; ANDed with the i8 mask %__u and widened to 64 lanes.
; Both expected sequences move %__u into %k1 and use it as the write-mask of a
; single vpcmpnltq.
14379 define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14380 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask:
14381 ; VLX: # %bb.0: # %entry
14382 ; VLX-NEXT: kmovd %edi, %k1
14383 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14384 ; VLX-NEXT: kmovq %k0, %rax
14385 ; VLX-NEXT: vzeroupper
14388 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask:
14389 ; NoVLX: # %bb.0: # %entry
14390 ; NoVLX-NEXT: kmovw %edi, %k1
14391 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14392 ; NoVLX-NEXT: kmovw %k0, %eax
14393 ; NoVLX-NEXT: vzeroupper
14396 %0 = bitcast <8 x i64> %__a to <8 x i64>
14397 %1 = bitcast <8 x i64> %__b to <8 x i64>
14398 %2 = icmp sge <8 x i64> %0, %1
14399 %3 = bitcast i8 %__u to <8 x i1>
14400 %4 = and <8 x i1> %2, %3
; Shuffle indices 0-7 pick the masked compare lanes; every index >= 8 picks zeroinitializer, zero-filling lanes 8-63.
14401 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14402 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare of <8 x i64> %__a against a full vector loaded from
; %__b; the result is ANDed with the i8 mask %__u and widened to 64 lanes.
; Both expected sequences fold the load and the mask into one vpcmpnltq with a
; {%k1} write-mask.
14406 define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
14407 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem:
14408 ; VLX: # %bb.0: # %entry
14409 ; VLX-NEXT: kmovd %edi, %k1
14410 ; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14411 ; VLX-NEXT: kmovq %k0, %rax
14412 ; VLX-NEXT: vzeroupper
14415 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem:
14416 ; NoVLX: # %bb.0: # %entry
14417 ; NoVLX-NEXT: kmovw %edi, %k1
14418 ; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14419 ; NoVLX-NEXT: kmovw %k0, %eax
14420 ; NoVLX-NEXT: vzeroupper
14423 %0 = bitcast <8 x i64> %__a to <8 x i64>
14424 %load = load <8 x i64>, ptr %__b
14425 %1 = bitcast <8 x i64> %load to <8 x i64>
14426 %2 = icmp sge <8 x i64> %0, %1
14427 %3 = bitcast i8 %__u to <8 x i1>
14428 %4 = and <8 x i1> %2, %3
; Shuffle indices 0-7 pick the masked compare lanes; every index >= 8 picks zeroinitializer, zero-filling lanes 8-63.
14429 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14430 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed >= compare of <8 x i64> %__a against a single i64 loaded from
; %__b and splatted to all 8 lanes, with the result widened to 64 lanes.
; Both expected sequences fold the scalar load + splat into the {1to8}
; embedded-broadcast memory operand of vpcmpnltq.
14435 define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
14436 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
14437 ; VLX: # %bb.0: # %entry
14438 ; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14439 ; VLX-NEXT: kmovq %k0, %rax
14440 ; VLX-NEXT: vzeroupper
14443 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
14444 ; NoVLX: # %bb.0: # %entry
14445 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14446 ; NoVLX-NEXT: kmovw %k0, %eax
14447 ; NoVLX-NEXT: vzeroupper
14450 %0 = bitcast <8 x i64> %__a to <8 x i64>
14451 %load = load i64, ptr %__b
; insertelement into lane 0 followed by an all-zero shuffle mask is the splat idiom.
14452 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14453 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14454 %2 = icmp sge <8 x i64> %0, %1
; Shuffle indices 0-7 pick the compare lanes; every index >= 8 picks zeroinitializer, zero-filling lanes 8-63.
14455 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14456 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of <8 x i64> %__a against a single i64 loaded from
; %__b and splatted to all 8 lanes; the result is ANDed with the i8 mask %__u
; and widened to 64 lanes.
; Both expected sequences use one vpcmpnltq with a {1to8} broadcast operand and
; a {%k1} write-mask.
14460 define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
14461 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
14462 ; VLX: # %bb.0: # %entry
14463 ; VLX-NEXT: kmovd %edi, %k1
14464 ; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14465 ; VLX-NEXT: kmovq %k0, %rax
14466 ; VLX-NEXT: vzeroupper
14469 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
14470 ; NoVLX: # %bb.0: # %entry
14471 ; NoVLX-NEXT: kmovw %edi, %k1
14472 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14473 ; NoVLX-NEXT: kmovw %k0, %eax
14474 ; NoVLX-NEXT: vzeroupper
14477 %0 = bitcast <8 x i64> %__a to <8 x i64>
14478 %load = load i64, ptr %__b
; insertelement into lane 0 followed by an all-zero shuffle mask is the splat idiom.
14479 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14480 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14481 %2 = icmp sge <8 x i64> %0, %1
14482 %3 = bitcast i8 %__u to <8 x i1>
14483 %4 = and <8 x i1> %3, %2
; Shuffle indices 0-7 pick the masked compare lanes; every index >= 8 picks zeroinitializer, zero-filling lanes 8-63.
14484 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14485 %6 = bitcast <64 x i1> %5 to i64
; Unmasked unsigned < compare of two 128-bit register operands viewed as
; <16 x i8>, with the 16-lane result widened to 32 lanes.
; The VLX run expects a single vpcmpltub into a k-register. The NoVLX run
; expects a < b to be built as NOT(max(a, b) == a) using vpmaxub, vpcmpeqb and
; vpternlogq $15, then converted to a k-mask via vpmovsxbd + vptestmd.
14490 define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14491 ; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask:
14492 ; VLX: # %bb.0: # %entry
14493 ; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0
14494 ; VLX-NEXT: kmovd %k0, %eax
14497 ; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask:
14498 ; NoVLX: # %bb.0: # %entry
14499 ; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
14500 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14501 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14502 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14503 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14504 ; NoVLX-NEXT: kmovw %k0, %eax
14505 ; NoVLX-NEXT: vzeroupper
14508 %0 = bitcast <2 x i64> %__a to <16 x i8>
14509 %1 = bitcast <2 x i64> %__b to <16 x i8>
14510 %2 = icmp ult <16 x i8> %0, %1
; Shuffle indices 0-15 pick the compare lanes; 16-31 pick zeroinitializer, zero-filling lanes 16-31.
14511 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14512 %4 = bitcast <32 x i1> %3 to i32
; Unmasked unsigned < compare of %__a against a 128-bit vector loaded from
; %__b, both viewed as <16 x i8>, with the result widened to 32 lanes.
; The VLX run expects the load folded into vpcmpltub. The NoVLX run expects the
; load folded into vpmaxub, followed by vpcmpeqb + vpternlogq $15 (NOT of
; max(a, b) == a) and vpmovsxbd + vptestmd to form the k-mask.
14516 define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
14517 ; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem:
14518 ; VLX: # %bb.0: # %entry
14519 ; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0
14520 ; VLX-NEXT: kmovd %k0, %eax
14523 ; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem:
14524 ; NoVLX: # %bb.0: # %entry
14525 ; NoVLX-NEXT: vpmaxub (%rdi), %xmm0, %xmm1
14526 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14527 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14528 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14529 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14530 ; NoVLX-NEXT: kmovw %k0, %eax
14531 ; NoVLX-NEXT: vzeroupper
14534 %0 = bitcast <2 x i64> %__a to <16 x i8>
14535 %load = load <2 x i64>, ptr %__b
14536 %1 = bitcast <2 x i64> %load to <16 x i8>
14537 %2 = icmp ult <16 x i8> %0, %1
; Shuffle indices 0-15 pick the compare lanes; 16-31 pick zeroinitializer, zero-filling lanes 16-31.
14538 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14539 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned < compare of two 128-bit register operands viewed as
; <16 x i8>; the result is ANDed with the i16 mask %__u and widened to 32 lanes.
; The VLX run expects %__u applied as the {%k1} write-mask of vpcmpltub. The
; NoVLX run expects the vpmaxub / vpcmpeqb / vpternlogq $15 expansion, with
; %__u applied afterwards by a scalar andl on the extracted mask.
14543 define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14544 ; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
14545 ; VLX: # %bb.0: # %entry
14546 ; VLX-NEXT: kmovd %edi, %k1
14547 ; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1}
14548 ; VLX-NEXT: kmovd %k0, %eax
14551 ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
14552 ; NoVLX: # %bb.0: # %entry
14553 ; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
14554 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14555 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14556 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14557 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14558 ; NoVLX-NEXT: kmovw %k0, %eax
14559 ; NoVLX-NEXT: andl %edi, %eax
14560 ; NoVLX-NEXT: vzeroupper
14563 %0 = bitcast <2 x i64> %__a to <16 x i8>
14564 %1 = bitcast <2 x i64> %__b to <16 x i8>
14565 %2 = icmp ult <16 x i8> %0, %1
14566 %3 = bitcast i16 %__u to <16 x i1>
14567 %4 = and <16 x i1> %2, %3
; Shuffle indices 0-15 pick the masked compare lanes; 16-31 pick zeroinitializer, zero-filling lanes 16-31.
14568 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14569 %6 = bitcast <32 x i1> %5 to i32
; Masked unsigned < compare of %__a against a 128-bit vector loaded from %__b,
; both viewed as <16 x i8>; the result is ANDed with the i16 mask %__u and
; widened to 32 lanes.
; The VLX run expects the load and mask folded into one vpcmpltub. The NoVLX
; run expects the load folded into vpmaxub, the vpcmpeqb / vpternlogq $15
; expansion, and %__u applied afterwards by a scalar andl.
14573 define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
14574 ; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
14575 ; VLX: # %bb.0: # %entry
14576 ; VLX-NEXT: kmovd %edi, %k1
14577 ; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1}
14578 ; VLX-NEXT: kmovd %k0, %eax
14581 ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
14582 ; NoVLX: # %bb.0: # %entry
14583 ; NoVLX-NEXT: vpmaxub (%rsi), %xmm0, %xmm1
14584 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14585 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14586 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14587 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14588 ; NoVLX-NEXT: kmovw %k0, %eax
14589 ; NoVLX-NEXT: andl %edi, %eax
14590 ; NoVLX-NEXT: vzeroupper
14593 %0 = bitcast <2 x i64> %__a to <16 x i8>
14594 %load = load <2 x i64>, ptr %__b
14595 %1 = bitcast <2 x i64> %load to <16 x i8>
14596 %2 = icmp ult <16 x i8> %0, %1
14597 %3 = bitcast i16 %__u to <16 x i1>
14598 %4 = and <16 x i1> %2, %3
; Shuffle indices 0-15 pick the masked compare lanes; 16-31 pick zeroinitializer, zero-filling lanes 16-31.
14599 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14600 %6 = bitcast <32 x i1> %5 to i32
; Unmasked unsigned < compare of two 128-bit register operands viewed as
; <16 x i8>, with the 16-lane result widened to 64 lanes.
; The VLX run expects a single vpcmpltub and a kmovq to %rax. The NoVLX run
; expects a < b to be built as NOT(max(a, b) == a) using vpmaxub, vpcmpeqb and
; vpternlogq $15, then converted to a k-mask via vpmovsxbd + vptestmd.
14605 define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14606 ; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
14607 ; VLX: # %bb.0: # %entry
14608 ; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0
14609 ; VLX-NEXT: kmovq %k0, %rax
14612 ; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
14613 ; NoVLX: # %bb.0: # %entry
14614 ; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
14615 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14616 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14617 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14618 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14619 ; NoVLX-NEXT: kmovw %k0, %eax
14620 ; NoVLX-NEXT: vzeroupper
14623 %0 = bitcast <2 x i64> %__a to <16 x i8>
14624 %1 = bitcast <2 x i64> %__b to <16 x i8>
14625 %2 = icmp ult <16 x i8> %0, %1
; Shuffle indices 0-15 pick the compare lanes; every index >= 16 picks zeroinitializer, zero-filling lanes 16-63.
14626 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14627 %4 = bitcast <64 x i1> %3 to i64
; Unmasked unsigned < compare of %__a against a 128-bit vector loaded from
; %__b, both viewed as <16 x i8>, with the result widened to 64 lanes.
; The VLX run expects the load folded into vpcmpltub. The NoVLX run expects the
; load folded into vpmaxub, followed by vpcmpeqb + vpternlogq $15 (NOT of
; max(a, b) == a) and vpmovsxbd + vptestmd to form the k-mask.
14631 define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
14632 ; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
14633 ; VLX: # %bb.0: # %entry
14634 ; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0
14635 ; VLX-NEXT: kmovq %k0, %rax
14638 ; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
14639 ; NoVLX: # %bb.0: # %entry
14640 ; NoVLX-NEXT: vpmaxub (%rdi), %xmm0, %xmm1
14641 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14642 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14643 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14644 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14645 ; NoVLX-NEXT: kmovw %k0, %eax
14646 ; NoVLX-NEXT: vzeroupper
14649 %0 = bitcast <2 x i64> %__a to <16 x i8>
14650 %load = load <2 x i64>, ptr %__b
14651 %1 = bitcast <2 x i64> %load to <16 x i8>
14652 %2 = icmp ult <16 x i8> %0, %1
; Shuffle indices 0-15 pick the compare lanes; every index >= 16 picks zeroinitializer, zero-filling lanes 16-63.
14653 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14654 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned < compare of two 128-bit register operands viewed as
; <16 x i8>; the result is ANDed with the i16 mask %__u and widened to 64 lanes.
; The VLX run expects %__u applied as the {%k1} write-mask of vpcmpltub. The
; NoVLX run expects the vpmaxub / vpcmpeqb / vpternlogq $15 expansion, with
; %__u applied afterwards by a scalar andl on the extracted mask.
14658 define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14659 ; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask:
14660 ; VLX: # %bb.0: # %entry
14661 ; VLX-NEXT: kmovd %edi, %k1
14662 ; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1}
14663 ; VLX-NEXT: kmovq %k0, %rax
14666 ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask:
14667 ; NoVLX: # %bb.0: # %entry
14668 ; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
14669 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14670 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14671 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14672 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14673 ; NoVLX-NEXT: kmovw %k0, %eax
14674 ; NoVLX-NEXT: andl %edi, %eax
14675 ; NoVLX-NEXT: vzeroupper
14678 %0 = bitcast <2 x i64> %__a to <16 x i8>
14679 %1 = bitcast <2 x i64> %__b to <16 x i8>
14680 %2 = icmp ult <16 x i8> %0, %1
14681 %3 = bitcast i16 %__u to <16 x i1>
14682 %4 = and <16 x i1> %2, %3
; Shuffle indices 0-15 pick the masked compare lanes; every index >= 16 picks zeroinitializer, zero-filling lanes 16-63.
14683 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14684 %6 = bitcast <64 x i1> %5 to i64
; Masked unsigned < compare of %__a against a 128-bit vector loaded from %__b,
; both viewed as <16 x i8>; the result is ANDed with the i16 mask %__u and
; widened to 64 lanes.
; The VLX run expects the load and mask folded into one vpcmpltub. The NoVLX
; run expects the load folded into vpmaxub, the vpcmpeqb / vpternlogq $15
; expansion, and %__u applied afterwards by a scalar andl.
14688 define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
14689 ; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem:
14690 ; VLX: # %bb.0: # %entry
14691 ; VLX-NEXT: kmovd %edi, %k1
14692 ; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1}
14693 ; VLX-NEXT: kmovq %k0, %rax
14696 ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem:
14697 ; NoVLX: # %bb.0: # %entry
14698 ; NoVLX-NEXT: vpmaxub (%rsi), %xmm0, %xmm1
14699 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14700 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14701 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14702 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14703 ; NoVLX-NEXT: kmovw %k0, %eax
14704 ; NoVLX-NEXT: andl %edi, %eax
14705 ; NoVLX-NEXT: vzeroupper
14708 %0 = bitcast <2 x i64> %__a to <16 x i8>
14709 %load = load <2 x i64>, ptr %__b
14710 %1 = bitcast <2 x i64> %load to <16 x i8>
14711 %2 = icmp ult <16 x i8> %0, %1
14712 %3 = bitcast i16 %__u to <16 x i1>
14713 %4 = and <16 x i1> %2, %3
; Shuffle indices 0-15 pick the masked compare lanes; every index >= 16 picks zeroinitializer, zero-filling lanes 16-63.
14714 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14715 %6 = bitcast <64 x i1> %5 to i64
; Unmasked unsigned < compare of two 256-bit register operands viewed as
; <32 x i8>, with the 32-lane result widened to 64 lanes.
; The VLX run expects a single vpcmpltub on ymm registers. The NoVLX run
; expects the vpmaxub / vpcmpeqb / vpternlogq $15 expansion, after which each
; 128-bit half is turned into a 16-bit mask (vpmovsxbd + vptestmd + kmovw) and
; the two halves are merged with shll $16 / orl.
14720 define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
14721 ; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask:
14722 ; VLX: # %bb.0: # %entry
14723 ; VLX-NEXT: vpcmpltub %ymm1, %ymm0, %k0
14724 ; VLX-NEXT: kmovq %k0, %rax
14725 ; VLX-NEXT: vzeroupper
14728 ; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask:
14729 ; NoVLX: # %bb.0: # %entry
14730 ; NoVLX-NEXT: vpmaxub %ymm1, %ymm0, %ymm1
14731 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
14732 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14733 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
14734 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
14735 ; NoVLX-NEXT: kmovw %k0, %ecx
14736 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
14737 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14738 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14739 ; NoVLX-NEXT: kmovw %k0, %eax
14740 ; NoVLX-NEXT: shll $16, %eax
14741 ; NoVLX-NEXT: orl %ecx, %eax
14742 ; NoVLX-NEXT: vzeroupper
14745 %0 = bitcast <4 x i64> %__a to <32 x i8>
14746 %1 = bitcast <4 x i64> %__b to <32 x i8>
14747 %2 = icmp ult <32 x i8> %0, %1
; Shuffle indices 0-31 pick the compare lanes; 32-63 pick zeroinitializer, zero-filling lanes 32-63.
14748 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
14749 %4 = bitcast <64 x i1> %3 to i64
; Unmasked unsigned < compare of %__a against a 256-bit vector loaded from
; %__b, both viewed as <32 x i8>, with the result widened to 64 lanes.
; The VLX run expects the load folded into vpcmpltub. The NoVLX run expects the
; load folded into vpmaxub, the vpcmpeqb / vpternlogq $15 expansion, and the
; two 128-bit halves converted to 16-bit masks and merged with shll $16 / orl.
14753 define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
14754 ; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem:
14755 ; VLX: # %bb.0: # %entry
14756 ; VLX-NEXT: vpcmpltub (%rdi), %ymm0, %k0
14757 ; VLX-NEXT: kmovq %k0, %rax
14758 ; VLX-NEXT: vzeroupper
14761 ; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem:
14762 ; NoVLX: # %bb.0: # %entry
14763 ; NoVLX-NEXT: vpmaxub (%rdi), %ymm0, %ymm1
14764 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
14765 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14766 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
14767 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
14768 ; NoVLX-NEXT: kmovw %k0, %ecx
14769 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
14770 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14771 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14772 ; NoVLX-NEXT: kmovw %k0, %eax
14773 ; NoVLX-NEXT: shll $16, %eax
14774 ; NoVLX-NEXT: orl %ecx, %eax
14775 ; NoVLX-NEXT: vzeroupper
14778 %0 = bitcast <4 x i64> %__a to <32 x i8>
14779 %load = load <4 x i64>, ptr %__b
14780 %1 = bitcast <4 x i64> %load to <32 x i8>
14781 %2 = icmp ult <32 x i8> %0, %1
; Shuffle indices 0-31 pick the compare lanes; 32-63 pick zeroinitializer, zero-filling lanes 32-63.
14782 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
14783 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned < compare of two 256-bit register operands viewed as
; <32 x i8>; the result is ANDed with the i32 mask %__u and widened to 64 lanes.
; The VLX run expects %__u applied as the {%k1} write-mask of vpcmpltub. The
; NoVLX run expects the vpmaxub / vpcmpeqb / vpternlogq $15 expansion; each
; 128-bit half becomes a 16-bit mask that is ANDed with the matching half of
; %__u (low half directly, high half after shrl $16) before the halves are
; merged with shll $16 / movzwl / orl.
14787 define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
14788 ; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask:
14789 ; VLX: # %bb.0: # %entry
14790 ; VLX-NEXT: kmovd %edi, %k1
14791 ; VLX-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1}
14792 ; VLX-NEXT: kmovq %k0, %rax
14793 ; VLX-NEXT: vzeroupper
14796 ; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask:
14797 ; NoVLX: # %bb.0: # %entry
14798 ; NoVLX-NEXT: vpmaxub %ymm1, %ymm0, %ymm1
14799 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
14800 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14801 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
14802 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
14803 ; NoVLX-NEXT: kmovw %k0, %eax
14804 ; NoVLX-NEXT: andl %edi, %eax
14805 ; NoVLX-NEXT: shrl $16, %edi
14806 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
14807 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14808 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14809 ; NoVLX-NEXT: kmovw %k0, %ecx
14810 ; NoVLX-NEXT: andl %edi, %ecx
14811 ; NoVLX-NEXT: shll $16, %ecx
14812 ; NoVLX-NEXT: movzwl %ax, %eax
14813 ; NoVLX-NEXT: orl %ecx, %eax
14814 ; NoVLX-NEXT: vzeroupper
14817 %0 = bitcast <4 x i64> %__a to <32 x i8>
14818 %1 = bitcast <4 x i64> %__b to <32 x i8>
14819 %2 = icmp ult <32 x i8> %0, %1
14820 %3 = bitcast i32 %__u to <32 x i1>
14821 %4 = and <32 x i1> %2, %3
; Shuffle indices 0-31 pick the masked compare lanes; 32-63 pick zeroinitializer, zero-filling lanes 32-63.
14822 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
14823 %6 = bitcast <64 x i1> %5 to i64
; Masked unsigned less-than compare (icmp ult) of <32 x i8> lanes with the
; right-hand operand loaded from %__b. The result is ANDed with %__u bitcast to
; <32 x i1>, widened to <64 x i1> with indices 32..63 selecting from
; zeroinitializer, then bitcast to i64.
; The run with +avx512bw,+avx512vl expects the load folded into a write-masked
; vpcmpltub. The +avx512f-only run expects the load folded into vpmaxub,
; followed by vpcmpeqb / vpternlogq $15 and per-half andl masking.
14827 define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
14828 ; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem:
14829 ; VLX: # %bb.0: # %entry
14830 ; VLX-NEXT: kmovd %edi, %k1
14831 ; VLX-NEXT: vpcmpltub (%rsi), %ymm0, %k0 {%k1}
14832 ; VLX-NEXT: kmovq %k0, %rax
14833 ; VLX-NEXT: vzeroupper
14836 ; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem:
14837 ; NoVLX: # %bb.0: # %entry
14838 ; NoVLX-NEXT: vpmaxub (%rsi), %ymm0, %ymm1
14839 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
14840 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14841 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
14842 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
14843 ; NoVLX-NEXT: kmovw %k0, %eax
14844 ; NoVLX-NEXT: andl %edi, %eax
14845 ; NoVLX-NEXT: shrl $16, %edi
14846 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
14847 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14848 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14849 ; NoVLX-NEXT: kmovw %k0, %ecx
14850 ; NoVLX-NEXT: andl %edi, %ecx
14851 ; NoVLX-NEXT: shll $16, %ecx
14852 ; NoVLX-NEXT: movzwl %ax, %eax
14853 ; NoVLX-NEXT: orl %ecx, %eax
14854 ; NoVLX-NEXT: vzeroupper
14857 %0 = bitcast <4 x i64> %__a to <32 x i8>
14858 %load = load <4 x i64>, ptr %__b
14859 %1 = bitcast <4 x i64> %load to <32 x i8>
14860 %2 = icmp ult <32 x i8> %0, %1
14861 %3 = bitcast i32 %__u to <32 x i1>
14862 %4 = and <32 x i1> %2, %3
14863 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
14864 %6 = bitcast <64 x i1> %5 to i64
; Unsigned less-than compare (icmp ult) of <8 x i16> lanes. The <8 x i1> result
; is widened to <16 x i1> by a shufflevector whose indices 8..15 select from
; the zeroinitializer operand, then bitcast to i16.
; The run with +avx512bw,+avx512vl expects one vpcmpltuw into a mask register.
; The +avx512f-only run expects vpmaxuw + vpcmpeqw (i.e. a >= b) inverted by
; vpternlogq $15, then sign-extended to qwords and tested with vptestmq.
14869 define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14870 ; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask:
14871 ; VLX: # %bb.0: # %entry
14872 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
14873 ; VLX-NEXT: kmovd %k0, %eax
14874 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14877 ; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask:
14878 ; NoVLX: # %bb.0: # %entry
14879 ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
14880 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
14881 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14882 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
14883 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
14884 ; NoVLX-NEXT: kmovw %k0, %eax
14885 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14886 ; NoVLX-NEXT: vzeroupper
14889 %0 = bitcast <2 x i64> %__a to <8 x i16>
14890 %1 = bitcast <2 x i64> %__b to <8 x i16>
14891 %2 = icmp ult <8 x i16> %0, %1
14892 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14893 %4 = bitcast <16 x i1> %3 to i16
; Unsigned less-than compare (icmp ult) of <8 x i16> lanes with the right-hand
; operand loaded from %__b. The <8 x i1> result is widened to <16 x i1> with
; indices 8..15 selecting from zeroinitializer, then bitcast to i16.
; The run with +avx512bw,+avx512vl expects the load folded into vpcmpltuw.
; The +avx512f-only run expects it folded into vpmaxuw, followed by
; vpcmpeqw / vpternlogq $15 / vpmovsxwq / vptestmq.
14897 define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
14898 ; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem:
14899 ; VLX: # %bb.0: # %entry
14900 ; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
14901 ; VLX-NEXT: kmovd %k0, %eax
14902 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14905 ; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem:
14906 ; NoVLX: # %bb.0: # %entry
14907 ; NoVLX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm1
14908 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
14909 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14910 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
14911 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
14912 ; NoVLX-NEXT: kmovw %k0, %eax
14913 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14914 ; NoVLX-NEXT: vzeroupper
14917 %0 = bitcast <2 x i64> %__a to <8 x i16>
14918 %load = load <2 x i64>, ptr %__b
14919 %1 = bitcast <2 x i64> %load to <8 x i16>
14920 %2 = icmp ult <8 x i16> %0, %1
14921 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14922 %4 = bitcast <16 x i1> %3 to i16
; Masked unsigned less-than compare (icmp ult) of <8 x i16> lanes: the compare
; result is ANDed with %__u bitcast to <8 x i1>, widened to <16 x i1> with
; indices 8..15 selecting from zeroinitializer, then bitcast to i16.
; The run with +avx512bw,+avx512vl expects %__u in %k1 as the write-mask of
; vpcmpltuw. The +avx512f-only run expects the vpmaxuw / vpcmpeqw /
; vpternlogq $15 sequence with %k1 applied as the write-mask of vptestmq.
14926 define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14927 ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask:
14928 ; VLX: # %bb.0: # %entry
14929 ; VLX-NEXT: kmovd %edi, %k1
14930 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
14931 ; VLX-NEXT: kmovd %k0, %eax
14932 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14935 ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask:
14936 ; NoVLX: # %bb.0: # %entry
14937 ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
14938 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
14939 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14940 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
14941 ; NoVLX-NEXT: kmovw %edi, %k1
14942 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
14943 ; NoVLX-NEXT: kmovw %k0, %eax
14944 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14945 ; NoVLX-NEXT: vzeroupper
14948 %0 = bitcast <2 x i64> %__a to <8 x i16>
14949 %1 = bitcast <2 x i64> %__b to <8 x i16>
14950 %2 = icmp ult <8 x i16> %0, %1
14951 %3 = bitcast i8 %__u to <8 x i1>
14952 %4 = and <8 x i1> %2, %3
14953 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14954 %6 = bitcast <16 x i1> %5 to i16
; Masked unsigned less-than compare (icmp ult) of <8 x i16> lanes with the
; right-hand operand loaded from %__b. The result is ANDed with %__u bitcast to
; <8 x i1>, widened to <16 x i1> with indices 8..15 selecting from
; zeroinitializer, then bitcast to i16.
; The run with +avx512bw,+avx512vl expects the load folded into a write-masked
; vpcmpltuw. The +avx512f-only run expects it folded into vpmaxuw, with the
; mask applied as the write-mask of vptestmq.
14958 define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
14959 ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem:
14960 ; VLX: # %bb.0: # %entry
14961 ; VLX-NEXT: kmovd %edi, %k1
14962 ; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
14963 ; VLX-NEXT: kmovd %k0, %eax
14964 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14967 ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem:
14968 ; NoVLX: # %bb.0: # %entry
14969 ; NoVLX-NEXT: vpmaxuw (%rsi), %xmm0, %xmm1
14970 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
14971 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14972 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
14973 ; NoVLX-NEXT: kmovw %edi, %k1
14974 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
14975 ; NoVLX-NEXT: kmovw %k0, %eax
14976 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14977 ; NoVLX-NEXT: vzeroupper
14980 %0 = bitcast <2 x i64> %__a to <8 x i16>
14981 %load = load <2 x i64>, ptr %__b
14982 %1 = bitcast <2 x i64> %load to <8 x i16>
14983 %2 = icmp ult <8 x i16> %0, %1
14984 %3 = bitcast i8 %__u to <8 x i1>
14985 %4 = and <8 x i1> %2, %3
14986 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14987 %6 = bitcast <16 x i1> %5 to i16
; Unsigned less-than compare (icmp ult) of <8 x i16> lanes. The <8 x i1> result
; is widened to <32 x i1>: shuffle indices 0..7 pick the compare lanes and every
; remaining index is in 8..15, i.e. selects from the zeroinitializer operand.
; The widened vector is bitcast to i32.
; The run with +avx512bw,+avx512vl expects one vpcmpltuw into a mask register.
; The +avx512f-only run expects vpmaxuw + vpcmpeqw (i.e. a >= b) inverted by
; vpternlogq $15, then vpmovsxwq / vptestmq.
14992 define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14993 ; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask:
14994 ; VLX: # %bb.0: # %entry
14995 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
14996 ; VLX-NEXT: kmovd %k0, %eax
14999 ; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask:
15000 ; NoVLX: # %bb.0: # %entry
15001 ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
15002 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15003 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15004 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15005 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
15006 ; NoVLX-NEXT: kmovw %k0, %eax
15007 ; NoVLX-NEXT: vzeroupper
15010 %0 = bitcast <2 x i64> %__a to <8 x i16>
15011 %1 = bitcast <2 x i64> %__b to <8 x i16>
15012 %2 = icmp ult <8 x i16> %0, %1
15013 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15014 %4 = bitcast <32 x i1> %3 to i32
; Unsigned less-than compare (icmp ult) of <8 x i16> lanes with the right-hand
; operand loaded from %__b. The <8 x i1> result is widened to <32 x i1>
; (indices 0..7 pick the compare lanes; all other indices are in 8..15 and
; select from zeroinitializer) and bitcast to i32.
; The run with +avx512bw,+avx512vl expects the load folded into vpcmpltuw.
; The +avx512f-only run expects it folded into vpmaxuw, followed by
; vpcmpeqw / vpternlogq $15 / vpmovsxwq / vptestmq.
15018 define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
15019 ; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem:
15020 ; VLX: # %bb.0: # %entry
15021 ; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
15022 ; VLX-NEXT: kmovd %k0, %eax
15025 ; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem:
15026 ; NoVLX: # %bb.0: # %entry
15027 ; NoVLX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm1
15028 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15029 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15030 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15031 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
15032 ; NoVLX-NEXT: kmovw %k0, %eax
15033 ; NoVLX-NEXT: vzeroupper
15036 %0 = bitcast <2 x i64> %__a to <8 x i16>
15037 %load = load <2 x i64>, ptr %__b
15038 %1 = bitcast <2 x i64> %load to <8 x i16>
15039 %2 = icmp ult <8 x i16> %0, %1
15040 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15041 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned less-than compare (icmp ult) of <8 x i16> lanes: the compare
; result is ANDed with %__u bitcast to <8 x i1>, widened to <32 x i1> (indices
; 0..7 pick the masked lanes; all other indices are in 8..15 and select from
; zeroinitializer), then bitcast to i32.
; The run with +avx512bw,+avx512vl expects %__u in %k1 as the write-mask of
; vpcmpltuw. The +avx512f-only run expects the vpmaxuw / vpcmpeqw /
; vpternlogq $15 sequence with %k1 applied as the write-mask of vptestmq.
15045 define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15046 ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask:
15047 ; VLX: # %bb.0: # %entry
15048 ; VLX-NEXT: kmovd %edi, %k1
15049 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
15050 ; VLX-NEXT: kmovd %k0, %eax
15053 ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask:
15054 ; NoVLX: # %bb.0: # %entry
15055 ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
15056 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15057 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15058 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15059 ; NoVLX-NEXT: kmovw %edi, %k1
15060 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
15061 ; NoVLX-NEXT: kmovw %k0, %eax
15062 ; NoVLX-NEXT: vzeroupper
15065 %0 = bitcast <2 x i64> %__a to <8 x i16>
15066 %1 = bitcast <2 x i64> %__b to <8 x i16>
15067 %2 = icmp ult <8 x i16> %0, %1
15068 %3 = bitcast i8 %__u to <8 x i1>
15069 %4 = and <8 x i1> %2, %3
15070 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15071 %6 = bitcast <32 x i1> %5 to i32
; Masked unsigned less-than compare (icmp ult) of <8 x i16> lanes with the
; right-hand operand loaded from %__b. The result is ANDed with %__u bitcast to
; <8 x i1>, widened to <32 x i1> (indices 0..7 pick the masked lanes; all other
; indices are in 8..15 and select from zeroinitializer), then bitcast to i32.
; The run with +avx512bw,+avx512vl expects the load folded into a write-masked
; vpcmpltuw. The +avx512f-only run expects it folded into vpmaxuw, with the
; mask applied as the write-mask of vptestmq.
15075 define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
15076 ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem:
15077 ; VLX: # %bb.0: # %entry
15078 ; VLX-NEXT: kmovd %edi, %k1
15079 ; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
15080 ; VLX-NEXT: kmovd %k0, %eax
15083 ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem:
15084 ; NoVLX: # %bb.0: # %entry
15085 ; NoVLX-NEXT: vpmaxuw (%rsi), %xmm0, %xmm1
15086 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15087 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15088 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15089 ; NoVLX-NEXT: kmovw %edi, %k1
15090 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
15091 ; NoVLX-NEXT: kmovw %k0, %eax
15092 ; NoVLX-NEXT: vzeroupper
15095 %0 = bitcast <2 x i64> %__a to <8 x i16>
15096 %load = load <2 x i64>, ptr %__b
15097 %1 = bitcast <2 x i64> %load to <8 x i16>
15098 %2 = icmp ult <8 x i16> %0, %1
15099 %3 = bitcast i8 %__u to <8 x i1>
15100 %4 = and <8 x i1> %2, %3
15101 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15102 %6 = bitcast <32 x i1> %5 to i32
; Unsigned less-than compare (icmp ult) of <8 x i16> lanes. The <8 x i1> result
; is widened to <64 x i1>: shuffle indices 0..7 pick the compare lanes and every
; remaining index is in 8..15, i.e. selects from the zeroinitializer operand.
; The widened vector is bitcast to i64.
; The run with +avx512bw,+avx512vl expects one vpcmpltuw read back with kmovq.
; The +avx512f-only run expects vpmaxuw + vpcmpeqw (i.e. a >= b) inverted by
; vpternlogq $15, then vpmovsxwq / vptestmq, with the mask read via kmovw into
; %eax.
15107 define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15108 ; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask:
15109 ; VLX: # %bb.0: # %entry
15110 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
15111 ; VLX-NEXT: kmovq %k0, %rax
15114 ; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask:
15115 ; NoVLX: # %bb.0: # %entry
15116 ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
15117 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15118 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15119 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15120 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
15121 ; NoVLX-NEXT: kmovw %k0, %eax
15122 ; NoVLX-NEXT: vzeroupper
15125 %0 = bitcast <2 x i64> %__a to <8 x i16>
15126 %1 = bitcast <2 x i64> %__b to <8 x i16>
15127 %2 = icmp ult <8 x i16> %0, %1
15128 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15129 %4 = bitcast <64 x i1> %3 to i64
; Unsigned less-than compare (icmp ult) of <8 x i16> lanes with the right-hand
; operand loaded from %__b. The <8 x i1> result is widened to <64 x i1>
; (indices 0..7 pick the compare lanes; all other indices are in 8..15 and
; select from zeroinitializer) and bitcast to i64.
; The run with +avx512bw,+avx512vl expects the load folded into vpcmpltuw.
; The +avx512f-only run expects it folded into vpmaxuw, followed by
; vpcmpeqw / vpternlogq $15 / vpmovsxwq / vptestmq.
15133 define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
15134 ; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem:
15135 ; VLX: # %bb.0: # %entry
15136 ; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
15137 ; VLX-NEXT: kmovq %k0, %rax
15140 ; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem:
15141 ; NoVLX: # %bb.0: # %entry
15142 ; NoVLX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm1
15143 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15144 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15145 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15146 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
15147 ; NoVLX-NEXT: kmovw %k0, %eax
15148 ; NoVLX-NEXT: vzeroupper
15151 %0 = bitcast <2 x i64> %__a to <8 x i16>
15152 %load = load <2 x i64>, ptr %__b
15153 %1 = bitcast <2 x i64> %load to <8 x i16>
15154 %2 = icmp ult <8 x i16> %0, %1
15155 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15156 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned less-than compare (icmp ult) of <8 x i16> lanes: the compare
; result is ANDed with %__u bitcast to <8 x i1>, widened to <64 x i1> (indices
; 0..7 pick the masked lanes; all other indices are in 8..15 and select from
; zeroinitializer), then bitcast to i64.
; The run with +avx512bw,+avx512vl expects %__u in %k1 as the write-mask of
; vpcmpltuw. The +avx512f-only run expects the vpmaxuw / vpcmpeqw /
; vpternlogq $15 sequence with %k1 applied as the write-mask of vptestmq.
15160 define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15161 ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask:
15162 ; VLX: # %bb.0: # %entry
15163 ; VLX-NEXT: kmovd %edi, %k1
15164 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
15165 ; VLX-NEXT: kmovq %k0, %rax
15168 ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask:
15169 ; NoVLX: # %bb.0: # %entry
15170 ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
15171 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15172 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15173 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15174 ; NoVLX-NEXT: kmovw %edi, %k1
15175 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
15176 ; NoVLX-NEXT: kmovw %k0, %eax
15177 ; NoVLX-NEXT: vzeroupper
15180 %0 = bitcast <2 x i64> %__a to <8 x i16>
15181 %1 = bitcast <2 x i64> %__b to <8 x i16>
15182 %2 = icmp ult <8 x i16> %0, %1
15183 %3 = bitcast i8 %__u to <8 x i1>
15184 %4 = and <8 x i1> %2, %3
15185 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15186 %6 = bitcast <64 x i1> %5 to i64
; Masked unsigned less-than compare (icmp ult) of <8 x i16> lanes with the
; right-hand operand loaded from %__b. The result is ANDed with %__u bitcast to
; <8 x i1>, widened to <64 x i1> (indices 0..7 pick the masked lanes; all other
; indices are in 8..15 and select from zeroinitializer), then bitcast to i64.
; The run with +avx512bw,+avx512vl expects the load folded into a write-masked
; vpcmpltuw. The +avx512f-only run expects it folded into vpmaxuw, with the
; mask applied as the write-mask of vptestmq.
15190 define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
15191 ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem:
15192 ; VLX: # %bb.0: # %entry
15193 ; VLX-NEXT: kmovd %edi, %k1
15194 ; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
15195 ; VLX-NEXT: kmovq %k0, %rax
15198 ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem:
15199 ; NoVLX: # %bb.0: # %entry
15200 ; NoVLX-NEXT: vpmaxuw (%rsi), %xmm0, %xmm1
15201 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15202 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15203 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15204 ; NoVLX-NEXT: kmovw %edi, %k1
15205 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
15206 ; NoVLX-NEXT: kmovw %k0, %eax
15207 ; NoVLX-NEXT: vzeroupper
15210 %0 = bitcast <2 x i64> %__a to <8 x i16>
15211 %load = load <2 x i64>, ptr %__b
15212 %1 = bitcast <2 x i64> %load to <8 x i16>
15213 %2 = icmp ult <8 x i16> %0, %1
15214 %3 = bitcast i8 %__u to <8 x i1>
15215 %4 = and <8 x i1> %2, %3
15216 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15217 %6 = bitcast <64 x i1> %5 to i64
; Unsigned less-than compare (icmp ult) of <16 x i16> lanes. The <16 x i1>
; result is widened to <32 x i1> by a shufflevector whose indices 16..31 select
; from the zeroinitializer operand, then bitcast to i32.
; The run with +avx512bw,+avx512vl expects one vpcmpltuw into a mask register.
; The +avx512f-only run expects vpmaxuw + vpcmpeqw (i.e. a >= b) inverted by
; vpternlogq $15, then sign-extended to dwords and tested with vptestmd.
15222 define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15223 ; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask:
15224 ; VLX: # %bb.0: # %entry
15225 ; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0
15226 ; VLX-NEXT: kmovd %k0, %eax
15227 ; VLX-NEXT: vzeroupper
15230 ; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask:
15231 ; NoVLX: # %bb.0: # %entry
15232 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
15233 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15234 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15235 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15236 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15237 ; NoVLX-NEXT: kmovw %k0, %eax
15238 ; NoVLX-NEXT: vzeroupper
15241 %0 = bitcast <4 x i64> %__a to <16 x i16>
15242 %1 = bitcast <4 x i64> %__b to <16 x i16>
15243 %2 = icmp ult <16 x i16> %0, %1
15244 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15245 %4 = bitcast <32 x i1> %3 to i32
; Unsigned less-than compare (icmp ult) of <16 x i16> lanes with the right-hand
; operand loaded from %__b. The <16 x i1> result is widened to <32 x i1> with
; indices 16..31 selecting from zeroinitializer, then bitcast to i32.
; The run with +avx512bw,+avx512vl expects the load folded into vpcmpltuw.
; The +avx512f-only run expects it folded into vpmaxuw, followed by
; vpcmpeqw / vpternlogq $15 / vpmovsxwd / vptestmd.
15249 define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
15250 ; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem:
15251 ; VLX: # %bb.0: # %entry
15252 ; VLX-NEXT: vpcmpltuw (%rdi), %ymm0, %k0
15253 ; VLX-NEXT: kmovd %k0, %eax
15254 ; VLX-NEXT: vzeroupper
15257 ; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem:
15258 ; NoVLX: # %bb.0: # %entry
15259 ; NoVLX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm1
15260 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15261 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15262 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15263 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15264 ; NoVLX-NEXT: kmovw %k0, %eax
15265 ; NoVLX-NEXT: vzeroupper
15268 %0 = bitcast <4 x i64> %__a to <16 x i16>
15269 %load = load <4 x i64>, ptr %__b
15270 %1 = bitcast <4 x i64> %load to <16 x i16>
15271 %2 = icmp ult <16 x i16> %0, %1
15272 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15273 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned less-than compare (icmp ult) of <16 x i16> lanes: the compare
; result is ANDed with %__u bitcast to <16 x i1>, widened to <32 x i1> with
; indices 16..31 selecting from zeroinitializer, then bitcast to i32.
; The run with +avx512bw,+avx512vl expects %__u in %k1 as the write-mask of
; vpcmpltuw. The +avx512f-only run expects the vpmaxuw / vpcmpeqw /
; vpternlogq $15 sequence, with the mask applied afterwards in a
; general-purpose register (andl %edi, %eax).
15277 define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15278 ; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask:
15279 ; VLX: # %bb.0: # %entry
15280 ; VLX-NEXT: kmovd %edi, %k1
15281 ; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1}
15282 ; VLX-NEXT: kmovd %k0, %eax
15283 ; VLX-NEXT: vzeroupper
15286 ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask:
15287 ; NoVLX: # %bb.0: # %entry
15288 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
15289 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15290 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15291 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15292 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15293 ; NoVLX-NEXT: kmovw %k0, %eax
15294 ; NoVLX-NEXT: andl %edi, %eax
15295 ; NoVLX-NEXT: vzeroupper
15298 %0 = bitcast <4 x i64> %__a to <16 x i16>
15299 %1 = bitcast <4 x i64> %__b to <16 x i16>
15300 %2 = icmp ult <16 x i16> %0, %1
15301 %3 = bitcast i16 %__u to <16 x i1>
15302 %4 = and <16 x i1> %2, %3
15303 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15304 %6 = bitcast <32 x i1> %5 to i32
; Masked unsigned less-than compare (icmp ult) of <16 x i16> lanes with the
; right-hand operand loaded from %__b. The result is ANDed with %__u bitcast to
; <16 x i1>, widened to <32 x i1> with indices 16..31 selecting from
; zeroinitializer, then bitcast to i32.
; The run with +avx512bw,+avx512vl expects the load folded into a write-masked
; vpcmpltuw. The +avx512f-only run expects it folded into vpmaxuw, with the
; mask applied afterwards via andl %edi, %eax.
15308 define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
15309 ; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem:
15310 ; VLX: # %bb.0: # %entry
15311 ; VLX-NEXT: kmovd %edi, %k1
15312 ; VLX-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1}
15313 ; VLX-NEXT: kmovd %k0, %eax
15314 ; VLX-NEXT: vzeroupper
15317 ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem:
15318 ; NoVLX: # %bb.0: # %entry
15319 ; NoVLX-NEXT: vpmaxuw (%rsi), %ymm0, %ymm1
15320 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15321 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15322 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15323 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15324 ; NoVLX-NEXT: kmovw %k0, %eax
15325 ; NoVLX-NEXT: andl %edi, %eax
15326 ; NoVLX-NEXT: vzeroupper
15329 %0 = bitcast <4 x i64> %__a to <16 x i16>
15330 %load = load <4 x i64>, ptr %__b
15331 %1 = bitcast <4 x i64> %load to <16 x i16>
15332 %2 = icmp ult <16 x i16> %0, %1
15333 %3 = bitcast i16 %__u to <16 x i1>
15334 %4 = and <16 x i1> %2, %3
15335 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15336 %6 = bitcast <32 x i1> %5 to i32
; Unsigned less-than compare (icmp ult) of <16 x i16> lanes. The <16 x i1>
; result is widened to <64 x i1>: shuffle indices 0..15 pick the compare lanes
; and every remaining index is in 16..31, i.e. selects from the zeroinitializer
; operand. The widened vector is bitcast to i64.
; The run with +avx512bw,+avx512vl expects one vpcmpltuw read back with kmovq.
; The +avx512f-only run expects vpmaxuw + vpcmpeqw (i.e. a >= b) inverted by
; vpternlogq $15, then vpmovsxwd / vptestmd, with the mask read via kmovw.
15341 define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15342 ; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask:
15343 ; VLX: # %bb.0: # %entry
15344 ; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0
15345 ; VLX-NEXT: kmovq %k0, %rax
15346 ; VLX-NEXT: vzeroupper
15349 ; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask:
15350 ; NoVLX: # %bb.0: # %entry
15351 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
15352 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15353 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15354 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15355 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15356 ; NoVLX-NEXT: kmovw %k0, %eax
15357 ; NoVLX-NEXT: vzeroupper
15360 %0 = bitcast <4 x i64> %__a to <16 x i16>
15361 %1 = bitcast <4 x i64> %__b to <16 x i16>
15362 %2 = icmp ult <16 x i16> %0, %1
15363 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15364 %4 = bitcast <64 x i1> %3 to i64
; Unsigned less-than compare (icmp ult) of <16 x i16> lanes with the right-hand
; operand loaded from %__b. The <16 x i1> result is widened to <64 x i1>
; (indices 0..15 pick the compare lanes; all other indices are in 16..31 and
; select from zeroinitializer) and bitcast to i64.
; The run with +avx512bw,+avx512vl expects the load folded into vpcmpltuw.
; The +avx512f-only run expects it folded into vpmaxuw, followed by
; vpcmpeqw / vpternlogq $15 / vpmovsxwd / vptestmd.
15368 define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
15369 ; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem:
15370 ; VLX: # %bb.0: # %entry
15371 ; VLX-NEXT: vpcmpltuw (%rdi), %ymm0, %k0
15372 ; VLX-NEXT: kmovq %k0, %rax
15373 ; VLX-NEXT: vzeroupper
15376 ; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem:
15377 ; NoVLX: # %bb.0: # %entry
15378 ; NoVLX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm1
15379 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15380 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15381 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15382 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15383 ; NoVLX-NEXT: kmovw %k0, %eax
15384 ; NoVLX-NEXT: vzeroupper
15387 %0 = bitcast <4 x i64> %__a to <16 x i16>
15388 %load = load <4 x i64>, ptr %__b
15389 %1 = bitcast <4 x i64> %load to <16 x i16>
15390 %2 = icmp ult <16 x i16> %0, %1
15391 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15392 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned less-than compare (icmp ult) of <16 x i16> lanes: the compare
; result is ANDed with %__u bitcast to <16 x i1>, widened to <64 x i1> (indices
; 0..15 pick the masked lanes; all other indices are in 16..31 and select from
; zeroinitializer), then bitcast to i64.
; The run with +avx512bw,+avx512vl expects %__u in %k1 as the write-mask of
; vpcmpltuw. The +avx512f-only run expects the vpmaxuw / vpcmpeqw /
; vpternlogq $15 sequence, with the mask applied afterwards via
; andl %edi, %eax.
15396 define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15397 ; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask:
15398 ; VLX: # %bb.0: # %entry
15399 ; VLX-NEXT: kmovd %edi, %k1
15400 ; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1}
15401 ; VLX-NEXT: kmovq %k0, %rax
15402 ; VLX-NEXT: vzeroupper
15405 ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask:
15406 ; NoVLX: # %bb.0: # %entry
15407 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
15408 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15409 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15410 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15411 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15412 ; NoVLX-NEXT: kmovw %k0, %eax
15413 ; NoVLX-NEXT: andl %edi, %eax
15414 ; NoVLX-NEXT: vzeroupper
15417 %0 = bitcast <4 x i64> %__a to <16 x i16>
15418 %1 = bitcast <4 x i64> %__b to <16 x i16>
15419 %2 = icmp ult <16 x i16> %0, %1
15420 %3 = bitcast i16 %__u to <16 x i1>
15421 %4 = and <16 x i1> %2, %3
15422 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15423 %6 = bitcast <64 x i1> %5 to i64
; Masked unsigned less-than compare (icmp ult) of <16 x i16> lanes with the
; right-hand operand loaded from %__b. The result is ANDed with %__u bitcast to
; <16 x i1>, widened to <64 x i1> (indices 0..15 pick the masked lanes; all
; other indices are in 16..31 and select from zeroinitializer), then bitcast to
; i64.
; The run with +avx512bw,+avx512vl expects the load folded into a write-masked
; vpcmpltuw. The +avx512f-only run expects it folded into vpmaxuw, with the
; mask applied afterwards via andl %edi, %eax.
15427 define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
15428 ; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem:
15429 ; VLX: # %bb.0: # %entry
15430 ; VLX-NEXT: kmovd %edi, %k1
15431 ; VLX-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1}
15432 ; VLX-NEXT: kmovq %k0, %rax
15433 ; VLX-NEXT: vzeroupper
15436 ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem:
15437 ; NoVLX: # %bb.0: # %entry
15438 ; NoVLX-NEXT: vpmaxuw (%rsi), %ymm0, %ymm1
15439 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15440 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15441 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15442 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15443 ; NoVLX-NEXT: kmovw %k0, %eax
15444 ; NoVLX-NEXT: andl %edi, %eax
15445 ; NoVLX-NEXT: vzeroupper
15448 %0 = bitcast <4 x i64> %__a to <16 x i16>
15449 %load = load <4 x i64>, ptr %__b
15450 %1 = bitcast <4 x i64> %load to <16 x i16>
15451 %2 = icmp ult <16 x i16> %0, %1
15452 %3 = bitcast i16 %__u to <16 x i1>
15453 %4 = and <16 x i1> %2, %3
15454 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15455 %6 = bitcast <64 x i1> %5 to i64
; Unsigned less-than compare (icmp ult) of <32 x i16> lanes. The <32 x i1>
; result is widened to <64 x i1> by a shufflevector whose indices 32..63 select
; from the zeroinitializer operand, then bitcast to i64.
; The run with +avx512bw,+avx512vl expects one 512-bit vpcmpltuw into a mask
; register. The +avx512f-only run expects the compare split into two 256-bit
; halves (upper halves obtained with vextracti64x4 $1), each lowered as
; vpmaxuw + vpcmpeqw inverted by vpternlogq $15, with the two 16-bit masks
; joined by shll $16 / orl.
15460 define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
15461 ; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
15462 ; VLX: # %bb.0: # %entry
15463 ; VLX-NEXT: vpcmpltuw %zmm1, %zmm0, %k0
15464 ; VLX-NEXT: kmovq %k0, %rax
15465 ; VLX-NEXT: vzeroupper
15468 ; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
15469 ; NoVLX: # %bb.0: # %entry
15470 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2
15471 ; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
15472 ; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
15473 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
15474 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
15475 ; NoVLX-NEXT: kmovw %k0, %ecx
15476 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
15477 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
15478 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
15479 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15480 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15481 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15482 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15483 ; NoVLX-NEXT: kmovw %k0, %eax
15484 ; NoVLX-NEXT: shll $16, %eax
15485 ; NoVLX-NEXT: orl %ecx, %eax
15486 ; NoVLX-NEXT: vzeroupper
15489 %0 = bitcast <8 x i64> %__a to <32 x i16>
15490 %1 = bitcast <8 x i64> %__b to <32 x i16>
15491 %2 = icmp ult <32 x i16> %0, %1
15492 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15493 %4 = bitcast <64 x i1> %3 to i64
; Unmasked unsigned less-than compare of 32 x i16 lanes; the right-hand
; operand is loaded from %__b. The 32-lane result is widened to 64 lanes
; (lanes 32..63 select from the zeroinitializer operand) and bitcast to i64.
; Under the VLX prefix the load is expected to fold into one vpcmpltuw. Under
; the NoVLX prefix two 256-bit halves are expected, reading (%rdi) and
; 32(%rdi), each compared via vpmaxuw/vpcmpeqw/vpternlogq $15 and merged with
; shll $16 and orl.
15497 define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
15498 ; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
15499 ; VLX: # %bb.0: # %entry
15500 ; VLX-NEXT: vpcmpltuw (%rdi), %zmm0, %k0
15501 ; VLX-NEXT: kmovq %k0, %rax
15502 ; VLX-NEXT: vzeroupper
15505 ; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
15506 ; NoVLX: # %bb.0: # %entry
15507 ; NoVLX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm1
15508 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm1
15509 ; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
15510 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
15511 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
15512 ; NoVLX-NEXT: kmovw %k0, %ecx
15513 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
15514 ; NoVLX-NEXT: vpmaxuw 32(%rdi), %ymm0, %ymm1
15515 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15516 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15517 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15518 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15519 ; NoVLX-NEXT: kmovw %k0, %eax
15520 ; NoVLX-NEXT: shll $16, %eax
15521 ; NoVLX-NEXT: orl %ecx, %eax
15522 ; NoVLX-NEXT: vzeroupper
15525 %0 = bitcast <8 x i64> %__a to <32 x i16>
15526 %load = load <8 x i64>, ptr %__b
15527 %1 = bitcast <8 x i64> %load to <32 x i16>
15528 %2 = icmp ult <32 x i16> %0, %1
15529 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15530 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned less-than compare of 32 x i16 lanes with both operands in
; registers. The 32-lane result is ANDed with the bits of %__u, widened to 64
; lanes (lanes 32..63 select from the zeroinitializer operand) and bitcast to
; an i64 bitmask.
; Under the VLX prefix one write-masked vpcmpltuw is expected. Under the NoVLX
; prefix two 256-bit halves are expected; the low 16-bit mask is ANDed with
; %edi, the high one with %edi shifted right by 16, and the halves are merged
; with shll $16, movzwl and orl.
15534 define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
15535 ; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
15536 ; VLX: # %bb.0: # %entry
15537 ; VLX-NEXT: kmovd %edi, %k1
15538 ; VLX-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
15539 ; VLX-NEXT: kmovq %k0, %rax
15540 ; VLX-NEXT: vzeroupper
15543 ; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
15544 ; NoVLX: # %bb.0: # %entry
15545 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2
15546 ; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
15547 ; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
15548 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
15549 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
15550 ; NoVLX-NEXT: kmovw %k0, %eax
15551 ; NoVLX-NEXT: andl %edi, %eax
15552 ; NoVLX-NEXT: shrl $16, %edi
15553 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
15554 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
15555 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
15556 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15557 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15558 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15559 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15560 ; NoVLX-NEXT: kmovw %k0, %ecx
15561 ; NoVLX-NEXT: andl %edi, %ecx
15562 ; NoVLX-NEXT: shll $16, %ecx
15563 ; NoVLX-NEXT: movzwl %ax, %eax
15564 ; NoVLX-NEXT: orl %ecx, %eax
15565 ; NoVLX-NEXT: vzeroupper
15568 %0 = bitcast <8 x i64> %__a to <32 x i16>
15569 %1 = bitcast <8 x i64> %__b to <32 x i16>
15570 %2 = icmp ult <32 x i16> %0, %1
15571 %3 = bitcast i32 %__u to <32 x i1>
15572 %4 = and <32 x i1> %2, %3
15573 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15574 %6 = bitcast <64 x i1> %5 to i64
; Masked unsigned less-than compare of 32 x i16 lanes; the right-hand operand
; is loaded from %__b. The 32-lane result is ANDed with the bits of %__u,
; widened to 64 lanes (lanes 32..63 select from the zeroinitializer operand)
; and bitcast to an i64 bitmask.
; Under the VLX prefix one write-masked vpcmpltuw with a folded load is
; expected. Under the NoVLX prefix two 256-bit halves are expected, reading
; (%rsi) and 32(%rsi); each 16-bit mask is ANDed with the matching half of
; %edi and the halves are merged with shll $16, movzwl and orl.
15578 define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
15579 ; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
15580 ; VLX: # %bb.0: # %entry
15581 ; VLX-NEXT: kmovd %edi, %k1
15582 ; VLX-NEXT: vpcmpltuw (%rsi), %zmm0, %k0 {%k1}
15583 ; VLX-NEXT: kmovq %k0, %rax
15584 ; VLX-NEXT: vzeroupper
15587 ; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
15588 ; NoVLX: # %bb.0: # %entry
15589 ; NoVLX-NEXT: vpmaxuw (%rsi), %ymm0, %ymm1
15590 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm1
15591 ; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
15592 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
15593 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
15594 ; NoVLX-NEXT: kmovw %k0, %eax
15595 ; NoVLX-NEXT: andl %edi, %eax
15596 ; NoVLX-NEXT: shrl $16, %edi
15597 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
15598 ; NoVLX-NEXT: vpmaxuw 32(%rsi), %ymm0, %ymm1
15599 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15600 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15601 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15602 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15603 ; NoVLX-NEXT: kmovw %k0, %ecx
15604 ; NoVLX-NEXT: andl %edi, %ecx
15605 ; NoVLX-NEXT: shll $16, %ecx
15606 ; NoVLX-NEXT: movzwl %ax, %eax
15607 ; NoVLX-NEXT: orl %ecx, %eax
15608 ; NoVLX-NEXT: vzeroupper
15611 %0 = bitcast <8 x i64> %__a to <32 x i16>
15612 %load = load <8 x i64>, ptr %__b
15613 %1 = bitcast <8 x i64> %load to <32 x i16>
15614 %2 = icmp ult <32 x i16> %0, %1
15615 %3 = bitcast i32 %__u to <32 x i1>
15616 %4 = and <32 x i1> %2, %3
15617 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15618 %6 = bitcast <64 x i1> %5 to i64
; Unmasked unsigned less-than compare of 4 x i32 lanes with both operands in
; registers. The 4-lane result is widened to 8 lanes (lanes 4..7 select from
; the zeroinitializer operand) and bitcast to an i8 bitmask.
; Under the VLX prefix a 128-bit vpcmpltud is expected. Under the NoVLX prefix
; the compare is expected on full zmm registers, followed by a
; kshiftlw/kshiftrw $12 pair that clears mask bits 4..15.
15623 define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15624 ; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask:
15625 ; VLX: # %bb.0: # %entry
15626 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
15627 ; VLX-NEXT: kmovd %k0, %eax
15628 ; VLX-NEXT: # kill: def $al killed $al killed $eax
15631 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask:
15632 ; NoVLX: # %bb.0: # %entry
15633 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
15634 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15635 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
15636 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15637 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15638 ; NoVLX-NEXT: kmovw %k0, %eax
15639 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
15640 ; NoVLX-NEXT: vzeroupper
15643 %0 = bitcast <2 x i64> %__a to <4 x i32>
15644 %1 = bitcast <2 x i64> %__b to <4 x i32>
15645 %2 = icmp ult <4 x i32> %0, %1
15646 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15647 %4 = bitcast <8 x i1> %3 to i8
; Unmasked unsigned less-than compare of 4 x i32 lanes; the right-hand operand
; is a full 128-bit vector loaded from %__b. The 4-lane result is widened to 8
; lanes (lanes 4..7 select from the zeroinitializer operand) and bitcast to i8.
; Under the VLX prefix the load is expected to fold into vpcmpltud. Under the
; NoVLX prefix the operand is expected to be loaded with vmovdqa, compared on
; zmm registers, and mask bits 4..15 cleared by kshiftlw/kshiftrw $12.
15651 define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
15652 ; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem:
15653 ; VLX: # %bb.0: # %entry
15654 ; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
15655 ; VLX-NEXT: kmovd %k0, %eax
15656 ; VLX-NEXT: # kill: def $al killed $al killed $eax
15659 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem:
15660 ; NoVLX: # %bb.0: # %entry
15661 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15662 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
15663 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
15664 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15665 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15666 ; NoVLX-NEXT: kmovw %k0, %eax
15667 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
15668 ; NoVLX-NEXT: vzeroupper
15671 %0 = bitcast <2 x i64> %__a to <4 x i32>
15672 %load = load <2 x i64>, ptr %__b
15673 %1 = bitcast <2 x i64> %load to <4 x i32>
15674 %2 = icmp ult <4 x i32> %0, %1
15675 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15676 %4 = bitcast <8 x i1> %3 to i8
; Masked unsigned less-than compare of 4 x i32 lanes with both operands in
; registers. %__u is bitcast to <8 x i1>, its low 4 lanes are extracted and
; ANDed with the compare result; the 4-lane value is then widened to 8 lanes
; (lanes 4..7 select from the zeroinitializer operand) and bitcast to i8.
; Under the VLX prefix a write-masked 128-bit vpcmpltud is expected. Under the
; NoVLX prefix a write-masked zmm compare is expected, followed by
; kshiftlw/kshiftrw $12 to clear mask bits 4..15.
15680 define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15681 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask:
15682 ; VLX: # %bb.0: # %entry
15683 ; VLX-NEXT: kmovd %edi, %k1
15684 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
15685 ; VLX-NEXT: kmovd %k0, %eax
15686 ; VLX-NEXT: # kill: def $al killed $al killed $eax
15689 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask:
15690 ; NoVLX: # %bb.0: # %entry
15691 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
15692 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15693 ; NoVLX-NEXT: kmovw %edi, %k1
15694 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
15695 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15696 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15697 ; NoVLX-NEXT: kmovw %k0, %eax
15698 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
15699 ; NoVLX-NEXT: vzeroupper
15702 %0 = bitcast <2 x i64> %__a to <4 x i32>
15703 %1 = bitcast <2 x i64> %__b to <4 x i32>
15704 %2 = icmp ult <4 x i32> %0, %1
15705 %3 = bitcast i8 %__u to <8 x i1>
15706 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15707 %4 = and <4 x i1> %2, %extract.i
15708 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15709 %6 = bitcast <8 x i1> %5 to i8
; Masked unsigned less-than compare of 4 x i32 lanes; the right-hand operand
; is a full 128-bit vector loaded from %__b. The low 4 lanes of %__u (viewed
; as <8 x i1>) are ANDed with the compare result, which is then widened to 8
; lanes (lanes 4..7 select from the zeroinitializer operand) and bitcast to i8.
; Under the VLX prefix a write-masked vpcmpltud with a folded load is
; expected. Under the NoVLX prefix a vmovdqa load, a write-masked zmm compare
; and a kshiftlw/kshiftrw $12 pair are expected.
15713 define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
15714 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem:
15715 ; VLX: # %bb.0: # %entry
15716 ; VLX-NEXT: kmovd %edi, %k1
15717 ; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
15718 ; VLX-NEXT: kmovd %k0, %eax
15719 ; VLX-NEXT: # kill: def $al killed $al killed $eax
15722 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem:
15723 ; NoVLX: # %bb.0: # %entry
15724 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15725 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
15726 ; NoVLX-NEXT: kmovw %edi, %k1
15727 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
15728 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15729 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15730 ; NoVLX-NEXT: kmovw %k0, %eax
15731 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
15732 ; NoVLX-NEXT: vzeroupper
15735 %0 = bitcast <2 x i64> %__a to <4 x i32>
15736 %load = load <2 x i64>, ptr %__b
15737 %1 = bitcast <2 x i64> %load to <4 x i32>
15738 %2 = icmp ult <4 x i32> %0, %1
15739 %3 = bitcast i8 %__u to <8 x i1>
15740 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15741 %4 = and <4 x i1> %2, %extract.i
15742 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15743 %6 = bitcast <8 x i1> %5 to i8
; Unmasked unsigned less-than compare of 4 x i32 lanes against a scalar i32
; loaded from %__b and splatted to all 4 lanes (insertelement + shufflevector
; with an all-zero index mask). The 4-lane result is widened to 8 lanes (lanes
; 4..7 select from the zeroinitializer operand) and bitcast to i8.
; Both run configurations are expected to fold the splat into a broadcast
; memory operand: {1to4} under the VLX prefix and {1to16} under the NoVLX
; prefix, the latter followed by kshiftlw/kshiftrw $12.
15748 define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
15749 ; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b:
15750 ; VLX: # %bb.0: # %entry
15751 ; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
15752 ; VLX-NEXT: kmovd %k0, %eax
15753 ; VLX-NEXT: # kill: def $al killed $al killed $eax
15756 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b:
15757 ; NoVLX: # %bb.0: # %entry
15758 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15759 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
15760 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15761 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15762 ; NoVLX-NEXT: kmovw %k0, %eax
15763 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
15764 ; NoVLX-NEXT: vzeroupper
15767 %0 = bitcast <2 x i64> %__a to <4 x i32>
15768 %load = load i32, ptr %__b
15769 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
15770 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
15771 %2 = icmp ult <4 x i32> %0, %1
15772 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15773 %4 = bitcast <8 x i1> %3 to i8
; Masked unsigned less-than compare of 4 x i32 lanes against a scalar i32
; loaded from %__b and splatted to all 4 lanes. The low 4 lanes of %__u
; (viewed as <8 x i1>) are ANDed with the compare result, which is widened to
; 8 lanes (lanes 4..7 select from the zeroinitializer operand) and bitcast to
; i8.
; Both run configurations are expected to use a write-masked vpcmpltud with a
; broadcast memory operand: {1to4} under the VLX prefix and {1to16} under the
; NoVLX prefix, the latter followed by kshiftlw/kshiftrw $12.
15777 define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
15778 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b:
15779 ; VLX: # %bb.0: # %entry
15780 ; VLX-NEXT: kmovd %edi, %k1
15781 ; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
15782 ; VLX-NEXT: kmovd %k0, %eax
15783 ; VLX-NEXT: # kill: def $al killed $al killed $eax
15786 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b:
15787 ; NoVLX: # %bb.0: # %entry
15788 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15789 ; NoVLX-NEXT: kmovw %edi, %k1
15790 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
15791 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15792 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15793 ; NoVLX-NEXT: kmovw %k0, %eax
15794 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
15795 ; NoVLX-NEXT: vzeroupper
15798 %0 = bitcast <2 x i64> %__a to <4 x i32>
15799 %load = load i32, ptr %__b
15800 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
15801 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
15802 %2 = icmp ult <4 x i32> %0, %1
15803 %3 = bitcast i8 %__u to <8 x i1>
15804 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15805 %4 = and <4 x i1> %extract.i, %2
15806 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15807 %6 = bitcast <8 x i1> %5 to i8
; Unmasked unsigned less-than compare of 4 x i32 lanes with both operands in
; registers. The 4-lane result is widened to 16 lanes (every lane past the
; first four uses index 4..7, i.e. the zeroinitializer operand) and bitcast to
; an i16 bitmask.
; Under the VLX prefix a 128-bit vpcmpltud is expected. Under the NoVLX prefix
; a zmm compare followed by kshiftlw/kshiftrw $12 (clearing mask bits 4..15)
; is expected.
15812 define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15813 ; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask:
15814 ; VLX: # %bb.0: # %entry
15815 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
15816 ; VLX-NEXT: kmovd %k0, %eax
15817 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
15820 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask:
15821 ; NoVLX: # %bb.0: # %entry
15822 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
15823 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15824 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
15825 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15826 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15827 ; NoVLX-NEXT: kmovw %k0, %eax
15828 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
15829 ; NoVLX-NEXT: vzeroupper
15832 %0 = bitcast <2 x i64> %__a to <4 x i32>
15833 %1 = bitcast <2 x i64> %__b to <4 x i32>
15834 %2 = icmp ult <4 x i32> %0, %1
15835 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
15836 %4 = bitcast <16 x i1> %3 to i16
; Unmasked unsigned less-than compare of 4 x i32 lanes; the right-hand operand
; is a full 128-bit vector loaded from %__b. The 4-lane result is widened to
; 16 lanes (every lane past the first four uses index 4..7, i.e. the
; zeroinitializer operand) and bitcast to an i16 bitmask.
; Under the VLX prefix the load is expected to fold into vpcmpltud. Under the
; NoVLX prefix a vmovdqa load, a zmm compare and kshiftlw/kshiftrw $12 are
; expected.
15840 define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
15841 ; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem:
15842 ; VLX: # %bb.0: # %entry
15843 ; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
15844 ; VLX-NEXT: kmovd %k0, %eax
15845 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
15848 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem:
15849 ; NoVLX: # %bb.0: # %entry
15850 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15851 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
15852 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
15853 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15854 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15855 ; NoVLX-NEXT: kmovw %k0, %eax
15856 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
15857 ; NoVLX-NEXT: vzeroupper
15860 %0 = bitcast <2 x i64> %__a to <4 x i32>
15861 %load = load <2 x i64>, ptr %__b
15862 %1 = bitcast <2 x i64> %load to <4 x i32>
15863 %2 = icmp ult <4 x i32> %0, %1
15864 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
15865 %4 = bitcast <16 x i1> %3 to i16
; Masked unsigned less-than compare of 4 x i32 lanes with both operands in
; registers. The low 4 lanes of %__u (viewed as <8 x i1>) are ANDed with the
; compare result, which is widened to 16 lanes (every lane past the first four
; uses index 4..7, i.e. the zeroinitializer operand) and bitcast to i16.
; Under the VLX prefix a write-masked 128-bit vpcmpltud is expected. Under the
; NoVLX prefix a write-masked zmm compare followed by kshiftlw/kshiftrw $12 is
; expected.
15869 define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15870 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask:
15871 ; VLX: # %bb.0: # %entry
15872 ; VLX-NEXT: kmovd %edi, %k1
15873 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
15874 ; VLX-NEXT: kmovd %k0, %eax
15875 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
15878 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask:
15879 ; NoVLX: # %bb.0: # %entry
15880 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
15881 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15882 ; NoVLX-NEXT: kmovw %edi, %k1
15883 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
15884 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15885 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15886 ; NoVLX-NEXT: kmovw %k0, %eax
15887 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
15888 ; NoVLX-NEXT: vzeroupper
15891 %0 = bitcast <2 x i64> %__a to <4 x i32>
15892 %1 = bitcast <2 x i64> %__b to <4 x i32>
15893 %2 = icmp ult <4 x i32> %0, %1
15894 %3 = bitcast i8 %__u to <8 x i1>
15895 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15896 %4 = and <4 x i1> %2, %extract.i
15897 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
15898 %6 = bitcast <16 x i1> %5 to i16
; Masked unsigned less-than compare of 4 x i32 lanes; the right-hand operand
; is a full 128-bit vector loaded from %__b. The low 4 lanes of %__u (viewed
; as <8 x i1>) are ANDed with the compare result, which is widened to 16 lanes
; (every lane past the first four uses index 4..7, i.e. the zeroinitializer
; operand) and bitcast to i16.
; Under the VLX prefix a write-masked vpcmpltud with a folded load is
; expected. Under the NoVLX prefix a vmovdqa load, a write-masked zmm compare
; and kshiftlw/kshiftrw $12 are expected.
15902 define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
15903 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem:
15904 ; VLX: # %bb.0: # %entry
15905 ; VLX-NEXT: kmovd %edi, %k1
15906 ; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
15907 ; VLX-NEXT: kmovd %k0, %eax
15908 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
15911 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem:
15912 ; NoVLX: # %bb.0: # %entry
15913 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15914 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
15915 ; NoVLX-NEXT: kmovw %edi, %k1
15916 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
15917 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15918 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15919 ; NoVLX-NEXT: kmovw %k0, %eax
15920 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
15921 ; NoVLX-NEXT: vzeroupper
15924 %0 = bitcast <2 x i64> %__a to <4 x i32>
15925 %load = load <2 x i64>, ptr %__b
15926 %1 = bitcast <2 x i64> %load to <4 x i32>
15927 %2 = icmp ult <4 x i32> %0, %1
15928 %3 = bitcast i8 %__u to <8 x i1>
15929 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15930 %4 = and <4 x i1> %2, %extract.i
15931 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
15932 %6 = bitcast <16 x i1> %5 to i16
; Unmasked unsigned less-than compare of 4 x i32 lanes against a scalar i32
; loaded from %__b and splatted to all 4 lanes. The 4-lane result is widened
; to 16 lanes (every lane past the first four uses index 4..7, i.e. the
; zeroinitializer operand) and bitcast to an i16 bitmask.
; Both run configurations are expected to fold the splat into a broadcast
; memory operand: {1to4} under the VLX prefix and {1to16} under the NoVLX
; prefix, the latter followed by kshiftlw/kshiftrw $12.
15937 define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
15938 ; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b:
15939 ; VLX: # %bb.0: # %entry
15940 ; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
15941 ; VLX-NEXT: kmovd %k0, %eax
15942 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
15945 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b:
15946 ; NoVLX: # %bb.0: # %entry
15947 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15948 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
15949 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15950 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15951 ; NoVLX-NEXT: kmovw %k0, %eax
15952 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
15953 ; NoVLX-NEXT: vzeroupper
15956 %0 = bitcast <2 x i64> %__a to <4 x i32>
15957 %load = load i32, ptr %__b
15958 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
15959 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
15960 %2 = icmp ult <4 x i32> %0, %1
15961 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
15962 %4 = bitcast <16 x i1> %3 to i16
; Masked unsigned less-than compare of 4 x i32 lanes against a scalar i32
; loaded from %__b and splatted to all 4 lanes. The low 4 lanes of %__u
; (viewed as <8 x i1>) are ANDed with the compare result, which is widened to
; 16 lanes (every lane past the first four uses index 4..7, i.e. the
; zeroinitializer operand) and bitcast to i16.
; Both run configurations are expected to use a write-masked vpcmpltud with a
; broadcast memory operand: {1to4} under the VLX prefix and {1to16} under the
; NoVLX prefix, the latter followed by kshiftlw/kshiftrw $12.
15966 define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
15967 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b:
15968 ; VLX: # %bb.0: # %entry
15969 ; VLX-NEXT: kmovd %edi, %k1
15970 ; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
15971 ; VLX-NEXT: kmovd %k0, %eax
15972 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
15975 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b:
15976 ; NoVLX: # %bb.0: # %entry
15977 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15978 ; NoVLX-NEXT: kmovw %edi, %k1
15979 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
15980 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15981 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15982 ; NoVLX-NEXT: kmovw %k0, %eax
15983 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
15984 ; NoVLX-NEXT: vzeroupper
15987 %0 = bitcast <2 x i64> %__a to <4 x i32>
15988 %load = load i32, ptr %__b
15989 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
15990 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
15991 %2 = icmp ult <4 x i32> %0, %1
15992 %3 = bitcast i8 %__u to <8 x i1>
15993 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15994 %4 = and <4 x i1> %extract.i, %2
15995 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
15996 %6 = bitcast <16 x i1> %5 to i16
; Unmasked unsigned less-than compare of 4 x i32 lanes with both operands in
; registers. The 4-lane result is widened to 32 lanes (every lane past the
; first four uses index 4..7, i.e. the zeroinitializer operand) and bitcast to
; an i32 bitmask.
; Under the VLX prefix a 128-bit vpcmpltud is expected. Under the NoVLX prefix
; a zmm compare followed by kshiftlw/kshiftrw $12 (clearing mask bits 4..15)
; is expected.
16001 define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16002 ; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask:
16003 ; VLX: # %bb.0: # %entry
16004 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
16005 ; VLX-NEXT: kmovd %k0, %eax
16008 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask:
16009 ; NoVLX: # %bb.0: # %entry
16010 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
16011 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16012 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16013 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16014 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16015 ; NoVLX-NEXT: kmovw %k0, %eax
16016 ; NoVLX-NEXT: vzeroupper
16019 %0 = bitcast <2 x i64> %__a to <4 x i32>
16020 %1 = bitcast <2 x i64> %__b to <4 x i32>
16021 %2 = icmp ult <4 x i32> %0, %1
16022 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16023 %4 = bitcast <32 x i1> %3 to i32
; Unmasked unsigned less-than compare of 4 x i32 lanes; the right-hand operand
; is a full 128-bit vector loaded from %__b. The 4-lane result is widened to
; 32 lanes (every lane past the first four uses index 4..7, i.e. the
; zeroinitializer operand) and bitcast to an i32 bitmask.
; Under the VLX prefix the load is expected to fold into vpcmpltud. Under the
; NoVLX prefix a vmovdqa load, a zmm compare and kshiftlw/kshiftrw $12 are
; expected.
16027 define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
16028 ; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem:
16029 ; VLX: # %bb.0: # %entry
16030 ; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
16031 ; VLX-NEXT: kmovd %k0, %eax
16034 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem:
16035 ; NoVLX: # %bb.0: # %entry
16036 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16037 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
16038 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16039 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16040 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16041 ; NoVLX-NEXT: kmovw %k0, %eax
16042 ; NoVLX-NEXT: vzeroupper
16045 %0 = bitcast <2 x i64> %__a to <4 x i32>
16046 %load = load <2 x i64>, ptr %__b
16047 %1 = bitcast <2 x i64> %load to <4 x i32>
16048 %2 = icmp ult <4 x i32> %0, %1
16049 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16050 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned less-than compare of 4 x i32 lanes with both operands in
; registers. The low 4 lanes of %__u (viewed as <8 x i1>) are ANDed with the
; compare result, which is widened to 32 lanes (every lane past the first four
; uses index 4..7, i.e. the zeroinitializer operand) and bitcast to i32.
; Under the VLX prefix a write-masked 128-bit vpcmpltud is expected. Under the
; NoVLX prefix a write-masked zmm compare followed by kshiftlw/kshiftrw $12 is
; expected.
16054 define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16055 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask:
16056 ; VLX: # %bb.0: # %entry
16057 ; VLX-NEXT: kmovd %edi, %k1
16058 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
16059 ; VLX-NEXT: kmovd %k0, %eax
16062 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask:
16063 ; NoVLX: # %bb.0: # %entry
16064 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
16065 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16066 ; NoVLX-NEXT: kmovw %edi, %k1
16067 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16068 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16069 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16070 ; NoVLX-NEXT: kmovw %k0, %eax
16071 ; NoVLX-NEXT: vzeroupper
16074 %0 = bitcast <2 x i64> %__a to <4 x i32>
16075 %1 = bitcast <2 x i64> %__b to <4 x i32>
16076 %2 = icmp ult <4 x i32> %0, %1
16077 %3 = bitcast i8 %__u to <8 x i1>
16078 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16079 %4 = and <4 x i1> %2, %extract.i
16080 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16081 %6 = bitcast <32 x i1> %5 to i32
; Masked unsigned less-than compare of 4 x i32 lanes; the right-hand operand
; is a full 128-bit vector loaded from %__b. The low 4 lanes of %__u (viewed
; as <8 x i1>) are ANDed with the compare result, which is widened to 32 lanes
; (every lane past the first four uses index 4..7, i.e. the zeroinitializer
; operand) and bitcast to i32.
; Under the VLX prefix a write-masked vpcmpltud with a folded load is
; expected. Under the NoVLX prefix a vmovdqa load, a write-masked zmm compare
; and kshiftlw/kshiftrw $12 are expected.
16085 define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
16086 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem:
16087 ; VLX: # %bb.0: # %entry
16088 ; VLX-NEXT: kmovd %edi, %k1
16089 ; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
16090 ; VLX-NEXT: kmovd %k0, %eax
16093 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem:
16094 ; NoVLX: # %bb.0: # %entry
16095 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16096 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
16097 ; NoVLX-NEXT: kmovw %edi, %k1
16098 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16099 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16100 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16101 ; NoVLX-NEXT: kmovw %k0, %eax
16102 ; NoVLX-NEXT: vzeroupper
16105 %0 = bitcast <2 x i64> %__a to <4 x i32>
16106 %load = load <2 x i64>, ptr %__b
16107 %1 = bitcast <2 x i64> %load to <4 x i32>
16108 %2 = icmp ult <4 x i32> %0, %1
16109 %3 = bitcast i8 %__u to <8 x i1>
16110 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16111 %4 = and <4 x i1> %2, %extract.i
16112 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16113 %6 = bitcast <32 x i1> %5 to i32
; Unmasked unsigned less-than compare of 4 x i32 lanes against a scalar i32
; loaded from %__b and splatted to all 4 lanes. The 4-lane result is widened
; to 32 lanes (every lane past the first four uses index 4..7, i.e. the
; zeroinitializer operand) and bitcast to an i32 bitmask.
; Both run configurations are expected to fold the splat into a broadcast
; memory operand: {1to4} under the VLX prefix and {1to16} under the NoVLX
; prefix, the latter followed by kshiftlw/kshiftrw $12.
16118 define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
16119 ; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b:
16120 ; VLX: # %bb.0: # %entry
16121 ; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
16122 ; VLX-NEXT: kmovd %k0, %eax
16125 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b:
16126 ; NoVLX: # %bb.0: # %entry
16127 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16128 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
16129 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16130 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16131 ; NoVLX-NEXT: kmovw %k0, %eax
16132 ; NoVLX-NEXT: vzeroupper
16135 %0 = bitcast <2 x i64> %__a to <4 x i32>
16136 %load = load i32, ptr %__b
16137 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16138 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16139 %2 = icmp ult <4 x i32> %0, %1
16140 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16141 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned less-than compare of 4 x i32 lanes against a scalar i32
; loaded from %__b and splatted to all 4 lanes. The low 4 lanes of %__u
; (viewed as <8 x i1>) are ANDed with the compare result, which is widened to
; 32 lanes (every lane past the first four uses index 4..7, i.e. the
; zeroinitializer operand) and bitcast to i32.
; Both run configurations are expected to use a write-masked vpcmpltud with a
; broadcast memory operand: {1to4} under the VLX prefix and {1to16} under the
; NoVLX prefix, the latter followed by kshiftlw/kshiftrw $12.
16145 define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
16146 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b:
16147 ; VLX: # %bb.0: # %entry
16148 ; VLX-NEXT: kmovd %edi, %k1
16149 ; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
16150 ; VLX-NEXT: kmovd %k0, %eax
16153 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b:
16154 ; NoVLX: # %bb.0: # %entry
16155 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16156 ; NoVLX-NEXT: kmovw %edi, %k1
16157 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
16158 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16159 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16160 ; NoVLX-NEXT: kmovw %k0, %eax
16161 ; NoVLX-NEXT: vzeroupper
16164 %0 = bitcast <2 x i64> %__a to <4 x i32>
16165 %load = load i32, ptr %__b
16166 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16167 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16168 %2 = icmp ult <4 x i32> %0, %1
16169 %3 = bitcast i8 %__u to <8 x i1>
16170 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16171 %4 = and <4 x i1> %extract.i, %2
16172 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16173 %6 = bitcast <32 x i1> %5 to i32
; Unmasked unsigned less-than compare of 4 x i32 lanes with both operands in
; registers. The 4-lane result is widened to 64 lanes (every lane past the
; first four uses index 4..7, i.e. the zeroinitializer operand) and bitcast to
; an i64 bitmask.
; Under the VLX prefix a 128-bit vpcmpltud with a kmovq readout is expected.
; Under the NoVLX prefix a zmm compare followed by kshiftlw/kshiftrw $12
; (clearing mask bits 4..15) and a kmovw readout is expected.
16178 define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16179 ; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask:
16180 ; VLX: # %bb.0: # %entry
16181 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
16182 ; VLX-NEXT: kmovq %k0, %rax
16185 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask:
16186 ; NoVLX: # %bb.0: # %entry
16187 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
16188 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16189 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16190 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16191 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16192 ; NoVLX-NEXT: kmovw %k0, %eax
16193 ; NoVLX-NEXT: vzeroupper
16196 %0 = bitcast <2 x i64> %__a to <4 x i32>
16197 %1 = bitcast <2 x i64> %__b to <4 x i32>
16198 %2 = icmp ult <4 x i32> %0, %1
16199 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16200 %4 = bitcast <64 x i1> %3 to i64
; Memory form: icmp ult of %__a (bitcast to <4 x i32>) against a <2 x i64>
; vector loaded from %__b and bitcast to <4 x i32>. The <4 x i1> result is
; widened to <64 x i1> by a shuffle whose indices 4-7 pick lanes of
; zeroinitializer, then bitcast to i64.
16204 define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
16205 ; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem:
16206 ; VLX: # %bb.0: # %entry
16207 ; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
16208 ; VLX-NEXT: kmovq %k0, %rax
16211 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem:
16212 ; NoVLX: # %bb.0: # %entry
16213 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16214 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
16215 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16216 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16217 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16218 ; NoVLX-NEXT: kmovw %k0, %eax
16219 ; NoVLX-NEXT: vzeroupper
16222 %0 = bitcast <2 x i64> %__a to <4 x i32>
16223 %load = load <2 x i64>, ptr %__b
16224 %1 = bitcast <2 x i64> %load to <4 x i32>
16225 %2 = icmp ult <4 x i32> %0, %1
16226 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16227 %4 = bitcast <64 x i1> %3 to i64
; Masked register form: icmp ult of %__a and %__b, both bitcast to <4 x i32>.
; The <4 x i1> result is ANDed with lanes 0-3 of %__u bitcast to <8 x i1>,
; widened to <64 x i1> by a shuffle whose indices 4-7 pick lanes of
; zeroinitializer, and bitcast to i64.
16231 define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16232 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask:
16233 ; VLX: # %bb.0: # %entry
16234 ; VLX-NEXT: kmovd %edi, %k1
16235 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
16236 ; VLX-NEXT: kmovq %k0, %rax
16239 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask:
16240 ; NoVLX: # %bb.0: # %entry
16241 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
16242 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16243 ; NoVLX-NEXT: kmovw %edi, %k1
16244 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16245 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16246 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16247 ; NoVLX-NEXT: kmovw %k0, %eax
16248 ; NoVLX-NEXT: vzeroupper
16251 %0 = bitcast <2 x i64> %__a to <4 x i32>
16252 %1 = bitcast <2 x i64> %__b to <4 x i32>
16253 %2 = icmp ult <4 x i32> %0, %1
16254 %3 = bitcast i8 %__u to <8 x i1>
16255 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16256 %4 = and <4 x i1> %2, %extract.i
16257 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16258 %6 = bitcast <64 x i1> %5 to i64
; Masked memory form: icmp ult of %__a (bitcast to <4 x i32>) against a
; <2 x i64> vector loaded from %__b and bitcast to <4 x i32>. The <4 x i1>
; result is ANDed with lanes 0-3 of %__u bitcast to <8 x i1>, widened to
; <64 x i1> by a shuffle whose indices 4-7 pick lanes of zeroinitializer, and
; bitcast to i64.
16262 define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
16263 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem:
16264 ; VLX: # %bb.0: # %entry
16265 ; VLX-NEXT: kmovd %edi, %k1
16266 ; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
16267 ; VLX-NEXT: kmovq %k0, %rax
16270 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem:
16271 ; NoVLX: # %bb.0: # %entry
16272 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16273 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
16274 ; NoVLX-NEXT: kmovw %edi, %k1
16275 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16276 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16277 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16278 ; NoVLX-NEXT: kmovw %k0, %eax
16279 ; NoVLX-NEXT: vzeroupper
16282 %0 = bitcast <2 x i64> %__a to <4 x i32>
16283 %load = load <2 x i64>, ptr %__b
16284 %1 = bitcast <2 x i64> %load to <4 x i32>
16285 %2 = icmp ult <4 x i32> %0, %1
16286 %3 = bitcast i8 %__u to <8 x i1>
16287 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16288 %4 = and <4 x i1> %2, %extract.i
16289 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16290 %6 = bitcast <64 x i1> %5 to i64
; Broadcast-from-memory form: icmp ult of %__a (bitcast to <4 x i32>) against
; an i32 loaded from %__b and splatted to all four lanes. The <4 x i1> result
; is widened to <64 x i1> by a shuffle whose indices 4-7 pick lanes of
; zeroinitializer, then bitcast to i64.
16295 define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
16296 ; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b:
16297 ; VLX: # %bb.0: # %entry
16298 ; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
16299 ; VLX-NEXT: kmovq %k0, %rax
16302 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b:
16303 ; NoVLX: # %bb.0: # %entry
16304 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16305 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
16306 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16307 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16308 ; NoVLX-NEXT: kmovw %k0, %eax
16309 ; NoVLX-NEXT: vzeroupper
16312 %0 = bitcast <2 x i64> %__a to <4 x i32>
16313 %load = load i32, ptr %__b
16314 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16315 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16316 %2 = icmp ult <4 x i32> %0, %1
16317 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16318 %4 = bitcast <64 x i1> %3 to i64
; Masked broadcast-from-memory form: icmp ult of %__a (bitcast to <4 x i32>)
; against an i32 loaded from %__b and splatted to all four lanes. The <4 x i1>
; result is ANDed with lanes 0-3 of %__u bitcast to <8 x i1>, widened to
; <64 x i1> by a shuffle whose indices 4-7 pick lanes of zeroinitializer, and
; bitcast to i64.
16322 define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
16323 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b:
16324 ; VLX: # %bb.0: # %entry
16325 ; VLX-NEXT: kmovd %edi, %k1
16326 ; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
16327 ; VLX-NEXT: kmovq %k0, %rax
16330 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b:
16331 ; NoVLX: # %bb.0: # %entry
16332 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16333 ; NoVLX-NEXT: kmovw %edi, %k1
16334 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
16335 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16336 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16337 ; NoVLX-NEXT: kmovw %k0, %eax
16338 ; NoVLX-NEXT: vzeroupper
16341 %0 = bitcast <2 x i64> %__a to <4 x i32>
16342 %load = load i32, ptr %__b
16343 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16344 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16345 %2 = icmp ult <4 x i32> %0, %1
16346 %3 = bitcast i8 %__u to <8 x i1>
16347 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16348 %4 = and <4 x i1> %extract.i, %2
16349 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16350 %6 = bitcast <64 x i1> %5 to i64
; Register form: icmp ult of %__a and %__b, both bitcast to <8 x i32>. The
; <8 x i1> result is widened to <16 x i1> by a shuffle whose indices 8-15 pick
; lanes of zeroinitializer, then bitcast to i16.
16355 define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16356 ; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask:
16357 ; VLX: # %bb.0: # %entry
16358 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0
16359 ; VLX-NEXT: kmovd %k0, %eax
16360 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16361 ; VLX-NEXT: vzeroupper
16364 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask:
16365 ; NoVLX: # %bb.0: # %entry
16366 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
16367 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16368 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16369 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16370 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16371 ; NoVLX-NEXT: kmovw %k0, %eax
16372 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16373 ; NoVLX-NEXT: vzeroupper
16376 %0 = bitcast <4 x i64> %__a to <8 x i32>
16377 %1 = bitcast <4 x i64> %__b to <8 x i32>
16378 %2 = icmp ult <8 x i32> %0, %1
16379 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16380 %4 = bitcast <16 x i1> %3 to i16
; Memory form: icmp ult of %__a (bitcast to <8 x i32>) against a <4 x i64>
; vector loaded from %__b and bitcast to <8 x i32>. The <8 x i1> result is
; widened to <16 x i1> by a shuffle whose indices 8-15 pick lanes of
; zeroinitializer, then bitcast to i16.
16384 define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
16385 ; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem:
16386 ; VLX: # %bb.0: # %entry
16387 ; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0
16388 ; VLX-NEXT: kmovd %k0, %eax
16389 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16390 ; VLX-NEXT: vzeroupper
16393 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem:
16394 ; NoVLX: # %bb.0: # %entry
16395 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16396 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
16397 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16398 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16399 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16400 ; NoVLX-NEXT: kmovw %k0, %eax
16401 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16402 ; NoVLX-NEXT: vzeroupper
16405 %0 = bitcast <4 x i64> %__a to <8 x i32>
16406 %load = load <4 x i64>, ptr %__b
16407 %1 = bitcast <4 x i64> %load to <8 x i32>
16408 %2 = icmp ult <8 x i32> %0, %1
16409 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16410 %4 = bitcast <16 x i1> %3 to i16
; Masked register form: icmp ult of %__a and %__b, both bitcast to <8 x i32>.
; The <8 x i1> result is ANDed with %__u bitcast to <8 x i1>, widened to
; <16 x i1> by a shuffle whose indices 8-15 pick lanes of zeroinitializer, and
; bitcast to i16.
16414 define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16415 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask:
16416 ; VLX: # %bb.0: # %entry
16417 ; VLX-NEXT: kmovd %edi, %k1
16418 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
16419 ; VLX-NEXT: kmovd %k0, %eax
16420 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16421 ; VLX-NEXT: vzeroupper
16424 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask:
16425 ; NoVLX: # %bb.0: # %entry
16426 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
16427 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16428 ; NoVLX-NEXT: kmovw %edi, %k1
16429 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16430 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16431 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16432 ; NoVLX-NEXT: kmovw %k0, %eax
16433 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16434 ; NoVLX-NEXT: vzeroupper
16437 %0 = bitcast <4 x i64> %__a to <8 x i32>
16438 %1 = bitcast <4 x i64> %__b to <8 x i32>
16439 %2 = icmp ult <8 x i32> %0, %1
16440 %3 = bitcast i8 %__u to <8 x i1>
16441 %4 = and <8 x i1> %2, %3
16442 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16443 %6 = bitcast <16 x i1> %5 to i16
; Masked memory form: icmp ult of %__a (bitcast to <8 x i32>) against a
; <4 x i64> vector loaded from %__b and bitcast to <8 x i32>. The <8 x i1>
; result is ANDed with %__u bitcast to <8 x i1>, widened to <16 x i1> by a
; shuffle whose indices 8-15 pick lanes of zeroinitializer, and bitcast to i16.
16447 define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
16448 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem:
16449 ; VLX: # %bb.0: # %entry
16450 ; VLX-NEXT: kmovd %edi, %k1
16451 ; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
16452 ; VLX-NEXT: kmovd %k0, %eax
16453 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16454 ; VLX-NEXT: vzeroupper
16457 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem:
16458 ; NoVLX: # %bb.0: # %entry
16459 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16460 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
16461 ; NoVLX-NEXT: kmovw %edi, %k1
16462 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16463 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16464 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16465 ; NoVLX-NEXT: kmovw %k0, %eax
16466 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16467 ; NoVLX-NEXT: vzeroupper
16470 %0 = bitcast <4 x i64> %__a to <8 x i32>
16471 %load = load <4 x i64>, ptr %__b
16472 %1 = bitcast <4 x i64> %load to <8 x i32>
16473 %2 = icmp ult <8 x i32> %0, %1
16474 %3 = bitcast i8 %__u to <8 x i1>
16475 %4 = and <8 x i1> %2, %3
16476 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16477 %6 = bitcast <16 x i1> %5 to i16
; Broadcast-from-memory form: icmp ult of %__a (bitcast to <8 x i32>) against
; an i32 loaded from %__b and splatted to all eight lanes. The <8 x i1> result
; is widened to <16 x i1> by a shuffle whose indices 8-15 pick lanes of
; zeroinitializer, then bitcast to i16.
16482 define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
16483 ; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b:
16484 ; VLX: # %bb.0: # %entry
16485 ; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
16486 ; VLX-NEXT: kmovd %k0, %eax
16487 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16488 ; VLX-NEXT: vzeroupper
16491 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b:
16492 ; NoVLX: # %bb.0: # %entry
16493 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16494 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
16495 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16496 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16497 ; NoVLX-NEXT: kmovw %k0, %eax
16498 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16499 ; NoVLX-NEXT: vzeroupper
16502 %0 = bitcast <4 x i64> %__a to <8 x i32>
16503 %load = load i32, ptr %__b
16504 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16505 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16506 %2 = icmp ult <8 x i32> %0, %1
16507 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16508 %4 = bitcast <16 x i1> %3 to i16
; Masked broadcast-from-memory form: icmp ult of %__a (bitcast to <8 x i32>)
; against an i32 loaded from %__b and splatted to all eight lanes. The
; <8 x i1> result is ANDed with %__u bitcast to <8 x i1>, widened to
; <16 x i1> by a shuffle whose indices 8-15 pick lanes of zeroinitializer, and
; bitcast to i16.
16512 define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
16513 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b:
16514 ; VLX: # %bb.0: # %entry
16515 ; VLX-NEXT: kmovd %edi, %k1
16516 ; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
16517 ; VLX-NEXT: kmovd %k0, %eax
16518 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16519 ; VLX-NEXT: vzeroupper
16522 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b:
16523 ; NoVLX: # %bb.0: # %entry
16524 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16525 ; NoVLX-NEXT: kmovw %edi, %k1
16526 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
16527 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16528 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16529 ; NoVLX-NEXT: kmovw %k0, %eax
16530 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16531 ; NoVLX-NEXT: vzeroupper
16534 %0 = bitcast <4 x i64> %__a to <8 x i32>
16535 %load = load i32, ptr %__b
16536 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16537 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16538 %2 = icmp ult <8 x i32> %0, %1
16539 %3 = bitcast i8 %__u to <8 x i1>
16540 %4 = and <8 x i1> %3, %2
16541 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16542 %6 = bitcast <16 x i1> %5 to i16
; Register form: icmp ult of %__a and %__b, both bitcast to <8 x i32>. The
; <8 x i1> result is widened to <32 x i1> by a shuffle whose indices 8-15 pick
; lanes of zeroinitializer, then bitcast to i32.
16547 define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16548 ; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask:
16549 ; VLX: # %bb.0: # %entry
16550 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0
16551 ; VLX-NEXT: kmovd %k0, %eax
16552 ; VLX-NEXT: vzeroupper
16555 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask:
16556 ; NoVLX: # %bb.0: # %entry
16557 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
16558 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16559 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16560 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16561 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16562 ; NoVLX-NEXT: kmovw %k0, %eax
16563 ; NoVLX-NEXT: vzeroupper
16566 %0 = bitcast <4 x i64> %__a to <8 x i32>
16567 %1 = bitcast <4 x i64> %__b to <8 x i32>
16568 %2 = icmp ult <8 x i32> %0, %1
16569 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16570 %4 = bitcast <32 x i1> %3 to i32
; Memory form: icmp ult of %__a (bitcast to <8 x i32>) against a <4 x i64>
; vector loaded from %__b and bitcast to <8 x i32>. The <8 x i1> result is
; widened to <32 x i1> by a shuffle whose indices 8-15 pick lanes of
; zeroinitializer, then bitcast to i32.
16574 define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
16575 ; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem:
16576 ; VLX: # %bb.0: # %entry
16577 ; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0
16578 ; VLX-NEXT: kmovd %k0, %eax
16579 ; VLX-NEXT: vzeroupper
16582 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem:
16583 ; NoVLX: # %bb.0: # %entry
16584 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16585 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
16586 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16587 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16588 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16589 ; NoVLX-NEXT: kmovw %k0, %eax
16590 ; NoVLX-NEXT: vzeroupper
16593 %0 = bitcast <4 x i64> %__a to <8 x i32>
16594 %load = load <4 x i64>, ptr %__b
16595 %1 = bitcast <4 x i64> %load to <8 x i32>
16596 %2 = icmp ult <8 x i32> %0, %1
16597 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16598 %4 = bitcast <32 x i1> %3 to i32
; Masked register form: icmp ult of %__a and %__b, both bitcast to <8 x i32>.
; The <8 x i1> result is ANDed with %__u bitcast to <8 x i1>, widened to
; <32 x i1> by a shuffle whose indices 8-15 pick lanes of zeroinitializer, and
; bitcast to i32.
16602 define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16603 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask:
16604 ; VLX: # %bb.0: # %entry
16605 ; VLX-NEXT: kmovd %edi, %k1
16606 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
16607 ; VLX-NEXT: kmovd %k0, %eax
16608 ; VLX-NEXT: vzeroupper
16611 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask:
16612 ; NoVLX: # %bb.0: # %entry
16613 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
16614 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16615 ; NoVLX-NEXT: kmovw %edi, %k1
16616 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16617 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16618 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16619 ; NoVLX-NEXT: kmovw %k0, %eax
16620 ; NoVLX-NEXT: vzeroupper
16623 %0 = bitcast <4 x i64> %__a to <8 x i32>
16624 %1 = bitcast <4 x i64> %__b to <8 x i32>
16625 %2 = icmp ult <8 x i32> %0, %1
16626 %3 = bitcast i8 %__u to <8 x i1>
16627 %4 = and <8 x i1> %2, %3
16628 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16629 %6 = bitcast <32 x i1> %5 to i32
; Masked memory form: icmp ult of %__a (bitcast to <8 x i32>) against a
; <4 x i64> vector loaded from %__b and bitcast to <8 x i32>. The <8 x i1>
; result is ANDed with %__u bitcast to <8 x i1>, widened to <32 x i1> by a
; shuffle whose indices 8-15 pick lanes of zeroinitializer, and bitcast to i32.
16633 define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
16634 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem:
16635 ; VLX: # %bb.0: # %entry
16636 ; VLX-NEXT: kmovd %edi, %k1
16637 ; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
16638 ; VLX-NEXT: kmovd %k0, %eax
16639 ; VLX-NEXT: vzeroupper
16642 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem:
16643 ; NoVLX: # %bb.0: # %entry
16644 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16645 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
16646 ; NoVLX-NEXT: kmovw %edi, %k1
16647 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16648 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16649 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16650 ; NoVLX-NEXT: kmovw %k0, %eax
16651 ; NoVLX-NEXT: vzeroupper
16654 %0 = bitcast <4 x i64> %__a to <8 x i32>
16655 %load = load <4 x i64>, ptr %__b
16656 %1 = bitcast <4 x i64> %load to <8 x i32>
16657 %2 = icmp ult <8 x i32> %0, %1
16658 %3 = bitcast i8 %__u to <8 x i1>
16659 %4 = and <8 x i1> %2, %3
16660 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16661 %6 = bitcast <32 x i1> %5 to i32
; Broadcast-from-memory form: icmp ult of %__a (bitcast to <8 x i32>) against
; an i32 loaded from %__b and splatted to all eight lanes. The <8 x i1> result
; is widened to <32 x i1> by a shuffle whose indices 8-15 pick lanes of
; zeroinitializer, then bitcast to i32.
16666 define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
16667 ; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b:
16668 ; VLX: # %bb.0: # %entry
16669 ; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
16670 ; VLX-NEXT: kmovd %k0, %eax
16671 ; VLX-NEXT: vzeroupper
16674 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b:
16675 ; NoVLX: # %bb.0: # %entry
16676 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16677 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
16678 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16679 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16680 ; NoVLX-NEXT: kmovw %k0, %eax
16681 ; NoVLX-NEXT: vzeroupper
16684 %0 = bitcast <4 x i64> %__a to <8 x i32>
16685 %load = load i32, ptr %__b
16686 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16687 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16688 %2 = icmp ult <8 x i32> %0, %1
16689 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16690 %4 = bitcast <32 x i1> %3 to i32
; Masked broadcast-from-memory form: icmp ult of %__a (bitcast to <8 x i32>)
; against an i32 loaded from %__b and splatted to all eight lanes. The
; <8 x i1> result is ANDed with %__u bitcast to <8 x i1>, widened to
; <32 x i1> by a shuffle whose indices 8-15 pick lanes of zeroinitializer, and
; bitcast to i32.
16694 define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
16695 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b:
16696 ; VLX: # %bb.0: # %entry
16697 ; VLX-NEXT: kmovd %edi, %k1
16698 ; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
16699 ; VLX-NEXT: kmovd %k0, %eax
16700 ; VLX-NEXT: vzeroupper
16703 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b:
16704 ; NoVLX: # %bb.0: # %entry
16705 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16706 ; NoVLX-NEXT: kmovw %edi, %k1
16707 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
16708 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16709 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16710 ; NoVLX-NEXT: kmovw %k0, %eax
16711 ; NoVLX-NEXT: vzeroupper
16714 %0 = bitcast <4 x i64> %__a to <8 x i32>
16715 %load = load i32, ptr %__b
16716 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16717 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16718 %2 = icmp ult <8 x i32> %0, %1
16719 %3 = bitcast i8 %__u to <8 x i1>
16720 %4 = and <8 x i1> %3, %2
16721 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16722 %6 = bitcast <32 x i1> %5 to i32
; Register form: icmp ult of %__a and %__b, both bitcast to <8 x i32>. The
; <8 x i1> result is widened to <64 x i1> by a shuffle whose indices 8-15 pick
; lanes of zeroinitializer, then bitcast to i64.
16727 define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16728 ; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask:
16729 ; VLX: # %bb.0: # %entry
16730 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0
16731 ; VLX-NEXT: kmovq %k0, %rax
16732 ; VLX-NEXT: vzeroupper
16735 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask:
16736 ; NoVLX: # %bb.0: # %entry
16737 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
16738 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16739 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16740 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16741 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16742 ; NoVLX-NEXT: kmovw %k0, %eax
16743 ; NoVLX-NEXT: vzeroupper
16746 %0 = bitcast <4 x i64> %__a to <8 x i32>
16747 %1 = bitcast <4 x i64> %__b to <8 x i32>
16748 %2 = icmp ult <8 x i32> %0, %1
16749 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16750 %4 = bitcast <64 x i1> %3 to i64
; Memory form: icmp ult of %__a (bitcast to <8 x i32>) against a <4 x i64>
; vector loaded from %__b and bitcast to <8 x i32>. The <8 x i1> result is
; widened to <64 x i1> by a shuffle whose indices 8-15 pick lanes of
; zeroinitializer, then bitcast to i64.
16754 define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
16755 ; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem:
16756 ; VLX: # %bb.0: # %entry
16757 ; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0
16758 ; VLX-NEXT: kmovq %k0, %rax
16759 ; VLX-NEXT: vzeroupper
16762 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem:
16763 ; NoVLX: # %bb.0: # %entry
16764 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16765 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
16766 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16767 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16768 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16769 ; NoVLX-NEXT: kmovw %k0, %eax
16770 ; NoVLX-NEXT: vzeroupper
16773 %0 = bitcast <4 x i64> %__a to <8 x i32>
16774 %load = load <4 x i64>, ptr %__b
16775 %1 = bitcast <4 x i64> %load to <8 x i32>
16776 %2 = icmp ult <8 x i32> %0, %1
16777 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16778 %4 = bitcast <64 x i1> %3 to i64
; Masked register form: icmp ult of %__a and %__b, both bitcast to <8 x i32>.
; The <8 x i1> result is ANDed with %__u bitcast to <8 x i1>, widened to
; <64 x i1> by a shuffle whose indices 8-15 pick lanes of zeroinitializer, and
; bitcast to i64.
16782 define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16783 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask:
16784 ; VLX: # %bb.0: # %entry
16785 ; VLX-NEXT: kmovd %edi, %k1
16786 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
16787 ; VLX-NEXT: kmovq %k0, %rax
16788 ; VLX-NEXT: vzeroupper
16791 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask:
16792 ; NoVLX: # %bb.0: # %entry
16793 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
16794 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16795 ; NoVLX-NEXT: kmovw %edi, %k1
16796 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16797 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16798 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16799 ; NoVLX-NEXT: kmovw %k0, %eax
16800 ; NoVLX-NEXT: vzeroupper
16803 %0 = bitcast <4 x i64> %__a to <8 x i32>
16804 %1 = bitcast <4 x i64> %__b to <8 x i32>
16805 %2 = icmp ult <8 x i32> %0, %1
16806 %3 = bitcast i8 %__u to <8 x i1>
16807 %4 = and <8 x i1> %2, %3
16808 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16809 %6 = bitcast <64 x i1> %5 to i64
; Masked memory form: icmp ult of %__a (bitcast to <8 x i32>) against a
; <4 x i64> vector loaded from %__b and bitcast to <8 x i32>. The <8 x i1>
; result is ANDed with %__u bitcast to <8 x i1>, widened to <64 x i1> by a
; shuffle whose indices 8-15 pick lanes of zeroinitializer, and bitcast to i64.
16813 define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
16814 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem:
16815 ; VLX: # %bb.0: # %entry
16816 ; VLX-NEXT: kmovd %edi, %k1
16817 ; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
16818 ; VLX-NEXT: kmovq %k0, %rax
16819 ; VLX-NEXT: vzeroupper
16822 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem:
16823 ; NoVLX: # %bb.0: # %entry
16824 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16825 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
16826 ; NoVLX-NEXT: kmovw %edi, %k1
16827 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16828 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16829 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16830 ; NoVLX-NEXT: kmovw %k0, %eax
16831 ; NoVLX-NEXT: vzeroupper
16834 %0 = bitcast <4 x i64> %__a to <8 x i32>
16835 %load = load <4 x i64>, ptr %__b
16836 %1 = bitcast <4 x i64> %load to <8 x i32>
16837 %2 = icmp ult <8 x i32> %0, %1
16838 %3 = bitcast i8 %__u to <8 x i1>
16839 %4 = and <8 x i1> %2, %3
16840 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16841 %6 = bitcast <64 x i1> %5 to i64
; Broadcast-from-memory form: icmp ult of %__a (bitcast to <8 x i32>) against
; an i32 loaded from %__b and splatted to all eight lanes. The <8 x i1> result
; is widened to <64 x i1> by a shuffle whose indices 8-15 pick lanes of
; zeroinitializer, then bitcast to i64.
16846 define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
16847 ; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b:
16848 ; VLX: # %bb.0: # %entry
16849 ; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
16850 ; VLX-NEXT: kmovq %k0, %rax
16851 ; VLX-NEXT: vzeroupper
16854 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b:
16855 ; NoVLX: # %bb.0: # %entry
16856 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16857 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
16858 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16859 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16860 ; NoVLX-NEXT: kmovw %k0, %eax
16861 ; NoVLX-NEXT: vzeroupper
16864 %0 = bitcast <4 x i64> %__a to <8 x i32>
16865 %load = load i32, ptr %__b
16866 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16867 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16868 %2 = icmp ult <8 x i32> %0, %1
16869 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16870 %4 = bitcast <64 x i1> %3 to i64
; Masked variant of the <8 x i32> unsigned less-than compare against a splatted
; i32 loaded from %__b. The compare result is ANDed with %__u (bitcast to
; <8 x i1>), widened to <64 x i1> with zero upper lanes, and bitcast to i64.
; Both runs expect %edi to be moved into %k1 and used as the compare write-mask;
; the NoVLX run additionally expects the zmm form with {1to16} and a
; kshiftlw/kshiftrw $8 pair.
16874 define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
16875 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b:
16876 ; VLX: # %bb.0: # %entry
16877 ; VLX-NEXT: kmovd %edi, %k1
16878 ; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
16879 ; VLX-NEXT: kmovq %k0, %rax
16880 ; VLX-NEXT: vzeroupper
16883 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b:
16884 ; NoVLX: # %bb.0: # %entry
16885 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16886 ; NoVLX-NEXT: kmovw %edi, %k1
16887 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
16888 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16889 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16890 ; NoVLX-NEXT: kmovw %k0, %eax
16891 ; NoVLX-NEXT: vzeroupper
16894 %0 = bitcast <4 x i64> %__a to <8 x i32>
16895 %load = load i32, ptr %__b
16896 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16897 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16898 %2 = icmp ult <8 x i32> %0, %1
16899 %3 = bitcast i8 %__u to <8 x i1>
16900 %4 = and <8 x i1> %3, %2
16901 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16902 %6 = bitcast <64 x i1> %5 to i64
; Unmasked unsigned less-than compare of two register operands viewed as
; <16 x i32>. The <16 x i1> result is widened to <32 x i1> (lanes 16-31 come
; from the zeroinitializer operand) and bitcast to i32. Both runs expect a
; single zmm vpcmpltud; they differ only in the mask-to-GPR move (kmovd for
; the VLX run, kmovw for the NoVLX run).
16907 define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
16908 ; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask:
16909 ; VLX: # %bb.0: # %entry
16910 ; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16911 ; VLX-NEXT: kmovd %k0, %eax
16912 ; VLX-NEXT: vzeroupper
16915 ; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask:
16916 ; NoVLX: # %bb.0: # %entry
16917 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16918 ; NoVLX-NEXT: kmovw %k0, %eax
16919 ; NoVLX-NEXT: vzeroupper
16922 %0 = bitcast <8 x i64> %__a to <16 x i32>
16923 %1 = bitcast <8 x i64> %__b to <16 x i32>
16924 %2 = icmp ult <16 x i32> %0, %1
16925 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
16926 %4 = bitcast <32 x i1> %3 to i32
; Unmasked <16 x i32> unsigned less-than compare where the second operand is a
; full <8 x i64> vector loaded from %__b. The <16 x i1> result is widened to
; <32 x i1> with zero upper lanes and bitcast to i32. Both runs expect the load
; to be folded into the vpcmpltud memory operand.
16930 define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
16931 ; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem:
16932 ; VLX: # %bb.0: # %entry
16933 ; VLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
16934 ; VLX-NEXT: kmovd %k0, %eax
16935 ; VLX-NEXT: vzeroupper
16938 ; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem:
16939 ; NoVLX: # %bb.0: # %entry
16940 ; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
16941 ; NoVLX-NEXT: kmovw %k0, %eax
16942 ; NoVLX-NEXT: vzeroupper
16945 %0 = bitcast <8 x i64> %__a to <16 x i32>
16946 %load = load <8 x i64>, ptr %__b
16947 %1 = bitcast <8 x i64> %load to <16 x i32>
16948 %2 = icmp ult <16 x i32> %0, %1
16949 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
16950 %4 = bitcast <32 x i1> %3 to i32
; Masked <16 x i32> unsigned less-than compare of two register operands. The
; compare result is ANDed with %__u (bitcast to <16 x i1>), widened to
; <32 x i1> with zero upper lanes, and bitcast to i32. The VLX run expects the
; mask to be applied as a {%k1} write-mask on the compare; the NoVLX run
; expects an unmasked compare followed by a scalar andl with %edi.
16954 define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
16955 ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask:
16956 ; VLX: # %bb.0: # %entry
16957 ; VLX-NEXT: kmovd %edi, %k1
16958 ; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16959 ; VLX-NEXT: kmovd %k0, %eax
16960 ; VLX-NEXT: vzeroupper
16963 ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask:
16964 ; NoVLX: # %bb.0: # %entry
16965 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16966 ; NoVLX-NEXT: kmovw %k0, %eax
16967 ; NoVLX-NEXT: andl %edi, %eax
16968 ; NoVLX-NEXT: vzeroupper
16971 %0 = bitcast <8 x i64> %__a to <16 x i32>
16972 %1 = bitcast <8 x i64> %__b to <16 x i32>
16973 %2 = icmp ult <16 x i32> %0, %1
16974 %3 = bitcast i16 %__u to <16 x i1>
16975 %4 = and <16 x i1> %2, %3
16976 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
16977 %6 = bitcast <32 x i1> %5 to i32
; Masked <16 x i32> unsigned less-than compare where the second operand is a
; full vector loaded from %__b. The result is ANDed with %__u (as <16 x i1>),
; widened to <32 x i1> with zero upper lanes, and bitcast to i32. The VLX run
; expects a {%k1}-masked compare with a folded load; the NoVLX run expects an
; unmasked compare with a folded load followed by andl with %edi.
16981 define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
16982 ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem:
16983 ; VLX: # %bb.0: # %entry
16984 ; VLX-NEXT: kmovd %edi, %k1
16985 ; VLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
16986 ; VLX-NEXT: kmovd %k0, %eax
16987 ; VLX-NEXT: vzeroupper
16990 ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem:
16991 ; NoVLX: # %bb.0: # %entry
16992 ; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0
16993 ; NoVLX-NEXT: kmovw %k0, %eax
16994 ; NoVLX-NEXT: andl %edi, %eax
16995 ; NoVLX-NEXT: vzeroupper
16998 %0 = bitcast <8 x i64> %__a to <16 x i32>
16999 %load = load <8 x i64>, ptr %__b
17000 %1 = bitcast <8 x i64> %load to <16 x i32>
17001 %2 = icmp ult <16 x i32> %0, %1
17002 %3 = bitcast i16 %__u to <16 x i1>
17003 %4 = and <16 x i1> %2, %3
17004 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17005 %6 = bitcast <32 x i1> %5 to i32
; Unmasked <16 x i32> unsigned less-than compare against a scalar i32 loaded
; from %__b and splatted to all 16 lanes. The <16 x i1> result is widened to
; <32 x i1> with zero upper lanes and bitcast to i32. Both runs expect the
; splat to be folded into a {1to16} embedded-broadcast memory operand.
17010 define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
17011 ; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b:
17012 ; VLX: # %bb.0: # %entry
17013 ; VLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
17014 ; VLX-NEXT: kmovd %k0, %eax
17015 ; VLX-NEXT: vzeroupper
17018 ; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b:
17019 ; NoVLX: # %bb.0: # %entry
17020 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
17021 ; NoVLX-NEXT: kmovw %k0, %eax
17022 ; NoVLX-NEXT: vzeroupper
17025 %0 = bitcast <8 x i64> %__a to <16 x i32>
17026 %load = load i32, ptr %__b
17027 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
17028 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17029 %2 = icmp ult <16 x i32> %0, %1
17030 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17031 %4 = bitcast <32 x i1> %3 to i32
; Masked <16 x i32> unsigned less-than compare against a splatted i32 loaded
; from %__b. The result is ANDed with %__u (as <16 x i1>), widened to
; <32 x i1> with zero upper lanes, and bitcast to i32. The VLX run expects a
; {%k1}-masked compare with {1to16} broadcast; the NoVLX run expects an
; unmasked {1to16} compare followed by andl with %edi.
17035 define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
17036 ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b:
17037 ; VLX: # %bb.0: # %entry
17038 ; VLX-NEXT: kmovd %edi, %k1
17039 ; VLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
17040 ; VLX-NEXT: kmovd %k0, %eax
17041 ; VLX-NEXT: vzeroupper
17044 ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b:
17045 ; NoVLX: # %bb.0: # %entry
17046 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0
17047 ; NoVLX-NEXT: kmovw %k0, %eax
17048 ; NoVLX-NEXT: andl %edi, %eax
17049 ; NoVLX-NEXT: vzeroupper
17052 %0 = bitcast <8 x i64> %__a to <16 x i32>
17053 %load = load i32, ptr %__b
17054 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
17055 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17056 %2 = icmp ult <16 x i32> %0, %1
17057 %3 = bitcast i16 %__u to <16 x i1>
17058 %4 = and <16 x i1> %3, %2
17059 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17060 %6 = bitcast <32 x i1> %5 to i32
; Unmasked <16 x i32> unsigned less-than compare of two register operands. The
; <16 x i1> result is widened to <64 x i1> (every index >= 16 selects from the
; zeroinitializer operand) and bitcast to i64. The VLX run expects kmovq into
; %rax; the NoVLX run expects kmovw into %eax.
17065 define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
17066 ; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask:
17067 ; VLX: # %bb.0: # %entry
17068 ; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
17069 ; VLX-NEXT: kmovq %k0, %rax
17070 ; VLX-NEXT: vzeroupper
17073 ; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask:
17074 ; NoVLX: # %bb.0: # %entry
17075 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
17076 ; NoVLX-NEXT: kmovw %k0, %eax
17077 ; NoVLX-NEXT: vzeroupper
17080 %0 = bitcast <8 x i64> %__a to <16 x i32>
17081 %1 = bitcast <8 x i64> %__b to <16 x i32>
17082 %2 = icmp ult <16 x i32> %0, %1
17083 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17084 %4 = bitcast <64 x i1> %3 to i64
; Unmasked <16 x i32> unsigned less-than compare where the second operand is a
; full vector loaded from %__b. The <16 x i1> result is widened to <64 x i1>
; with zero upper lanes and bitcast to i64. Both runs expect the load to be
; folded into the vpcmpltud memory operand.
17088 define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
17089 ; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem:
17090 ; VLX: # %bb.0: # %entry
17091 ; VLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
17092 ; VLX-NEXT: kmovq %k0, %rax
17093 ; VLX-NEXT: vzeroupper
17096 ; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem:
17097 ; NoVLX: # %bb.0: # %entry
17098 ; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
17099 ; NoVLX-NEXT: kmovw %k0, %eax
17100 ; NoVLX-NEXT: vzeroupper
17103 %0 = bitcast <8 x i64> %__a to <16 x i32>
17104 %load = load <8 x i64>, ptr %__b
17105 %1 = bitcast <8 x i64> %load to <16 x i32>
17106 %2 = icmp ult <16 x i32> %0, %1
17107 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17108 %4 = bitcast <64 x i1> %3 to i64
; Masked <16 x i32> unsigned less-than compare of two register operands. The
; result is ANDed with %__u (as <16 x i1>), widened to <64 x i1> with zero
; upper lanes, and bitcast to i64. The VLX run expects a {%k1}-masked compare
; and kmovq; the NoVLX run expects an unmasked compare, kmovw, then andl with
; %edi.
17112 define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
17113 ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask:
17114 ; VLX: # %bb.0: # %entry
17115 ; VLX-NEXT: kmovd %edi, %k1
17116 ; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
17117 ; VLX-NEXT: kmovq %k0, %rax
17118 ; VLX-NEXT: vzeroupper
17121 ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask:
17122 ; NoVLX: # %bb.0: # %entry
17123 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
17124 ; NoVLX-NEXT: kmovw %k0, %eax
17125 ; NoVLX-NEXT: andl %edi, %eax
17126 ; NoVLX-NEXT: vzeroupper
17129 %0 = bitcast <8 x i64> %__a to <16 x i32>
17130 %1 = bitcast <8 x i64> %__b to <16 x i32>
17131 %2 = icmp ult <16 x i32> %0, %1
17132 %3 = bitcast i16 %__u to <16 x i1>
17133 %4 = and <16 x i1> %2, %3
17134 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17135 %6 = bitcast <64 x i1> %5 to i64
; Masked <16 x i32> unsigned less-than compare where the second operand is a
; full vector loaded from %__b. The result is ANDed with %__u (as <16 x i1>),
; widened to <64 x i1> with zero upper lanes, and bitcast to i64. The VLX run
; expects a {%k1}-masked compare with a folded load; the NoVLX run expects an
; unmasked compare with a folded load followed by andl with %edi.
17139 define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
17140 ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem:
17141 ; VLX: # %bb.0: # %entry
17142 ; VLX-NEXT: kmovd %edi, %k1
17143 ; VLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
17144 ; VLX-NEXT: kmovq %k0, %rax
17145 ; VLX-NEXT: vzeroupper
17148 ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem:
17149 ; NoVLX: # %bb.0: # %entry
17150 ; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0
17151 ; NoVLX-NEXT: kmovw %k0, %eax
17152 ; NoVLX-NEXT: andl %edi, %eax
17153 ; NoVLX-NEXT: vzeroupper
17156 %0 = bitcast <8 x i64> %__a to <16 x i32>
17157 %load = load <8 x i64>, ptr %__b
17158 %1 = bitcast <8 x i64> %load to <16 x i32>
17159 %2 = icmp ult <16 x i32> %0, %1
17160 %3 = bitcast i16 %__u to <16 x i1>
17161 %4 = and <16 x i1> %2, %3
17162 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17163 %6 = bitcast <64 x i1> %5 to i64
; Unmasked <16 x i32> unsigned less-than compare against a scalar i32 loaded
; from %__b and splatted to all 16 lanes. The <16 x i1> result is widened to
; <64 x i1> with zero upper lanes and bitcast to i64. Both runs expect the
; splat to be folded into a {1to16} embedded-broadcast memory operand.
17168 define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
17169 ; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b:
17170 ; VLX: # %bb.0: # %entry
17171 ; VLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
17172 ; VLX-NEXT: kmovq %k0, %rax
17173 ; VLX-NEXT: vzeroupper
17176 ; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b:
17177 ; NoVLX: # %bb.0: # %entry
17178 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
17179 ; NoVLX-NEXT: kmovw %k0, %eax
17180 ; NoVLX-NEXT: vzeroupper
17183 %0 = bitcast <8 x i64> %__a to <16 x i32>
17184 %load = load i32, ptr %__b
17185 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
17186 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17187 %2 = icmp ult <16 x i32> %0, %1
17188 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17189 %4 = bitcast <64 x i1> %3 to i64
; Masked <16 x i32> unsigned less-than compare against a splatted i32 loaded
; from %__b. The result is ANDed with %__u (as <16 x i1>), widened to
; <64 x i1> with zero upper lanes, and bitcast to i64. The VLX run expects a
; {%k1}-masked {1to16} compare and kmovq; the NoVLX run expects an unmasked
; {1to16} compare, kmovw, then andl with %edi.
17193 define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
17194 ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b:
17195 ; VLX: # %bb.0: # %entry
17196 ; VLX-NEXT: kmovd %edi, %k1
17197 ; VLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
17198 ; VLX-NEXT: kmovq %k0, %rax
17199 ; VLX-NEXT: vzeroupper
17202 ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b:
17203 ; NoVLX: # %bb.0: # %entry
17204 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0
17205 ; NoVLX-NEXT: kmovw %k0, %eax
17206 ; NoVLX-NEXT: andl %edi, %eax
17207 ; NoVLX-NEXT: vzeroupper
17210 %0 = bitcast <8 x i64> %__a to <16 x i32>
17211 %load = load i32, ptr %__b
17212 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
17213 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17214 %2 = icmp ult <16 x i32> %0, %1
17215 %3 = bitcast i16 %__u to <16 x i1>
17216 %4 = and <16 x i1> %3, %2
17217 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17218 %6 = bitcast <64 x i1> %5 to i64
; Unmasked unsigned less-than compare of two <2 x i64> register operands. The
; <2 x i1> result is widened to <4 x i1> (lanes 2-3 come from the
; zeroinitializer operand) and bitcast to i4. The VLX run expects an xmm
; vpcmpltuq and kmovb; the NoVLX run expects the operands to be treated as zmm,
; a zmm vpcmpltuq, and the result masked with andl $3.
17223 define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17224 ; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask:
17225 ; VLX: # %bb.0: # %entry
17226 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
17227 ; VLX-NEXT: kmovb %k0, %eax
17230 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask:
17231 ; NoVLX: # %bb.0: # %entry
17232 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17233 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17234 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17235 ; NoVLX-NEXT: kmovw %k0, %eax
17236 ; NoVLX-NEXT: andl $3, %eax
17237 ; NoVLX-NEXT: vzeroupper
17240 %0 = bitcast <2 x i64> %__a to <2 x i64>
17241 %1 = bitcast <2 x i64> %__b to <2 x i64>
17242 %2 = icmp ult <2 x i64> %0, %1
17243 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17244 %4 = bitcast <4 x i1> %3 to i4
; Unmasked <2 x i64> unsigned less-than compare where the second operand is a
; full <2 x i64> vector loaded from %__b. The <2 x i1> result is widened to
; <4 x i1> with zero upper lanes and bitcast to i4. The VLX run expects the
; load folded into an xmm vpcmpltuq; the NoVLX run expects a separate vmovdqa
; load, a zmm vpcmpltuq, and andl $3.
17248 define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
17249 ; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem:
17250 ; VLX: # %bb.0: # %entry
17251 ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
17252 ; VLX-NEXT: kmovb %k0, %eax
17255 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem:
17256 ; NoVLX: # %bb.0: # %entry
17257 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17258 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
17259 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17260 ; NoVLX-NEXT: kmovw %k0, %eax
17261 ; NoVLX-NEXT: andl $3, %eax
17262 ; NoVLX-NEXT: vzeroupper
17265 %0 = bitcast <2 x i64> %__a to <2 x i64>
17266 %load = load <2 x i64>, ptr %__b
17267 %1 = bitcast <2 x i64> %load to <2 x i64>
17268 %2 = icmp ult <2 x i64> %0, %1
17269 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17270 %4 = bitcast <4 x i1> %3 to i4
; Masked <2 x i64> unsigned less-than compare of two register operands. The
; mask is the low two lanes of %__u (bitcast to <8 x i1>, lanes 0 and 1
; extracted); it is ANDed with the compare result, which is then widened to
; <4 x i1> with zero upper lanes and bitcast to i4. Both runs expect %edi in
; %k1 as the compare write-mask; the NoVLX run also expects the zmm form and
; andl $3.
17274 define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17275 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask:
17276 ; VLX: # %bb.0: # %entry
17277 ; VLX-NEXT: kmovd %edi, %k1
17278 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
17279 ; VLX-NEXT: kmovb %k0, %eax
17282 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask:
17283 ; NoVLX: # %bb.0: # %entry
17284 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17285 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17286 ; NoVLX-NEXT: kmovw %edi, %k1
17287 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17288 ; NoVLX-NEXT: kmovw %k0, %eax
17289 ; NoVLX-NEXT: andl $3, %eax
17290 ; NoVLX-NEXT: vzeroupper
17293 %0 = bitcast <2 x i64> %__a to <2 x i64>
17294 %1 = bitcast <2 x i64> %__b to <2 x i64>
17295 %2 = icmp ult <2 x i64> %0, %1
17296 %3 = bitcast i8 %__u to <8 x i1>
17297 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17298 %4 = and <2 x i1> %2, %extract.i
17299 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17300 %6 = bitcast <4 x i1> %5 to i4
; Masked <2 x i64> unsigned less-than compare where the second operand is a
; full vector loaded from %__b. The low two lanes of %__u are ANDed with the
; compare result, which is widened to <4 x i1> with zero upper lanes and
; bitcast to i4. The VLX run expects a {%k1}-masked xmm compare with a folded
; load; the NoVLX run expects a separate vmovdqa load, a {%k1}-masked zmm
; compare, and andl $3.
17304 define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
17305 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem:
17306 ; VLX: # %bb.0: # %entry
17307 ; VLX-NEXT: kmovd %edi, %k1
17308 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
17309 ; VLX-NEXT: kmovb %k0, %eax
17312 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem:
17313 ; NoVLX: # %bb.0: # %entry
17314 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17315 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
17316 ; NoVLX-NEXT: kmovw %edi, %k1
17317 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17318 ; NoVLX-NEXT: kmovw %k0, %eax
17319 ; NoVLX-NEXT: andl $3, %eax
17320 ; NoVLX-NEXT: vzeroupper
17323 %0 = bitcast <2 x i64> %__a to <2 x i64>
17324 %load = load <2 x i64>, ptr %__b
17325 %1 = bitcast <2 x i64> %load to <2 x i64>
17326 %2 = icmp ult <2 x i64> %0, %1
17327 %3 = bitcast i8 %__u to <8 x i1>
17328 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17329 %4 = and <2 x i1> %2, %extract.i
17330 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17331 %6 = bitcast <4 x i1> %5 to i4
; Unmasked <2 x i64> unsigned less-than compare against a scalar i64 loaded
; from %__b and splatted to both lanes. The <2 x i1> result is widened to
; <4 x i1> with zero upper lanes and bitcast to i4. The VLX run expects an xmm
; compare with a {1to2} embedded broadcast; the NoVLX run expects a zmm compare
; with {1to8} followed by andl $3.
17336 define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
17337 ; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b:
17338 ; VLX: # %bb.0: # %entry
17339 ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
17340 ; VLX-NEXT: kmovb %k0, %eax
17343 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b:
17344 ; NoVLX: # %bb.0: # %entry
17345 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17346 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
17347 ; NoVLX-NEXT: kmovw %k0, %eax
17348 ; NoVLX-NEXT: andl $3, %eax
17349 ; NoVLX-NEXT: vzeroupper
17352 %0 = bitcast <2 x i64> %__a to <2 x i64>
17353 %load = load i64, ptr %__b
17354 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17355 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17356 %2 = icmp ult <2 x i64> %0, %1
17357 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17358 %4 = bitcast <4 x i1> %3 to i4
; Masked <2 x i64> unsigned less-than compare against a splatted i64 loaded
; from %__b. The low two lanes of %__u are ANDed with the compare result, which
; is widened to <4 x i1> with zero upper lanes and bitcast to i4. The VLX run
; expects a {%k1}-masked xmm compare with {1to2}; the NoVLX run expects a
; {%k1}-masked zmm compare with {1to8} followed by andl $3.
17362 define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
17363 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b:
17364 ; VLX: # %bb.0: # %entry
17365 ; VLX-NEXT: kmovd %edi, %k1
17366 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
17367 ; VLX-NEXT: kmovb %k0, %eax
17370 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b:
17371 ; NoVLX: # %bb.0: # %entry
17372 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17373 ; NoVLX-NEXT: kmovw %edi, %k1
17374 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
17375 ; NoVLX-NEXT: kmovw %k0, %eax
17376 ; NoVLX-NEXT: andl $3, %eax
17377 ; NoVLX-NEXT: vzeroupper
17380 %0 = bitcast <2 x i64> %__a to <2 x i64>
17381 %load = load i64, ptr %__b
17382 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17383 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17384 %2 = icmp ult <2 x i64> %0, %1
17385 %3 = bitcast i8 %__u to <8 x i1>
17386 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17387 %4 = and <2 x i1> %extract.i, %2
17388 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17389 %6 = bitcast <4 x i1> %5 to i4
; Unmasked unsigned less-than compare of two <2 x i64> register operands. The
; <2 x i1> result is widened to <8 x i1> (every index >= 2 selects from the
; zeroinitializer operand) and bitcast to i8. The VLX run expects an xmm
; vpcmpltuq and kmovd; the NoVLX run expects a zmm vpcmpltuq followed by a
; kshiftlw/kshiftrw $14 pair before the kmovw.
17394 define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17395 ; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask:
17396 ; VLX: # %bb.0: # %entry
17397 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
17398 ; VLX-NEXT: kmovd %k0, %eax
17399 ; VLX-NEXT: # kill: def $al killed $al killed $eax
17402 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask:
17403 ; NoVLX: # %bb.0: # %entry
17404 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17405 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17406 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17407 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17408 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17409 ; NoVLX-NEXT: kmovw %k0, %eax
17410 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
17411 ; NoVLX-NEXT: vzeroupper
17414 %0 = bitcast <2 x i64> %__a to <2 x i64>
17415 %1 = bitcast <2 x i64> %__b to <2 x i64>
17416 %2 = icmp ult <2 x i64> %0, %1
17417 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17418 %4 = bitcast <8 x i1> %3 to i8
; Unmasked <2 x i64> unsigned less-than compare where the second operand is a
; full vector loaded from %__b. The <2 x i1> result is widened to <8 x i1>
; with zero upper lanes and bitcast to i8. The VLX run expects the load folded
; into an xmm vpcmpltuq; the NoVLX run expects a separate vmovdqa load, a zmm
; vpcmpltuq, and a kshiftlw/kshiftrw $14 pair.
17422 define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
17423 ; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem:
17424 ; VLX: # %bb.0: # %entry
17425 ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
17426 ; VLX-NEXT: kmovd %k0, %eax
17427 ; VLX-NEXT: # kill: def $al killed $al killed $eax
17430 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem:
17431 ; NoVLX: # %bb.0: # %entry
17432 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17433 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
17434 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17435 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17436 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17437 ; NoVLX-NEXT: kmovw %k0, %eax
17438 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
17439 ; NoVLX-NEXT: vzeroupper
17442 %0 = bitcast <2 x i64> %__a to <2 x i64>
17443 %load = load <2 x i64>, ptr %__b
17444 %1 = bitcast <2 x i64> %load to <2 x i64>
17445 %2 = icmp ult <2 x i64> %0, %1
17446 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17447 %4 = bitcast <8 x i1> %3 to i8
; Masked <2 x i64> unsigned less-than compare of two register operands. The
; low two lanes of %__u (as <8 x i1>) are ANDed with the compare result, which
; is widened to <8 x i1> with zero upper lanes and bitcast to i8. Both runs
; expect %edi in %k1 as the compare write-mask; the NoVLX run also expects the
; zmm form and a kshiftlw/kshiftrw $14 pair.
17451 define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17452 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask:
17453 ; VLX: # %bb.0: # %entry
17454 ; VLX-NEXT: kmovd %edi, %k1
17455 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
17456 ; VLX-NEXT: kmovd %k0, %eax
17457 ; VLX-NEXT: # kill: def $al killed $al killed $eax
17460 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask:
17461 ; NoVLX: # %bb.0: # %entry
17462 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17463 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17464 ; NoVLX-NEXT: kmovw %edi, %k1
17465 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17466 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17467 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17468 ; NoVLX-NEXT: kmovw %k0, %eax
17469 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
17470 ; NoVLX-NEXT: vzeroupper
17473 %0 = bitcast <2 x i64> %__a to <2 x i64>
17474 %1 = bitcast <2 x i64> %__b to <2 x i64>
17475 %2 = icmp ult <2 x i64> %0, %1
17476 %3 = bitcast i8 %__u to <8 x i1>
17477 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17478 %4 = and <2 x i1> %2, %extract.i
17479 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17480 %6 = bitcast <8 x i1> %5 to i8
; Masked <2 x i64> unsigned less-than compare where the second operand is a
; full vector loaded from %__b. The low two lanes of %__u are ANDed with the
; compare result, which is widened to <8 x i1> with zero upper lanes and
; bitcast to i8. The VLX run expects a {%k1}-masked xmm compare with a folded
; load; the NoVLX run expects a separate vmovdqa load, a {%k1}-masked zmm
; compare, and a kshiftlw/kshiftrw $14 pair.
17484 define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
17485 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem:
17486 ; VLX: # %bb.0: # %entry
17487 ; VLX-NEXT: kmovd %edi, %k1
17488 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
17489 ; VLX-NEXT: kmovd %k0, %eax
17490 ; VLX-NEXT: # kill: def $al killed $al killed $eax
17493 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem:
17494 ; NoVLX: # %bb.0: # %entry
17495 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17496 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
17497 ; NoVLX-NEXT: kmovw %edi, %k1
17498 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17499 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17500 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17501 ; NoVLX-NEXT: kmovw %k0, %eax
17502 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
17503 ; NoVLX-NEXT: vzeroupper
17506 %0 = bitcast <2 x i64> %__a to <2 x i64>
17507 %load = load <2 x i64>, ptr %__b
17508 %1 = bitcast <2 x i64> %load to <2 x i64>
17509 %2 = icmp ult <2 x i64> %0, %1
17510 %3 = bitcast i8 %__u to <8 x i1>
17511 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17512 %4 = and <2 x i1> %2, %extract.i
17513 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17514 %6 = bitcast <8 x i1> %5 to i8
; Unmasked <2 x i64> unsigned less-than compare against a scalar i64 loaded
; from %__b and splatted to both lanes. The <2 x i1> result is widened to
; <8 x i1> with zero upper lanes and bitcast to i8. The VLX run expects an xmm
; compare with a {1to2} embedded broadcast; the NoVLX run expects a zmm compare
; with {1to8} followed by a kshiftlw/kshiftrw $14 pair.
17519 define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
17520 ; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b:
17521 ; VLX: # %bb.0: # %entry
17522 ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
17523 ; VLX-NEXT: kmovd %k0, %eax
17524 ; VLX-NEXT: # kill: def $al killed $al killed $eax
17527 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b:
17528 ; NoVLX: # %bb.0: # %entry
17529 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17530 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
17531 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17532 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17533 ; NoVLX-NEXT: kmovw %k0, %eax
17534 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
17535 ; NoVLX-NEXT: vzeroupper
17538 %0 = bitcast <2 x i64> %__a to <2 x i64>
17539 %load = load i64, ptr %__b
17540 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17541 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17542 %2 = icmp ult <2 x i64> %0, %1
17543 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17544 %4 = bitcast <8 x i1> %3 to i8
; Masked <2 x i64> unsigned less-than compare against a splatted i64 loaded
; from %__b. The low two lanes of %__u are ANDed with the compare result, which
; is widened to <8 x i1> with zero upper lanes and bitcast to i8. The VLX run
; expects a {%k1}-masked xmm compare with {1to2}; the NoVLX run expects a
; {%k1}-masked zmm compare with {1to8} and a kshiftlw/kshiftrw $14 pair.
17548 define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
17549 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b:
17550 ; VLX: # %bb.0: # %entry
17551 ; VLX-NEXT: kmovd %edi, %k1
17552 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
17553 ; VLX-NEXT: kmovd %k0, %eax
17554 ; VLX-NEXT: # kill: def $al killed $al killed $eax
17557 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b:
17558 ; NoVLX: # %bb.0: # %entry
17559 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17560 ; NoVLX-NEXT: kmovw %edi, %k1
17561 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
17562 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17563 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17564 ; NoVLX-NEXT: kmovw %k0, %eax
17565 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
17566 ; NoVLX-NEXT: vzeroupper
17569 %0 = bitcast <2 x i64> %__a to <2 x i64>
17570 %load = load i64, ptr %__b
17571 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17572 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17573 %2 = icmp ult <2 x i64> %0, %1
17574 %3 = bitcast i8 %__u to <8 x i1>
17575 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17576 %4 = and <2 x i1> %extract.i, %2
17577 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17578 %6 = bitcast <8 x i1> %5 to i8
; Register-register variant: unsigned less-than compare of two <2 x i64>
; arguments. The 2 result bits are widened to <16 x i1> (lanes 2..15 are taken
; from zeroinitializer) and returned as i16.
; The AVX512VL run expects a single xmm vpcmpltuq; the AVX512F-only run expects
; both operands treated as zmm and a kshiftlw/kshiftrw $14 pair on the mask.
17583 define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17584 ; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask:
17585 ; VLX: # %bb.0: # %entry
17586 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
17587 ; VLX-NEXT: kmovd %k0, %eax
17588 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
17591 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask:
17592 ; NoVLX: # %bb.0: # %entry
17593 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17594 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17595 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17596 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17597 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17598 ; NoVLX-NEXT: kmovw %k0, %eax
17599 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
17600 ; NoVLX-NEXT: vzeroupper
17603 %0 = bitcast <2 x i64> %__a to <2 x i64>
17604 %1 = bitcast <2 x i64> %__b to <2 x i64>
17605 %2 = icmp ult <2 x i64> %0, %1
17606 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17607 %4 = bitcast <16 x i1> %3 to i16
; Register-memory variant: the second <2 x i64> operand is loaded from %__b and
; compared (unsigned less-than) against %__a. The 2 result bits are widened to
; <16 x i1> with zero upper lanes and returned as i16.
; The AVX512VL run expects the load folded into vpcmpltuq; the AVX512F-only run
; expects a separate vmovdqa load followed by a zmm compare.
17611 define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
17612 ; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem:
17613 ; VLX: # %bb.0: # %entry
17614 ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
17615 ; VLX-NEXT: kmovd %k0, %eax
17616 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
17619 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem:
17620 ; NoVLX: # %bb.0: # %entry
17621 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17622 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
17623 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17624 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17625 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17626 ; NoVLX-NEXT: kmovw %k0, %eax
17627 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
17628 ; NoVLX-NEXT: vzeroupper
17631 %0 = bitcast <2 x i64> %__a to <2 x i64>
17632 %load = load <2 x i64>, ptr %__b
17633 %1 = bitcast <2 x i64> %load to <2 x i64>
17634 %2 = icmp ult <2 x i64> %0, %1
17635 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17636 %4 = bitcast <16 x i1> %3 to i16
; Masked register-register variant: unsigned less-than compare of two
; <2 x i64> arguments, ANDed with the low 2 bits of %__u. The 2 result bits are
; widened to <16 x i1> with zero upper lanes and returned as i16.
; Both runs expect %__u to be moved into %k1 and used as the compare's write
; mask; the AVX512F-only run additionally expects zmm operands and a
; kshiftlw/kshiftrw $14 pair.
17640 define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17641 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask:
17642 ; VLX: # %bb.0: # %entry
17643 ; VLX-NEXT: kmovd %edi, %k1
17644 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
17645 ; VLX-NEXT: kmovd %k0, %eax
17646 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
17649 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask:
17650 ; NoVLX: # %bb.0: # %entry
17651 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17652 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17653 ; NoVLX-NEXT: kmovw %edi, %k1
17654 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17655 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17656 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17657 ; NoVLX-NEXT: kmovw %k0, %eax
17658 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
17659 ; NoVLX-NEXT: vzeroupper
17662 %0 = bitcast <2 x i64> %__a to <2 x i64>
17663 %1 = bitcast <2 x i64> %__b to <2 x i64>
17664 %2 = icmp ult <2 x i64> %0, %1
17665 %3 = bitcast i8 %__u to <8 x i1>
17666 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17667 %4 = and <2 x i1> %2, %extract.i
17668 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17669 %6 = bitcast <16 x i1> %5 to i16
; Masked register-memory variant: the second <2 x i64> operand is loaded from
; %__b, compared (unsigned less-than) against %__a, and ANDed with the low 2
; bits of %__u. The 2 result bits are widened to <16 x i1> with zero upper
; lanes and returned as i16.
; The AVX512VL run expects the load folded into the masked compare; the
; AVX512F-only run expects a separate vmovdqa load and a zmm compare.
17673 define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
17674 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem:
17675 ; VLX: # %bb.0: # %entry
17676 ; VLX-NEXT: kmovd %edi, %k1
17677 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
17678 ; VLX-NEXT: kmovd %k0, %eax
17679 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
17682 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem:
17683 ; NoVLX: # %bb.0: # %entry
17684 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17685 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
17686 ; NoVLX-NEXT: kmovw %edi, %k1
17687 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17688 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17689 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17690 ; NoVLX-NEXT: kmovw %k0, %eax
17691 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
17692 ; NoVLX-NEXT: vzeroupper
17695 %0 = bitcast <2 x i64> %__a to <2 x i64>
17696 %load = load <2 x i64>, ptr %__b
17697 %1 = bitcast <2 x i64> %load to <2 x i64>
17698 %2 = icmp ult <2 x i64> %0, %1
17699 %3 = bitcast i8 %__u to <8 x i1>
17700 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17701 %4 = and <2 x i1> %2, %extract.i
17702 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17703 %6 = bitcast <16 x i1> %5 to i16
; Broadcast-from-memory variant: an i64 is loaded from %__b, splatted to both
; lanes, and compared (unsigned less-than) against %__a. The 2 result bits are
; widened to <16 x i1> with zero upper lanes and returned as i16.
; The AVX512VL run expects an embedded {1to2} broadcast; the AVX512F-only run
; expects a zmm compare with {1to8} followed by a kshiftlw/kshiftrw $14 pair.
17708 define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
17709 ; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b:
17710 ; VLX: # %bb.0: # %entry
17711 ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
17712 ; VLX-NEXT: kmovd %k0, %eax
17713 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
17716 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b:
17717 ; NoVLX: # %bb.0: # %entry
17718 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17719 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
17720 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17721 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17722 ; NoVLX-NEXT: kmovw %k0, %eax
17723 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
17724 ; NoVLX-NEXT: vzeroupper
17727 %0 = bitcast <2 x i64> %__a to <2 x i64>
17728 %load = load i64, ptr %__b
17729 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17730 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17731 %2 = icmp ult <2 x i64> %0, %1
17732 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17733 %4 = bitcast <16 x i1> %3 to i16
; Masked broadcast-from-memory variant: an i64 is loaded from %__b, splatted to
; both lanes, compared (unsigned less-than) against %__a, and ANDed with the
; low 2 bits of %__u. The 2 result bits are widened to <16 x i1> with zero
; upper lanes and returned as i16.
; The AVX512VL run expects a masked compare with an embedded {1to2} broadcast;
; the AVX512F-only run expects the zmm form with {1to8}.
17737 define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
17738 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b:
17739 ; VLX: # %bb.0: # %entry
17740 ; VLX-NEXT: kmovd %edi, %k1
17741 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
17742 ; VLX-NEXT: kmovd %k0, %eax
17743 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
17746 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b:
17747 ; NoVLX: # %bb.0: # %entry
17748 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17749 ; NoVLX-NEXT: kmovw %edi, %k1
17750 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
17751 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17752 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17753 ; NoVLX-NEXT: kmovw %k0, %eax
17754 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
17755 ; NoVLX-NEXT: vzeroupper
17758 %0 = bitcast <2 x i64> %__a to <2 x i64>
17759 %load = load i64, ptr %__b
17760 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17761 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17762 %2 = icmp ult <2 x i64> %0, %1
17763 %3 = bitcast i8 %__u to <8 x i1>
17764 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17765 %4 = and <2 x i1> %extract.i, %2
17766 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17767 %6 = bitcast <16 x i1> %5 to i16
; Register-register variant: unsigned less-than compare of two <2 x i64>
; arguments. The 2 result bits are widened to <32 x i1> (lanes 2..31 are taken
; from zeroinitializer) and returned as i32.
; The AVX512VL run expects a single xmm vpcmpltuq; the AVX512F-only run expects
; zmm operands and a kshiftlw/kshiftrw $14 pair on the mask.
17772 define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17773 ; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask:
17774 ; VLX: # %bb.0: # %entry
17775 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
17776 ; VLX-NEXT: kmovd %k0, %eax
17779 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask:
17780 ; NoVLX: # %bb.0: # %entry
17781 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17782 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17783 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17784 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17785 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17786 ; NoVLX-NEXT: kmovw %k0, %eax
17787 ; NoVLX-NEXT: vzeroupper
17790 %0 = bitcast <2 x i64> %__a to <2 x i64>
17791 %1 = bitcast <2 x i64> %__b to <2 x i64>
17792 %2 = icmp ult <2 x i64> %0, %1
17793 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17794 %4 = bitcast <32 x i1> %3 to i32
; Register-memory variant: the second <2 x i64> operand is loaded from %__b and
; compared (unsigned less-than) against %__a. The 2 result bits are widened to
; <32 x i1> with zero upper lanes and returned as i32.
; The AVX512VL run expects the load folded into vpcmpltuq; the AVX512F-only run
; expects a separate vmovdqa load followed by a zmm compare.
17798 define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
17799 ; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem:
17800 ; VLX: # %bb.0: # %entry
17801 ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
17802 ; VLX-NEXT: kmovd %k0, %eax
17805 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem:
17806 ; NoVLX: # %bb.0: # %entry
17807 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17808 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
17809 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17810 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17811 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17812 ; NoVLX-NEXT: kmovw %k0, %eax
17813 ; NoVLX-NEXT: vzeroupper
17816 %0 = bitcast <2 x i64> %__a to <2 x i64>
17817 %load = load <2 x i64>, ptr %__b
17818 %1 = bitcast <2 x i64> %load to <2 x i64>
17819 %2 = icmp ult <2 x i64> %0, %1
17820 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17821 %4 = bitcast <32 x i1> %3 to i32
; Masked register-register variant: unsigned less-than compare of two
; <2 x i64> arguments, ANDed with the low 2 bits of %__u. The 2 result bits are
; widened to <32 x i1> with zero upper lanes and returned as i32.
; Both runs expect %__u to be moved into %k1 and used as the compare's write
; mask; the AVX512F-only run additionally expects zmm operands and a
; kshiftlw/kshiftrw $14 pair.
17825 define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17826 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask:
17827 ; VLX: # %bb.0: # %entry
17828 ; VLX-NEXT: kmovd %edi, %k1
17829 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
17830 ; VLX-NEXT: kmovd %k0, %eax
17833 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask:
17834 ; NoVLX: # %bb.0: # %entry
17835 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17836 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17837 ; NoVLX-NEXT: kmovw %edi, %k1
17838 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17839 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17840 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17841 ; NoVLX-NEXT: kmovw %k0, %eax
17842 ; NoVLX-NEXT: vzeroupper
17845 %0 = bitcast <2 x i64> %__a to <2 x i64>
17846 %1 = bitcast <2 x i64> %__b to <2 x i64>
17847 %2 = icmp ult <2 x i64> %0, %1
17848 %3 = bitcast i8 %__u to <8 x i1>
17849 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17850 %4 = and <2 x i1> %2, %extract.i
17851 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17852 %6 = bitcast <32 x i1> %5 to i32
; Masked register-memory variant: the second <2 x i64> operand is loaded from
; %__b, compared (unsigned less-than) against %__a, and ANDed with the low 2
; bits of %__u. The 2 result bits are widened to <32 x i1> with zero upper
; lanes and returned as i32.
; The AVX512VL run expects the load folded into the masked compare; the
; AVX512F-only run expects a separate vmovdqa load and a zmm compare.
17856 define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
17857 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem:
17858 ; VLX: # %bb.0: # %entry
17859 ; VLX-NEXT: kmovd %edi, %k1
17860 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
17861 ; VLX-NEXT: kmovd %k0, %eax
17864 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem:
17865 ; NoVLX: # %bb.0: # %entry
17866 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17867 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
17868 ; NoVLX-NEXT: kmovw %edi, %k1
17869 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17870 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17871 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17872 ; NoVLX-NEXT: kmovw %k0, %eax
17873 ; NoVLX-NEXT: vzeroupper
17876 %0 = bitcast <2 x i64> %__a to <2 x i64>
17877 %load = load <2 x i64>, ptr %__b
17878 %1 = bitcast <2 x i64> %load to <2 x i64>
17879 %2 = icmp ult <2 x i64> %0, %1
17880 %3 = bitcast i8 %__u to <8 x i1>
17881 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17882 %4 = and <2 x i1> %2, %extract.i
17883 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17884 %6 = bitcast <32 x i1> %5 to i32
; Broadcast-from-memory variant: an i64 is loaded from %__b, splatted to both
; lanes, and compared (unsigned less-than) against %__a. The 2 result bits are
; widened to <32 x i1> with zero upper lanes and returned as i32.
; The AVX512VL run expects an embedded {1to2} broadcast; the AVX512F-only run
; expects a zmm compare with {1to8} followed by a kshiftlw/kshiftrw $14 pair.
17889 define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
17890 ; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b:
17891 ; VLX: # %bb.0: # %entry
17892 ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
17893 ; VLX-NEXT: kmovd %k0, %eax
17896 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b:
17897 ; NoVLX: # %bb.0: # %entry
17898 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17899 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
17900 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17901 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17902 ; NoVLX-NEXT: kmovw %k0, %eax
17903 ; NoVLX-NEXT: vzeroupper
17906 %0 = bitcast <2 x i64> %__a to <2 x i64>
17907 %load = load i64, ptr %__b
17908 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17909 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17910 %2 = icmp ult <2 x i64> %0, %1
17911 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17912 %4 = bitcast <32 x i1> %3 to i32
; Masked broadcast-from-memory variant: an i64 is loaded from %__b, splatted to
; both lanes, compared (unsigned less-than) against %__a, and ANDed with the
; low 2 bits of %__u. The 2 result bits are widened to <32 x i1> with zero
; upper lanes and returned as i32.
; The AVX512VL run expects a masked compare with an embedded {1to2} broadcast;
; the AVX512F-only run expects the zmm form with {1to8}.
17916 define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
17917 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b:
17918 ; VLX: # %bb.0: # %entry
17919 ; VLX-NEXT: kmovd %edi, %k1
17920 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
17921 ; VLX-NEXT: kmovd %k0, %eax
17924 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b:
17925 ; NoVLX: # %bb.0: # %entry
17926 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17927 ; NoVLX-NEXT: kmovw %edi, %k1
17928 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
17929 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17930 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17931 ; NoVLX-NEXT: kmovw %k0, %eax
17932 ; NoVLX-NEXT: vzeroupper
17935 %0 = bitcast <2 x i64> %__a to <2 x i64>
17936 %load = load i64, ptr %__b
17937 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17938 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17939 %2 = icmp ult <2 x i64> %0, %1
17940 %3 = bitcast i8 %__u to <8 x i1>
17941 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17942 %4 = and <2 x i1> %extract.i, %2
17943 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17944 %6 = bitcast <32 x i1> %5 to i32
17949 define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17950 ; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask:
17951 ; VLX: # %bb.0: # %entry
17952 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
17953 ; VLX-NEXT: kmovq %k0, %rax
17956 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask:
17957 ; NoVLX: # %bb.0: # %entry
17958 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17959 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17960 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17961 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17962 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17963 ; NoVLX-NEXT: kmovw %k0, %eax
17964 ; NoVLX-NEXT: vzeroupper
17967 %0 = bitcast <2 x i64> %__a to <2 x i64>
17968 %1 = bitcast <2 x i64> %__b to <2 x i64>
17969 %2 = icmp ult <2 x i64> %0, %1
17970 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17971 %4 = bitcast <64 x i1> %3 to i64
17975 define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
17976 ; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem:
17977 ; VLX: # %bb.0: # %entry
17978 ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
17979 ; VLX-NEXT: kmovq %k0, %rax
17982 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem:
17983 ; NoVLX: # %bb.0: # %entry
17984 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17985 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
17986 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17987 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17988 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17989 ; NoVLX-NEXT: kmovw %k0, %eax
17990 ; NoVLX-NEXT: vzeroupper
17993 %0 = bitcast <2 x i64> %__a to <2 x i64>
17994 %load = load <2 x i64>, ptr %__b
17995 %1 = bitcast <2 x i64> %load to <2 x i64>
17996 %2 = icmp ult <2 x i64> %0, %1
17997 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17998 %4 = bitcast <64 x i1> %3 to i64
18002 define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
18003 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask:
18004 ; VLX: # %bb.0: # %entry
18005 ; VLX-NEXT: kmovd %edi, %k1
18006 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
18007 ; VLX-NEXT: kmovq %k0, %rax
18010 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask:
18011 ; NoVLX: # %bb.0: # %entry
18012 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
18013 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18014 ; NoVLX-NEXT: kmovw %edi, %k1
18015 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18016 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18017 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18018 ; NoVLX-NEXT: kmovw %k0, %eax
18019 ; NoVLX-NEXT: vzeroupper
18022 %0 = bitcast <2 x i64> %__a to <2 x i64>
18023 %1 = bitcast <2 x i64> %__b to <2 x i64>
18024 %2 = icmp ult <2 x i64> %0, %1
18025 %3 = bitcast i8 %__u to <8 x i1>
18026 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18027 %4 = and <2 x i1> %2, %extract.i
18028 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18029 %6 = bitcast <64 x i1> %5 to i64
18033 define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
18034 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem:
18035 ; VLX: # %bb.0: # %entry
18036 ; VLX-NEXT: kmovd %edi, %k1
18037 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
18038 ; VLX-NEXT: kmovq %k0, %rax
18041 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem:
18042 ; NoVLX: # %bb.0: # %entry
18043 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18044 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
18045 ; NoVLX-NEXT: kmovw %edi, %k1
18046 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18047 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18048 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18049 ; NoVLX-NEXT: kmovw %k0, %eax
18050 ; NoVLX-NEXT: vzeroupper
18053 %0 = bitcast <2 x i64> %__a to <2 x i64>
18054 %load = load <2 x i64>, ptr %__b
18055 %1 = bitcast <2 x i64> %load to <2 x i64>
18056 %2 = icmp ult <2 x i64> %0, %1
18057 %3 = bitcast i8 %__u to <8 x i1>
18058 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18059 %4 = and <2 x i1> %2, %extract.i
18060 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18061 %6 = bitcast <64 x i1> %5 to i64
18066 define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
18067 ; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b:
18068 ; VLX: # %bb.0: # %entry
18069 ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
18070 ; VLX-NEXT: kmovq %k0, %rax
18073 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b:
18074 ; NoVLX: # %bb.0: # %entry
18075 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18076 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
18077 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18078 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18079 ; NoVLX-NEXT: kmovw %k0, %eax
18080 ; NoVLX-NEXT: vzeroupper
18083 %0 = bitcast <2 x i64> %__a to <2 x i64>
18084 %load = load i64, ptr %__b
18085 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
18086 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
18087 %2 = icmp ult <2 x i64> %0, %1
18088 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18089 %4 = bitcast <64 x i1> %3 to i64
18093 define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
18094 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b:
18095 ; VLX: # %bb.0: # %entry
18096 ; VLX-NEXT: kmovd %edi, %k1
18097 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
18098 ; VLX-NEXT: kmovq %k0, %rax
18101 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b:
18102 ; NoVLX: # %bb.0: # %entry
18103 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18104 ; NoVLX-NEXT: kmovw %edi, %k1
18105 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
18106 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18107 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18108 ; NoVLX-NEXT: kmovw %k0, %eax
18109 ; NoVLX-NEXT: vzeroupper
18112 %0 = bitcast <2 x i64> %__a to <2 x i64>
18113 %load = load i64, ptr %__b
18114 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
18115 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
18116 %2 = icmp ult <2 x i64> %0, %1
18117 %3 = bitcast i8 %__u to <8 x i1>
18118 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18119 %4 = and <2 x i1> %extract.i, %2
18120 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18121 %6 = bitcast <64 x i1> %5 to i64
; Register-register variant: unsigned less-than compare of two <4 x i64>
; arguments. The 4 result bits are widened to <8 x i1> (lanes 4..7 are taken
; from zeroinitializer) and returned as i8.
; The AVX512VL run expects a single ymm vpcmpltuq; the AVX512F-only run expects
; both operands treated as zmm and a kshiftlw/kshiftrw $12 pair on the mask.
18126 define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18127 ; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask:
18128 ; VLX: # %bb.0: # %entry
18129 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
18130 ; VLX-NEXT: kmovd %k0, %eax
18131 ; VLX-NEXT: # kill: def $al killed $al killed $eax
18132 ; VLX-NEXT: vzeroupper
18135 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask:
18136 ; NoVLX: # %bb.0: # %entry
18137 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18138 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18139 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18140 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18141 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18142 ; NoVLX-NEXT: kmovw %k0, %eax
18143 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
18144 ; NoVLX-NEXT: vzeroupper
18147 %0 = bitcast <4 x i64> %__a to <4 x i64>
18148 %1 = bitcast <4 x i64> %__b to <4 x i64>
18149 %2 = icmp ult <4 x i64> %0, %1
18150 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18151 %4 = bitcast <8 x i1> %3 to i8
; Register-memory variant: the second <4 x i64> operand is loaded from %__b and
; compared (unsigned less-than) against %__a. The 4 result bits are widened to
; <8 x i1> with zero upper lanes and returned as i8.
; The AVX512VL run expects the load folded into a ymm vpcmpltuq; the
; AVX512F-only run expects a separate vmovdqa load followed by a zmm compare
; and a kshiftlw/kshiftrw $12 pair.
18155 define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
18156 ; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem:
18157 ; VLX: # %bb.0: # %entry
18158 ; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
18159 ; VLX-NEXT: kmovd %k0, %eax
18160 ; VLX-NEXT: # kill: def $al killed $al killed $eax
18161 ; VLX-NEXT: vzeroupper
18164 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem:
18165 ; NoVLX: # %bb.0: # %entry
18166 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18167 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
18168 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18169 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18170 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18171 ; NoVLX-NEXT: kmovw %k0, %eax
18172 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
18173 ; NoVLX-NEXT: vzeroupper
18176 %0 = bitcast <4 x i64> %__a to <4 x i64>
18177 %load = load <4 x i64>, ptr %__b
18178 %1 = bitcast <4 x i64> %load to <4 x i64>
18179 %2 = icmp ult <4 x i64> %0, %1
18180 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18181 %4 = bitcast <8 x i1> %3 to i8
; Masked register-register variant: unsigned less-than compare of two
; <4 x i64> arguments, ANDed with the low 4 bits of %__u. The 4 result bits are
; widened to <8 x i1> with zero upper lanes and returned as i8.
; Both runs expect %__u to be moved into %k1 and used as the compare's write
; mask; the AVX512F-only run additionally expects zmm operands and a
; kshiftlw/kshiftrw $12 pair.
18185 define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18186 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask:
18187 ; VLX: # %bb.0: # %entry
18188 ; VLX-NEXT: kmovd %edi, %k1
18189 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
18190 ; VLX-NEXT: kmovd %k0, %eax
18191 ; VLX-NEXT: # kill: def $al killed $al killed $eax
18192 ; VLX-NEXT: vzeroupper
18195 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask:
18196 ; NoVLX: # %bb.0: # %entry
18197 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18198 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18199 ; NoVLX-NEXT: kmovw %edi, %k1
18200 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18201 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18202 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18203 ; NoVLX-NEXT: kmovw %k0, %eax
18204 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
18205 ; NoVLX-NEXT: vzeroupper
18208 %0 = bitcast <4 x i64> %__a to <4 x i64>
18209 %1 = bitcast <4 x i64> %__b to <4 x i64>
18210 %2 = icmp ult <4 x i64> %0, %1
18211 %3 = bitcast i8 %__u to <8 x i1>
18212 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18213 %4 = and <4 x i1> %2, %extract.i
18214 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18215 %6 = bitcast <8 x i1> %5 to i8
; Masked register-memory variant: the second <4 x i64> operand is loaded from
; %__b, compared (unsigned less-than) against %__a, and ANDed with the low 4
; bits of %__u. The 4 result bits are widened to <8 x i1> with zero upper lanes
; and returned as i8.
; The AVX512VL run expects the load folded into the masked ymm compare; the
; AVX512F-only run expects a separate vmovdqa load and a zmm compare.
18219 define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
18220 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem:
18221 ; VLX: # %bb.0: # %entry
18222 ; VLX-NEXT: kmovd %edi, %k1
18223 ; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
18224 ; VLX-NEXT: kmovd %k0, %eax
18225 ; VLX-NEXT: # kill: def $al killed $al killed $eax
18226 ; VLX-NEXT: vzeroupper
18229 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem:
18230 ; NoVLX: # %bb.0: # %entry
18231 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18232 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
18233 ; NoVLX-NEXT: kmovw %edi, %k1
18234 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18235 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18236 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18237 ; NoVLX-NEXT: kmovw %k0, %eax
18238 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
18239 ; NoVLX-NEXT: vzeroupper
18242 %0 = bitcast <4 x i64> %__a to <4 x i64>
18243 %load = load <4 x i64>, ptr %__b
18244 %1 = bitcast <4 x i64> %load to <4 x i64>
18245 %2 = icmp ult <4 x i64> %0, %1
18246 %3 = bitcast i8 %__u to <8 x i1>
18247 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18248 %4 = and <4 x i1> %2, %extract.i
18249 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18250 %6 = bitcast <8 x i1> %5 to i8
; Broadcast-from-memory variant: an i64 is loaded from %__b, splatted to all
; four lanes, and compared (unsigned less-than) against %__a. The 4 result bits
; are widened to <8 x i1> with zero upper lanes and returned as i8.
; The AVX512VL run expects an embedded {1to4} broadcast; the AVX512F-only run
; expects a zmm compare with {1to8} followed by a kshiftlw/kshiftrw $12 pair.
18255 define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
18256 ; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b:
18257 ; VLX: # %bb.0: # %entry
18258 ; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
18259 ; VLX-NEXT: kmovd %k0, %eax
18260 ; VLX-NEXT: # kill: def $al killed $al killed $eax
18261 ; VLX-NEXT: vzeroupper
18264 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b:
18265 ; NoVLX: # %bb.0: # %entry
18266 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18267 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
18268 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18269 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18270 ; NoVLX-NEXT: kmovw %k0, %eax
18271 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
18272 ; NoVLX-NEXT: vzeroupper
18275 %0 = bitcast <4 x i64> %__a to <4 x i64>
18276 %load = load i64, ptr %__b
18277 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18278 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18279 %2 = icmp ult <4 x i64> %0, %1
18280 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18281 %4 = bitcast <8 x i1> %3 to i8
; Masked variant of the splat-from-memory compare: icmp ult of <4 x i64> %__a
; against an i64 loaded from %__b and splatted to four lanes. The low four bits
; of the i8 mask %__u (extracted via shuffle indices 0-3 of its <8 x i1>
; bitcast) are ANDed with the compare result, which is then widened to
; <8 x i1> with zero upper lanes and bitcast to i8.
; Expected codegen: %__u is moved into %k1 and applied as the compare's write
; mask. The AVX512VL run uses a {1to4} ymm compare; the AVX512F-only run uses a
; {1to8} zmm compare followed by kshiftlw/kshiftrw $12 to keep the low 4 bits.
18285 define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
18286 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b:
18287 ; VLX: # %bb.0: # %entry
18288 ; VLX-NEXT: kmovd %edi, %k1
18289 ; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
18290 ; VLX-NEXT: kmovd %k0, %eax
18291 ; VLX-NEXT: # kill: def $al killed $al killed $eax
18292 ; VLX-NEXT: vzeroupper
18295 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b:
18296 ; NoVLX: # %bb.0: # %entry
18297 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18298 ; NoVLX-NEXT: kmovw %edi, %k1
18299 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
18300 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18301 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18302 ; NoVLX-NEXT: kmovw %k0, %eax
18303 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
18304 ; NoVLX-NEXT: vzeroupper
18307 %0 = bitcast <4 x i64> %__a to <4 x i64>
18308 %load = load i64, ptr %__b
18309 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18310 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18311 %2 = icmp ult <4 x i64> %0, %1
18312 %3 = bitcast i8 %__u to <8 x i1>
18313 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18314 %4 = and <4 x i1> %extract.i, %2
18315 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18316 %6 = bitcast <8 x i1> %5 to i8
; icmp ult on two <4 x i64> register operands; the <4 x i1> result is widened
; to <16 x i1> and bitcast to i16. In the widening shuffle, indices 0-3 take
; the compare lanes and every index 4-7 takes a lane of the zeroinitializer
; operand, so all lanes above the low four are zero.
; Expected codegen: the AVX512VL run compares on ymm directly; the
; AVX512F-only run compares on zmm and uses kshiftlw/kshiftrw $12 to keep only
; the low four bits of the 16-bit mask.
18321 define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18322 ; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask:
18323 ; VLX: # %bb.0: # %entry
18324 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
18325 ; VLX-NEXT: kmovd %k0, %eax
18326 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18327 ; VLX-NEXT: vzeroupper
18330 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask:
18331 ; NoVLX: # %bb.0: # %entry
18332 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18333 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18334 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18335 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18336 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18337 ; NoVLX-NEXT: kmovw %k0, %eax
18338 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18339 ; NoVLX-NEXT: vzeroupper
18342 %0 = bitcast <4 x i64> %__a to <4 x i64>
18343 %1 = bitcast <4 x i64> %__b to <4 x i64>
18344 %2 = icmp ult <4 x i64> %0, %1
18345 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18346 %4 = bitcast <16 x i1> %3 to i16
; icmp ult of <4 x i64> %__a against a full <4 x i64> vector loaded from
; %__b; the <4 x i1> result is widened to <16 x i1> (indices 4-7 of the
; shuffle select zeroinitializer lanes, so the upper lanes are zero) and
; bitcast to i16.
; Expected codegen: the AVX512VL run folds the load into the ymm compare; the
; AVX512F-only run loads into %ymm1 with vmovdqa, compares on zmm, and uses
; kshiftlw/kshiftrw $12 to keep only the low four mask bits.
18350 define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
18351 ; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem:
18352 ; VLX: # %bb.0: # %entry
18353 ; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
18354 ; VLX-NEXT: kmovd %k0, %eax
18355 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18356 ; VLX-NEXT: vzeroupper
18359 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem:
18360 ; NoVLX: # %bb.0: # %entry
18361 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18362 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
18363 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18364 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18365 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18366 ; NoVLX-NEXT: kmovw %k0, %eax
18367 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18368 ; NoVLX-NEXT: vzeroupper
18371 %0 = bitcast <4 x i64> %__a to <4 x i64>
18372 %load = load <4 x i64>, ptr %__b
18373 %1 = bitcast <4 x i64> %load to <4 x i64>
18374 %2 = icmp ult <4 x i64> %0, %1
18375 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18376 %4 = bitcast <16 x i1> %3 to i16
; Masked register-register compare: icmp ult on two <4 x i64> operands, ANDed
; with the low four bits of the i8 mask %__u (shuffle indices 0-3 of its
; <8 x i1> bitcast). The <4 x i1> result is widened to <16 x i1> with zero
; upper lanes (indices 4-7 select zeroinitializer lanes) and bitcast to i16.
; Expected codegen: %__u is moved into %k1 and used as the compare's write
; mask. The AVX512VL run compares on ymm; the AVX512F-only run compares on
; zmm and uses kshiftlw/kshiftrw $12 to keep only the low four mask bits.
18380 define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18381 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask:
18382 ; VLX: # %bb.0: # %entry
18383 ; VLX-NEXT: kmovd %edi, %k1
18384 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
18385 ; VLX-NEXT: kmovd %k0, %eax
18386 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18387 ; VLX-NEXT: vzeroupper
18390 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask:
18391 ; NoVLX: # %bb.0: # %entry
18392 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18393 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18394 ; NoVLX-NEXT: kmovw %edi, %k1
18395 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18396 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18397 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18398 ; NoVLX-NEXT: kmovw %k0, %eax
18399 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18400 ; NoVLX-NEXT: vzeroupper
18403 %0 = bitcast <4 x i64> %__a to <4 x i64>
18404 %1 = bitcast <4 x i64> %__b to <4 x i64>
18405 %2 = icmp ult <4 x i64> %0, %1
18406 %3 = bitcast i8 %__u to <8 x i1>
18407 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18408 %4 = and <4 x i1> %2, %extract.i
18409 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18410 %6 = bitcast <16 x i1> %5 to i16
; Masked compare against memory: icmp ult of <4 x i64> %__a against a full
; <4 x i64> vector loaded from %__b, ANDed with the low four bits of the i8
; mask %__u (shuffle indices 0-3 of its <8 x i1> bitcast). The result is
; widened to <16 x i1> with zero upper lanes and bitcast to i16.
; Expected codegen: %__u is moved into %k1 and used as the compare's write
; mask. The AVX512VL run folds the load into the ymm compare; the
; AVX512F-only run loads into %ymm1 first, compares on zmm, and uses
; kshiftlw/kshiftrw $12 to keep only the low four mask bits.
18414 define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
18415 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem:
18416 ; VLX: # %bb.0: # %entry
18417 ; VLX-NEXT: kmovd %edi, %k1
18418 ; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
18419 ; VLX-NEXT: kmovd %k0, %eax
18420 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18421 ; VLX-NEXT: vzeroupper
18424 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem:
18425 ; NoVLX: # %bb.0: # %entry
18426 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18427 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
18428 ; NoVLX-NEXT: kmovw %edi, %k1
18429 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18430 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18431 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18432 ; NoVLX-NEXT: kmovw %k0, %eax
18433 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18434 ; NoVLX-NEXT: vzeroupper
18437 %0 = bitcast <4 x i64> %__a to <4 x i64>
18438 %load = load <4 x i64>, ptr %__b
18439 %1 = bitcast <4 x i64> %load to <4 x i64>
18440 %2 = icmp ult <4 x i64> %0, %1
18441 %3 = bitcast i8 %__u to <8 x i1>
18442 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18443 %4 = and <4 x i1> %2, %extract.i
18444 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18445 %6 = bitcast <16 x i1> %5 to i16
; icmp ult of <4 x i64> %__a against a single i64 loaded from %__b and
; splatted to all four lanes. The <4 x i1> result is widened to <16 x i1>
; (indices 4-7 of the shuffle select zeroinitializer lanes, so the upper
; lanes are zero) and bitcast to i16.
; Expected codegen: the AVX512VL run uses a {1to4} broadcast operand on ymm;
; the AVX512F-only run uses a {1to8} zmm compare and kshiftlw/kshiftrw $12 to
; keep only the low four mask bits.
18450 define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
18451 ; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b:
18452 ; VLX: # %bb.0: # %entry
18453 ; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
18454 ; VLX-NEXT: kmovd %k0, %eax
18455 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18456 ; VLX-NEXT: vzeroupper
18459 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b:
18460 ; NoVLX: # %bb.0: # %entry
18461 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18462 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
18463 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18464 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18465 ; NoVLX-NEXT: kmovw %k0, %eax
18466 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18467 ; NoVLX-NEXT: vzeroupper
18470 %0 = bitcast <4 x i64> %__a to <4 x i64>
18471 %load = load i64, ptr %__b
18472 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18473 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18474 %2 = icmp ult <4 x i64> %0, %1
18475 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18476 %4 = bitcast <16 x i1> %3 to i16
; Masked splat-from-memory compare: icmp ult of <4 x i64> %__a against an i64
; loaded from %__b and splatted to four lanes, ANDed with the low four bits of
; the i8 mask %__u (shuffle indices 0-3 of its <8 x i1> bitcast). The result
; is widened to <16 x i1> with zero upper lanes and bitcast to i16.
; Expected codegen: %__u is moved into %k1 and used as the compare's write
; mask. The AVX512VL run uses a {1to4} ymm compare; the AVX512F-only run uses
; a {1to8} zmm compare followed by kshiftlw/kshiftrw $12 to keep the low 4
; mask bits.
18480 define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
18481 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b:
18482 ; VLX: # %bb.0: # %entry
18483 ; VLX-NEXT: kmovd %edi, %k1
18484 ; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
18485 ; VLX-NEXT: kmovd %k0, %eax
18486 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18487 ; VLX-NEXT: vzeroupper
18490 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b:
18491 ; NoVLX: # %bb.0: # %entry
18492 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18493 ; NoVLX-NEXT: kmovw %edi, %k1
18494 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
18495 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18496 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18497 ; NoVLX-NEXT: kmovw %k0, %eax
18498 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18499 ; NoVLX-NEXT: vzeroupper
18502 %0 = bitcast <4 x i64> %__a to <4 x i64>
18503 %load = load i64, ptr %__b
18504 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18505 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18506 %2 = icmp ult <4 x i64> %0, %1
18507 %3 = bitcast i8 %__u to <8 x i1>
18508 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18509 %4 = and <4 x i1> %extract.i, %2
18510 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18511 %6 = bitcast <16 x i1> %5 to i16
; icmp ult on two <4 x i64> register operands; the <4 x i1> result is widened
; to <32 x i1> and bitcast to i32. In the widening shuffle, indices 0-3 take
; the compare lanes and every index 4-7 takes a lane of the zeroinitializer
; operand, so all lanes above the low four are zero.
; Expected codegen: the AVX512VL run compares on ymm and reads the mask with
; kmovd; the AVX512F-only run compares on zmm, uses kshiftlw/kshiftrw $12 to
; keep only the low four mask bits, and reads the mask with kmovw.
18516 define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18517 ; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask:
18518 ; VLX: # %bb.0: # %entry
18519 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
18520 ; VLX-NEXT: kmovd %k0, %eax
18521 ; VLX-NEXT: vzeroupper
18524 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask:
18525 ; NoVLX: # %bb.0: # %entry
18526 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18527 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18528 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18529 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18530 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18531 ; NoVLX-NEXT: kmovw %k0, %eax
18532 ; NoVLX-NEXT: vzeroupper
18535 %0 = bitcast <4 x i64> %__a to <4 x i64>
18536 %1 = bitcast <4 x i64> %__b to <4 x i64>
18537 %2 = icmp ult <4 x i64> %0, %1
18538 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18539 %4 = bitcast <32 x i1> %3 to i32
; icmp ult of <4 x i64> %__a against a full <4 x i64> vector loaded from
; %__b; the <4 x i1> result is widened to <32 x i1> (indices 4-7 of the
; shuffle select zeroinitializer lanes, so the upper lanes are zero) and
; bitcast to i32.
; Expected codegen: the AVX512VL run folds the load into the ymm compare; the
; AVX512F-only run loads into %ymm1 with vmovdqa, compares on zmm, and uses
; kshiftlw/kshiftrw $12 to keep only the low four mask bits.
18543 define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
18544 ; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem:
18545 ; VLX: # %bb.0: # %entry
18546 ; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
18547 ; VLX-NEXT: kmovd %k0, %eax
18548 ; VLX-NEXT: vzeroupper
18551 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem:
18552 ; NoVLX: # %bb.0: # %entry
18553 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18554 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
18555 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18556 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18557 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18558 ; NoVLX-NEXT: kmovw %k0, %eax
18559 ; NoVLX-NEXT: vzeroupper
18562 %0 = bitcast <4 x i64> %__a to <4 x i64>
18563 %load = load <4 x i64>, ptr %__b
18564 %1 = bitcast <4 x i64> %load to <4 x i64>
18565 %2 = icmp ult <4 x i64> %0, %1
18566 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18567 %4 = bitcast <32 x i1> %3 to i32
; Masked register-register compare: icmp ult on two <4 x i64> operands, ANDed
; with the low four bits of the i8 mask %__u (shuffle indices 0-3 of its
; <8 x i1> bitcast). The <4 x i1> result is widened to <32 x i1> with zero
; upper lanes (indices 4-7 select zeroinitializer lanes) and bitcast to i32.
; Expected codegen: %__u is moved into %k1 and used as the compare's write
; mask. The AVX512VL run compares on ymm; the AVX512F-only run compares on
; zmm and uses kshiftlw/kshiftrw $12 to keep only the low four mask bits.
18571 define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18572 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask:
18573 ; VLX: # %bb.0: # %entry
18574 ; VLX-NEXT: kmovd %edi, %k1
18575 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
18576 ; VLX-NEXT: kmovd %k0, %eax
18577 ; VLX-NEXT: vzeroupper
18580 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask:
18581 ; NoVLX: # %bb.0: # %entry
18582 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18583 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18584 ; NoVLX-NEXT: kmovw %edi, %k1
18585 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18586 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18587 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18588 ; NoVLX-NEXT: kmovw %k0, %eax
18589 ; NoVLX-NEXT: vzeroupper
18592 %0 = bitcast <4 x i64> %__a to <4 x i64>
18593 %1 = bitcast <4 x i64> %__b to <4 x i64>
18594 %2 = icmp ult <4 x i64> %0, %1
18595 %3 = bitcast i8 %__u to <8 x i1>
18596 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18597 %4 = and <4 x i1> %2, %extract.i
18598 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18599 %6 = bitcast <32 x i1> %5 to i32
; Masked compare against memory: icmp ult of <4 x i64> %__a against a full
; <4 x i64> vector loaded from %__b, ANDed with the low four bits of the i8
; mask %__u (shuffle indices 0-3 of its <8 x i1> bitcast). The result is
; widened to <32 x i1> with zero upper lanes and bitcast to i32.
; Expected codegen: %__u is moved into %k1 and used as the compare's write
; mask. The AVX512VL run folds the load into the ymm compare; the
; AVX512F-only run loads into %ymm1 first, compares on zmm, and uses
; kshiftlw/kshiftrw $12 to keep only the low four mask bits.
18603 define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
18604 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem:
18605 ; VLX: # %bb.0: # %entry
18606 ; VLX-NEXT: kmovd %edi, %k1
18607 ; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
18608 ; VLX-NEXT: kmovd %k0, %eax
18609 ; VLX-NEXT: vzeroupper
18612 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem:
18613 ; NoVLX: # %bb.0: # %entry
18614 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18615 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
18616 ; NoVLX-NEXT: kmovw %edi, %k1
18617 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18618 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18619 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18620 ; NoVLX-NEXT: kmovw %k0, %eax
18621 ; NoVLX-NEXT: vzeroupper
18624 %0 = bitcast <4 x i64> %__a to <4 x i64>
18625 %load = load <4 x i64>, ptr %__b
18626 %1 = bitcast <4 x i64> %load to <4 x i64>
18627 %2 = icmp ult <4 x i64> %0, %1
18628 %3 = bitcast i8 %__u to <8 x i1>
18629 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18630 %4 = and <4 x i1> %2, %extract.i
18631 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18632 %6 = bitcast <32 x i1> %5 to i32
; icmp ult of <4 x i64> %__a against a single i64 loaded from %__b and
; splatted to all four lanes. The <4 x i1> result is widened to <32 x i1>
; (indices 4-7 of the shuffle select zeroinitializer lanes, so the upper
; lanes are zero) and bitcast to i32.
; Expected codegen: the AVX512VL run uses a {1to4} broadcast operand on ymm;
; the AVX512F-only run uses a {1to8} zmm compare and kshiftlw/kshiftrw $12 to
; keep only the low four mask bits.
18637 define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
18638 ; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b:
18639 ; VLX: # %bb.0: # %entry
18640 ; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
18641 ; VLX-NEXT: kmovd %k0, %eax
18642 ; VLX-NEXT: vzeroupper
18645 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b:
18646 ; NoVLX: # %bb.0: # %entry
18647 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18648 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
18649 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18650 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18651 ; NoVLX-NEXT: kmovw %k0, %eax
18652 ; NoVLX-NEXT: vzeroupper
18655 %0 = bitcast <4 x i64> %__a to <4 x i64>
18656 %load = load i64, ptr %__b
18657 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18658 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18659 %2 = icmp ult <4 x i64> %0, %1
18660 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18661 %4 = bitcast <32 x i1> %3 to i32
; Masked splat-from-memory compare: icmp ult of <4 x i64> %__a against an i64
; loaded from %__b and splatted to four lanes, ANDed with the low four bits of
; the i8 mask %__u (shuffle indices 0-3 of its <8 x i1> bitcast). The result
; is widened to <32 x i1> with zero upper lanes and bitcast to i32.
; Expected codegen: %__u is moved into %k1 and used as the compare's write
; mask. The AVX512VL run uses a {1to4} ymm compare; the AVX512F-only run uses
; a {1to8} zmm compare followed by kshiftlw/kshiftrw $12 to keep the low 4
; mask bits.
18665 define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
18666 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b:
18667 ; VLX: # %bb.0: # %entry
18668 ; VLX-NEXT: kmovd %edi, %k1
18669 ; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
18670 ; VLX-NEXT: kmovd %k0, %eax
18671 ; VLX-NEXT: vzeroupper
18674 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b:
18675 ; NoVLX: # %bb.0: # %entry
18676 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18677 ; NoVLX-NEXT: kmovw %edi, %k1
18678 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
18679 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18680 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18681 ; NoVLX-NEXT: kmovw %k0, %eax
18682 ; NoVLX-NEXT: vzeroupper
18685 %0 = bitcast <4 x i64> %__a to <4 x i64>
18686 %load = load i64, ptr %__b
18687 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18688 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18689 %2 = icmp ult <4 x i64> %0, %1
18690 %3 = bitcast i8 %__u to <8 x i1>
18691 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18692 %4 = and <4 x i1> %extract.i, %2
18693 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18694 %6 = bitcast <32 x i1> %5 to i32
; icmp ult on two <4 x i64> register operands; the <4 x i1> result is widened
; to <64 x i1> and bitcast to i64. In the widening shuffle, indices 0-3 take
; the compare lanes and every index 4-7 takes a lane of the zeroinitializer
; operand, so all lanes above the low four are zero.
; Expected codegen: the AVX512VL run compares on ymm and reads the mask with
; kmovq into %rax; the AVX512F-only run compares on zmm, uses
; kshiftlw/kshiftrw $12 to keep only the low four mask bits, and reads the
; mask with kmovw into %eax.
18699 define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18700 ; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask:
18701 ; VLX: # %bb.0: # %entry
18702 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
18703 ; VLX-NEXT: kmovq %k0, %rax
18704 ; VLX-NEXT: vzeroupper
18707 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask:
18708 ; NoVLX: # %bb.0: # %entry
18709 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18710 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18711 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18712 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18713 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18714 ; NoVLX-NEXT: kmovw %k0, %eax
18715 ; NoVLX-NEXT: vzeroupper
18718 %0 = bitcast <4 x i64> %__a to <4 x i64>
18719 %1 = bitcast <4 x i64> %__b to <4 x i64>
18720 %2 = icmp ult <4 x i64> %0, %1
18721 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18722 %4 = bitcast <64 x i1> %3 to i64
; icmp ult of <4 x i64> %__a against a full <4 x i64> vector loaded from
; %__b; the <4 x i1> result is widened to <64 x i1> (indices 4-7 of the
; shuffle select zeroinitializer lanes, so the upper lanes are zero) and
; bitcast to i64.
; Expected codegen: the AVX512VL run folds the load into the ymm compare and
; reads the mask with kmovq; the AVX512F-only run loads into %ymm1 with
; vmovdqa, compares on zmm, and uses kshiftlw/kshiftrw $12 to keep only the
; low four mask bits.
18726 define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
18727 ; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem:
18728 ; VLX: # %bb.0: # %entry
18729 ; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
18730 ; VLX-NEXT: kmovq %k0, %rax
18731 ; VLX-NEXT: vzeroupper
18734 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem:
18735 ; NoVLX: # %bb.0: # %entry
18736 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18737 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
18738 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18739 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18740 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18741 ; NoVLX-NEXT: kmovw %k0, %eax
18742 ; NoVLX-NEXT: vzeroupper
18745 %0 = bitcast <4 x i64> %__a to <4 x i64>
18746 %load = load <4 x i64>, ptr %__b
18747 %1 = bitcast <4 x i64> %load to <4 x i64>
18748 %2 = icmp ult <4 x i64> %0, %1
18749 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18750 %4 = bitcast <64 x i1> %3 to i64
; Masked register-register compare: icmp ult on two <4 x i64> operands, ANDed
; with the low four bits of the i8 mask %__u (shuffle indices 0-3 of its
; <8 x i1> bitcast). The <4 x i1> result is widened to <64 x i1> with zero
; upper lanes (indices 4-7 select zeroinitializer lanes) and bitcast to i64.
; Expected codegen: %__u is moved into %k1 and used as the compare's write
; mask. The AVX512VL run compares on ymm and reads the mask with kmovq; the
; AVX512F-only run compares on zmm and uses kshiftlw/kshiftrw $12 to keep
; only the low four mask bits.
18754 define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18755 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask:
18756 ; VLX: # %bb.0: # %entry
18757 ; VLX-NEXT: kmovd %edi, %k1
18758 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
18759 ; VLX-NEXT: kmovq %k0, %rax
18760 ; VLX-NEXT: vzeroupper
18763 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask:
18764 ; NoVLX: # %bb.0: # %entry
18765 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18766 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18767 ; NoVLX-NEXT: kmovw %edi, %k1
18768 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18769 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18770 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18771 ; NoVLX-NEXT: kmovw %k0, %eax
18772 ; NoVLX-NEXT: vzeroupper
18775 %0 = bitcast <4 x i64> %__a to <4 x i64>
18776 %1 = bitcast <4 x i64> %__b to <4 x i64>
18777 %2 = icmp ult <4 x i64> %0, %1
18778 %3 = bitcast i8 %__u to <8 x i1>
18779 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18780 %4 = and <4 x i1> %2, %extract.i
18781 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18782 %6 = bitcast <64 x i1> %5 to i64
; Masked compare against memory: icmp ult of <4 x i64> %__a against a full
; <4 x i64> vector loaded from %__b, ANDed with the low four bits of the i8
; mask %__u (shuffle indices 0-3 of its <8 x i1> bitcast). The result is
; widened to <64 x i1> with zero upper lanes and bitcast to i64.
; Expected codegen: %__u is moved into %k1 and used as the compare's write
; mask. The AVX512VL run folds the load into the ymm compare and reads the
; mask with kmovq; the AVX512F-only run loads into %ymm1 first, compares on
; zmm, and uses kshiftlw/kshiftrw $12 to keep only the low four mask bits.
18786 define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
18787 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem:
18788 ; VLX: # %bb.0: # %entry
18789 ; VLX-NEXT: kmovd %edi, %k1
18790 ; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
18791 ; VLX-NEXT: kmovq %k0, %rax
18792 ; VLX-NEXT: vzeroupper
18795 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem:
18796 ; NoVLX: # %bb.0: # %entry
18797 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18798 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
18799 ; NoVLX-NEXT: kmovw %edi, %k1
18800 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18801 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18802 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18803 ; NoVLX-NEXT: kmovw %k0, %eax
18804 ; NoVLX-NEXT: vzeroupper
18807 %0 = bitcast <4 x i64> %__a to <4 x i64>
18808 %load = load <4 x i64>, ptr %__b
18809 %1 = bitcast <4 x i64> %load to <4 x i64>
18810 %2 = icmp ult <4 x i64> %0, %1
18811 %3 = bitcast i8 %__u to <8 x i1>
18812 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18813 %4 = and <4 x i1> %2, %extract.i
18814 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18815 %6 = bitcast <64 x i1> %5 to i64
; icmp ult of <4 x i64> %__a against a single i64 loaded from %__b and
; splatted to all four lanes. The <4 x i1> result is widened to <64 x i1>
; (indices 4-7 of the shuffle select zeroinitializer lanes, so the upper
; lanes are zero) and bitcast to i64.
; Expected codegen: the AVX512VL run uses a {1to4} broadcast operand on ymm
; and reads the mask with kmovq; the AVX512F-only run uses a {1to8} zmm
; compare and kshiftlw/kshiftrw $12 to keep only the low four mask bits.
18820 define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
18821 ; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
18822 ; VLX: # %bb.0: # %entry
18823 ; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
18824 ; VLX-NEXT: kmovq %k0, %rax
18825 ; VLX-NEXT: vzeroupper
18828 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
18829 ; NoVLX: # %bb.0: # %entry
18830 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18831 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
18832 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18833 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18834 ; NoVLX-NEXT: kmovw %k0, %eax
18835 ; NoVLX-NEXT: vzeroupper
18838 %0 = bitcast <4 x i64> %__a to <4 x i64>
18839 %load = load i64, ptr %__b
18840 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18841 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18842 %2 = icmp ult <4 x i64> %0, %1
18843 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18844 %4 = bitcast <64 x i1> %3 to i64
; Masked splat-from-memory compare: icmp ult of <4 x i64> %__a against an i64
; loaded from %__b and splatted to four lanes, ANDed with the low four bits of
; the i8 mask %__u (shuffle indices 0-3 of its <8 x i1> bitcast). The result
; is widened to <64 x i1> with zero upper lanes and bitcast to i64.
; Expected codegen: %__u is moved into %k1 and used as the compare's write
; mask. The AVX512VL run uses a {1to4} ymm compare and reads the mask with
; kmovq; the AVX512F-only run uses a {1to8} zmm compare followed by
; kshiftlw/kshiftrw $12 to keep the low 4 mask bits.
18848 define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
18849 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
18850 ; VLX: # %bb.0: # %entry
18851 ; VLX-NEXT: kmovd %edi, %k1
18852 ; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
18853 ; VLX-NEXT: kmovq %k0, %rax
18854 ; VLX-NEXT: vzeroupper
18857 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
18858 ; NoVLX: # %bb.0: # %entry
18859 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18860 ; NoVLX-NEXT: kmovw %edi, %k1
18861 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
18862 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18863 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18864 ; NoVLX-NEXT: kmovw %k0, %eax
18865 ; NoVLX-NEXT: vzeroupper
18868 %0 = bitcast <4 x i64> %__a to <4 x i64>
18869 %load = load i64, ptr %__b
18870 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18871 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18872 %2 = icmp ult <4 x i64> %0, %1
18873 %3 = bitcast i8 %__u to <8 x i1>
18874 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18875 %4 = and <4 x i1> %extract.i, %2
18876 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18877 %6 = bitcast <64 x i1> %5 to i64
; icmp ult on two <8 x i64> register operands; the <8 x i1> result is widened
; to <16 x i1> and bitcast to i16. In the widening shuffle, indices 0-7 take
; the compare lanes and indices 8-15 take lanes of the zeroinitializer
; operand, so the upper eight lanes are zero.
; Expected codegen: both runs emit a single zmm vpcmpltuq with no kshift
; pair; they differ only in the mask read (kmovd with AVX512VL/BW/DQ enabled,
; kmovw with AVX512F only).
18882 define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
18883 ; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
18884 ; VLX: # %bb.0: # %entry
18885 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18886 ; VLX-NEXT: kmovd %k0, %eax
18887 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18888 ; VLX-NEXT: vzeroupper
18891 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
18892 ; NoVLX: # %bb.0: # %entry
18893 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18894 ; NoVLX-NEXT: kmovw %k0, %eax
18895 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18896 ; NoVLX-NEXT: vzeroupper
18899 %0 = bitcast <8 x i64> %__a to <8 x i64>
18900 %1 = bitcast <8 x i64> %__b to <8 x i64>
18901 %2 = icmp ult <8 x i64> %0, %1
18902 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
18903 %4 = bitcast <16 x i1> %3 to i16
; icmp ult of <8 x i64> %__a against a full <8 x i64> vector loaded from
; %__b; the <8 x i1> result is widened to <16 x i1> (shuffle indices 8-15
; select zeroinitializer lanes, so the upper eight lanes are zero) and
; bitcast to i16.
; Expected codegen: both runs fold the load into a single zmm vpcmpltuq; they
; differ only in the mask read (kmovd vs. kmovw).
18907 define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
18908 ; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
18909 ; VLX: # %bb.0: # %entry
18910 ; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
18911 ; VLX-NEXT: kmovd %k0, %eax
18912 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18913 ; VLX-NEXT: vzeroupper
18916 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
18917 ; NoVLX: # %bb.0: # %entry
18918 ; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
18919 ; NoVLX-NEXT: kmovw %k0, %eax
18920 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18921 ; NoVLX-NEXT: vzeroupper
18924 %0 = bitcast <8 x i64> %__a to <8 x i64>
18925 %load = load <8 x i64>, ptr %__b
18926 %1 = bitcast <8 x i64> %load to <8 x i64>
18927 %2 = icmp ult <8 x i64> %0, %1
18928 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
18929 %4 = bitcast <16 x i1> %3 to i16
; Masked register-register compare: icmp ult on two <8 x i64> operands, ANDed
; with the i8 mask %__u bitcast to <8 x i1>. The result is widened to
; <16 x i1> (shuffle indices 8-15 select zeroinitializer lanes, so the upper
; eight lanes are zero) and bitcast to i16.
; Expected codegen: both runs move %__u into %k1 and use it as the write mask
; of a single zmm vpcmpltuq; they differ only in kmovd vs. kmovw.
18933 define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
18934 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask:
18935 ; VLX: # %bb.0: # %entry
18936 ; VLX-NEXT: kmovd %edi, %k1
18937 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18938 ; VLX-NEXT: kmovd %k0, %eax
18939 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18940 ; VLX-NEXT: vzeroupper
18943 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask:
18944 ; NoVLX: # %bb.0: # %entry
18945 ; NoVLX-NEXT: kmovw %edi, %k1
18946 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18947 ; NoVLX-NEXT: kmovw %k0, %eax
18948 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18949 ; NoVLX-NEXT: vzeroupper
18952 %0 = bitcast <8 x i64> %__a to <8 x i64>
18953 %1 = bitcast <8 x i64> %__b to <8 x i64>
18954 %2 = icmp ult <8 x i64> %0, %1
18955 %3 = bitcast i8 %__u to <8 x i1>
18956 %4 = and <8 x i1> %2, %3
18957 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
18958 %6 = bitcast <16 x i1> %5 to i16
; Masked compare against memory: icmp ult of <8 x i64> %__a against a full
; <8 x i64> vector loaded from %__b, ANDed with the i8 mask %__u bitcast to
; <8 x i1>. The result is widened to <16 x i1> (shuffle indices 8-15 select
; zeroinitializer lanes, so the upper eight lanes are zero) and bitcast to
; i16.
; Expected codegen: both runs move %__u into %k1 and fold the load into a
; single write-masked zmm vpcmpltuq; they differ only in kmovd vs. kmovw.
18962 define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
18963 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
18964 ; VLX: # %bb.0: # %entry
18965 ; VLX-NEXT: kmovd %edi, %k1
18966 ; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
18967 ; VLX-NEXT: kmovd %k0, %eax
18968 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18969 ; VLX-NEXT: vzeroupper
18972 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
18973 ; NoVLX: # %bb.0: # %entry
18974 ; NoVLX-NEXT: kmovw %edi, %k1
18975 ; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
18976 ; NoVLX-NEXT: kmovw %k0, %eax
18977 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18978 ; NoVLX-NEXT: vzeroupper
18981 %0 = bitcast <8 x i64> %__a to <8 x i64>
18982 %load = load <8 x i64>, ptr %__b
18983 %1 = bitcast <8 x i64> %load to <8 x i64>
18984 %2 = icmp ult <8 x i64> %0, %1
18985 %3 = bitcast i8 %__u to <8 x i1>
18986 %4 = and <8 x i1> %2, %3
18987 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
18988 %6 = bitcast <16 x i1> %5 to i16
; Broadcast variant of the unsigned less-than test: a scalar i64 loaded from
; %__b is splatted to <8 x i64> (insertelement into lane 0, then an all-zero
; shuffle mask) and used as the right-hand side of icmp ult against %__a. The
; <8 x i1> result is widened to 16 lanes with zeros and bitcast to i16. Both
; RUN configurations expect vpcmpltuq with a {1to8} broadcast memory operand.
18993 define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
18994 ; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b:
18995 ; VLX: # %bb.0: # %entry
18996 ; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
18997 ; VLX-NEXT: kmovd %k0, %eax
18998 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18999 ; VLX-NEXT: vzeroupper
19002 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b:
19003 ; NoVLX: # %bb.0: # %entry
19004 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19005 ; NoVLX-NEXT: kmovw %k0, %eax
19006 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19007 ; NoVLX-NEXT: vzeroupper
19010 %0 = bitcast <8 x i64> %__a to <8 x i64>
19011 %load = load i64, ptr %__b
19012 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19013 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19014 %2 = icmp ult <8 x i64> %0, %1
19015 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19016 %4 = bitcast <16 x i1> %3 to i16
; Write-masked broadcast variant: a scalar i64 loaded from %__b is splatted to
; <8 x i64>, compared (icmp ult) against %__a, and the result is ANDed with the
; i8 mask %__u before being widened to 16 lanes with zeros and bitcast to i16.
; Both RUN configurations expect one vpcmpltuq using a {1to8} broadcast memory
; operand together with {%k1} write-masking.
19020 define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
19021 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
19022 ; VLX: # %bb.0: # %entry
19023 ; VLX-NEXT: kmovd %edi, %k1
19024 ; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19025 ; VLX-NEXT: kmovd %k0, %eax
19026 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19027 ; VLX-NEXT: vzeroupper
19030 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
19031 ; NoVLX: # %bb.0: # %entry
19032 ; NoVLX-NEXT: kmovw %edi, %k1
19033 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19034 ; NoVLX-NEXT: kmovw %k0, %eax
19035 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19036 ; NoVLX-NEXT: vzeroupper
19039 %0 = bitcast <8 x i64> %__a to <8 x i64>
19040 %load = load i64, ptr %__b
19041 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19042 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19043 %2 = icmp ult <8 x i64> %0, %1
19044 %3 = bitcast i8 %__u to <8 x i1>
19045 %4 = and <8 x i1> %3, %2
19046 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19047 %6 = bitcast <16 x i1> %5 to i16
; Register-register unsigned less-than test on <8 x i64> whose <8 x i1> result
; is widened to 32 lanes and bitcast to i32. Every shuffle index above 7 selects
; from the zeroinitializer operand, so only the low 8 bits can be set. Both RUN
; configurations expect a single vpcmpltuq; the mask is then read with kmovd in
; the VLX run and with kmovw in the NoVLX run.
19052 define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19053 ; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
19054 ; VLX: # %bb.0: # %entry
19055 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
19056 ; VLX-NEXT: kmovd %k0, %eax
19057 ; VLX-NEXT: vzeroupper
19060 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
19061 ; NoVLX: # %bb.0: # %entry
19062 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
19063 ; NoVLX-NEXT: kmovw %k0, %eax
19064 ; NoVLX-NEXT: vzeroupper
19067 %0 = bitcast <8 x i64> %__a to <8 x i64>
19068 %1 = bitcast <8 x i64> %__b to <8 x i64>
19069 %2 = icmp ult <8 x i64> %0, %1
19070 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19071 %4 = bitcast <32 x i1> %3 to i32
; Unsigned less-than test on <8 x i64> with the right-hand side loaded as a
; full vector from %__b. The <8 x i1> result is widened to 32 lanes (all
; indices above 7 select from zeroinitializer) and bitcast to i32. Both RUN
; configurations expect vpcmpltuq with the load folded into the compare.
19075 define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
19076 ; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
19077 ; VLX: # %bb.0: # %entry
19078 ; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
19079 ; VLX-NEXT: kmovd %k0, %eax
19080 ; VLX-NEXT: vzeroupper
19083 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
19084 ; NoVLX: # %bb.0: # %entry
19085 ; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
19086 ; NoVLX-NEXT: kmovw %k0, %eax
19087 ; NoVLX-NEXT: vzeroupper
19090 %0 = bitcast <8 x i64> %__a to <8 x i64>
19091 %load = load <8 x i64>, ptr %__b
19092 %1 = bitcast <8 x i64> %load to <8 x i64>
19093 %2 = icmp ult <8 x i64> %0, %1
19094 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19095 %4 = bitcast <32 x i1> %3 to i32
; Write-masked register-register unsigned less-than test on <8 x i64>: the
; icmp ult result is ANDed with the i8 mask %__u, widened to 32 lanes (indices
; above 7 select from zeroinitializer) and bitcast to i32. Both RUN
; configurations expect the mask moved into %k1 and a single vpcmpltuq with
; {%k1} write-masking.
19099 define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19100 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
19101 ; VLX: # %bb.0: # %entry
19102 ; VLX-NEXT: kmovd %edi, %k1
19103 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19104 ; VLX-NEXT: kmovd %k0, %eax
19105 ; VLX-NEXT: vzeroupper
19108 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
19109 ; NoVLX: # %bb.0: # %entry
19110 ; NoVLX-NEXT: kmovw %edi, %k1
19111 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19112 ; NoVLX-NEXT: kmovw %k0, %eax
19113 ; NoVLX-NEXT: vzeroupper
19116 %0 = bitcast <8 x i64> %__a to <8 x i64>
19117 %1 = bitcast <8 x i64> %__b to <8 x i64>
19118 %2 = icmp ult <8 x i64> %0, %1
19119 %3 = bitcast i8 %__u to <8 x i1>
19120 %4 = and <8 x i1> %2, %3
19121 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19122 %6 = bitcast <32 x i1> %5 to i32
; Write-masked unsigned less-than test on <8 x i64> with the right-hand side
; loaded as a full vector from %__b. The icmp ult result is ANDed with the i8
; mask %__u, widened to 32 lanes (indices above 7 select from zeroinitializer)
; and bitcast to i32. Both RUN configurations expect one vpcmpltuq with a
; folded memory operand and {%k1} write-masking.
19126 define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
19127 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
19128 ; VLX: # %bb.0: # %entry
19129 ; VLX-NEXT: kmovd %edi, %k1
19130 ; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19131 ; VLX-NEXT: kmovd %k0, %eax
19132 ; VLX-NEXT: vzeroupper
19135 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
19136 ; NoVLX: # %bb.0: # %entry
19137 ; NoVLX-NEXT: kmovw %edi, %k1
19138 ; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19139 ; NoVLX-NEXT: kmovw %k0, %eax
19140 ; NoVLX-NEXT: vzeroupper
19143 %0 = bitcast <8 x i64> %__a to <8 x i64>
19144 %load = load <8 x i64>, ptr %__b
19145 %1 = bitcast <8 x i64> %load to <8 x i64>
19146 %2 = icmp ult <8 x i64> %0, %1
19147 %3 = bitcast i8 %__u to <8 x i1>
19148 %4 = and <8 x i1> %2, %3
19149 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19150 %6 = bitcast <32 x i1> %5 to i32
; Broadcast unsigned less-than test: a scalar i64 loaded from %__b is splatted
; to <8 x i64> (insertelement into lane 0, then an all-zero shuffle mask) and
; compared (icmp ult) against %__a. The <8 x i1> result is widened to 32 lanes
; with zeros and bitcast to i32. Both RUN configurations expect vpcmpltuq with
; a {1to8} broadcast memory operand.
19155 define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
19156 ; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
19157 ; VLX: # %bb.0: # %entry
19158 ; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19159 ; VLX-NEXT: kmovd %k0, %eax
19160 ; VLX-NEXT: vzeroupper
19163 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
19164 ; NoVLX: # %bb.0: # %entry
19165 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19166 ; NoVLX-NEXT: kmovw %k0, %eax
19167 ; NoVLX-NEXT: vzeroupper
19170 %0 = bitcast <8 x i64> %__a to <8 x i64>
19171 %load = load i64, ptr %__b
19172 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19173 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19174 %2 = icmp ult <8 x i64> %0, %1
19175 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19176 %4 = bitcast <32 x i1> %3 to i32
; Write-masked broadcast unsigned less-than test: a scalar i64 loaded from
; %__b is splatted to <8 x i64>, compared (icmp ult) against %__a, and ANDed
; with the i8 mask %__u. The result is widened to 32 lanes with zeros and
; bitcast to i32. Both RUN configurations expect one vpcmpltuq with a {1to8}
; broadcast memory operand and {%k1} write-masking.
19180 define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
19181 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
19182 ; VLX: # %bb.0: # %entry
19183 ; VLX-NEXT: kmovd %edi, %k1
19184 ; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19185 ; VLX-NEXT: kmovd %k0, %eax
19186 ; VLX-NEXT: vzeroupper
19189 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
19190 ; NoVLX: # %bb.0: # %entry
19191 ; NoVLX-NEXT: kmovw %edi, %k1
19192 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19193 ; NoVLX-NEXT: kmovw %k0, %eax
19194 ; NoVLX-NEXT: vzeroupper
19197 %0 = bitcast <8 x i64> %__a to <8 x i64>
19198 %load = load i64, ptr %__b
19199 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19200 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19201 %2 = icmp ult <8 x i64> %0, %1
19202 %3 = bitcast i8 %__u to <8 x i1>
19203 %4 = and <8 x i1> %3, %2
19204 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19205 %6 = bitcast <32 x i1> %5 to i32
; Register-register unsigned less-than test on <8 x i64> whose <8 x i1> result
; is widened to 64 lanes and bitcast to i64. Every shuffle index above 7
; selects from the zeroinitializer operand, so only the low 8 bits can be set.
; Both RUN configurations expect a single vpcmpltuq; the VLX run then reads the
; mask with kmovq into %rax, the NoVLX run with kmovw into %eax.
19210 define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19211 ; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
19212 ; VLX: # %bb.0: # %entry
19213 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
19214 ; VLX-NEXT: kmovq %k0, %rax
19215 ; VLX-NEXT: vzeroupper
19218 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
19219 ; NoVLX: # %bb.0: # %entry
19220 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
19221 ; NoVLX-NEXT: kmovw %k0, %eax
19222 ; NoVLX-NEXT: vzeroupper
19225 %0 = bitcast <8 x i64> %__a to <8 x i64>
19226 %1 = bitcast <8 x i64> %__b to <8 x i64>
19227 %2 = icmp ult <8 x i64> %0, %1
19228 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19229 %4 = bitcast <64 x i1> %3 to i64
; Unsigned less-than test on <8 x i64> with the right-hand side loaded as a
; full vector from %__b. The <8 x i1> result is widened to 64 lanes (all
; indices above 7 select from zeroinitializer) and bitcast to i64. Both RUN
; configurations expect vpcmpltuq with the load folded into the compare; the
; VLX run reads the mask with kmovq, the NoVLX run with kmovw.
19233 define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
19234 ; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
19235 ; VLX: # %bb.0: # %entry
19236 ; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
19237 ; VLX-NEXT: kmovq %k0, %rax
19238 ; VLX-NEXT: vzeroupper
19241 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
19242 ; NoVLX: # %bb.0: # %entry
19243 ; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
19244 ; NoVLX-NEXT: kmovw %k0, %eax
19245 ; NoVLX-NEXT: vzeroupper
19248 %0 = bitcast <8 x i64> %__a to <8 x i64>
19249 %load = load <8 x i64>, ptr %__b
19250 %1 = bitcast <8 x i64> %load to <8 x i64>
19251 %2 = icmp ult <8 x i64> %0, %1
19252 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19253 %4 = bitcast <64 x i1> %3 to i64
; Write-masked register-register unsigned less-than test on <8 x i64>: the
; icmp ult result is ANDed with the i8 mask %__u, widened to 64 lanes (indices
; above 7 select from zeroinitializer) and bitcast to i64. Both RUN
; configurations expect the mask moved into %k1 and a single vpcmpltuq with
; {%k1} write-masking.
19257 define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19258 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
19259 ; VLX: # %bb.0: # %entry
19260 ; VLX-NEXT: kmovd %edi, %k1
19261 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19262 ; VLX-NEXT: kmovq %k0, %rax
19263 ; VLX-NEXT: vzeroupper
19266 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
19267 ; NoVLX: # %bb.0: # %entry
19268 ; NoVLX-NEXT: kmovw %edi, %k1
19269 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19270 ; NoVLX-NEXT: kmovw %k0, %eax
19271 ; NoVLX-NEXT: vzeroupper
19274 %0 = bitcast <8 x i64> %__a to <8 x i64>
19275 %1 = bitcast <8 x i64> %__b to <8 x i64>
19276 %2 = icmp ult <8 x i64> %0, %1
19277 %3 = bitcast i8 %__u to <8 x i1>
19278 %4 = and <8 x i1> %2, %3
19279 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19280 %6 = bitcast <64 x i1> %5 to i64
; Write-masked unsigned less-than test on <8 x i64> with the right-hand side
; loaded as a full vector from %__b. The icmp ult result is ANDed with the i8
; mask %__u, widened to 64 lanes (indices above 7 select from zeroinitializer)
; and bitcast to i64. Both RUN configurations expect one vpcmpltuq with a
; folded memory operand and {%k1} write-masking.
19284 define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
19285 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
19286 ; VLX: # %bb.0: # %entry
19287 ; VLX-NEXT: kmovd %edi, %k1
19288 ; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19289 ; VLX-NEXT: kmovq %k0, %rax
19290 ; VLX-NEXT: vzeroupper
19293 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
19294 ; NoVLX: # %bb.0: # %entry
19295 ; NoVLX-NEXT: kmovw %edi, %k1
19296 ; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19297 ; NoVLX-NEXT: kmovw %k0, %eax
19298 ; NoVLX-NEXT: vzeroupper
19301 %0 = bitcast <8 x i64> %__a to <8 x i64>
19302 %load = load <8 x i64>, ptr %__b
19303 %1 = bitcast <8 x i64> %load to <8 x i64>
19304 %2 = icmp ult <8 x i64> %0, %1
19305 %3 = bitcast i8 %__u to <8 x i1>
19306 %4 = and <8 x i1> %2, %3
19307 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19308 %6 = bitcast <64 x i1> %5 to i64
; Broadcast unsigned less-than test: a scalar i64 loaded from %__b is splatted
; to <8 x i64> (insertelement into lane 0, then an all-zero shuffle mask) and
; compared (icmp ult) against %__a. The <8 x i1> result is widened to 64 lanes
; with zeros and bitcast to i64. Both RUN configurations expect vpcmpltuq with
; a {1to8} broadcast memory operand.
19313 define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
19314 ; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
19315 ; VLX: # %bb.0: # %entry
19316 ; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19317 ; VLX-NEXT: kmovq %k0, %rax
19318 ; VLX-NEXT: vzeroupper
19321 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
19322 ; NoVLX: # %bb.0: # %entry
19323 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19324 ; NoVLX-NEXT: kmovw %k0, %eax
19325 ; NoVLX-NEXT: vzeroupper
19328 %0 = bitcast <8 x i64> %__a to <8 x i64>
19329 %load = load i64, ptr %__b
19330 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19331 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19332 %2 = icmp ult <8 x i64> %0, %1
19333 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19334 %4 = bitcast <64 x i1> %3 to i64
; Write-masked broadcast unsigned less-than test: a scalar i64 loaded from
; %__b is splatted to <8 x i64>, compared (icmp ult) against %__a, and ANDed
; with the i8 mask %__u. The result is widened to 64 lanes with zeros and
; bitcast to i64. Both RUN configurations expect one vpcmpltuq with a {1to8}
; broadcast memory operand and {%k1} write-masking.
19338 define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
19339 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
19340 ; VLX: # %bb.0: # %entry
19341 ; VLX-NEXT: kmovd %edi, %k1
19342 ; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19343 ; VLX-NEXT: kmovq %k0, %rax
19344 ; VLX-NEXT: vzeroupper
19347 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
19348 ; NoVLX: # %bb.0: # %entry
19349 ; NoVLX-NEXT: kmovw %edi, %k1
19350 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19351 ; NoVLX-NEXT: kmovw %k0, %eax
19352 ; NoVLX-NEXT: vzeroupper
19355 %0 = bitcast <8 x i64> %__a to <8 x i64>
19356 %load = load i64, ptr %__b
19357 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19358 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19359 %2 = icmp ult <8 x i64> %0, %1
19360 %3 = bitcast i8 %__u to <8 x i1>
19361 %4 = and <8 x i1> %3, %2
19362 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19363 %6 = bitcast <64 x i1> %5 to i64
19368 declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, <16 x i1>, i32)
; Ordered-equal (fcmp oeq) test on <4 x float>: both <2 x i64> arguments are
; bitcast to <4 x float> and compared, and the <4 x i1> result is widened to 8
; lanes (shuffle indices 4-7 select from the zeroinitializer operand) and
; bitcast to i8. The VLX run expects a 128-bit vcmpeqps straight into a mask
; register. The NoVLX run expects the compare on zmm registers followed by
; kshiftlw/kshiftrw by 12, which keeps only the low 4 mask bits.
19369 define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19370 ; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
19371 ; VLX: # %bb.0: # %entry
19372 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
19373 ; VLX-NEXT: kmovd %k0, %eax
19374 ; VLX-NEXT: # kill: def $al killed $al killed $eax
19377 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
19378 ; NoVLX: # %bb.0: # %entry
19379 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
19380 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19381 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19382 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19383 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19384 ; NoVLX-NEXT: kmovw %k0, %eax
19385 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
19386 ; NoVLX-NEXT: vzeroupper
19389 %0 = bitcast <2 x i64> %__a to <4 x float>
19390 %1 = bitcast <2 x i64> %__b to <4 x float>
19391 %2 = fcmp oeq <4 x float> %0, %1
19392 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19393 %4 = bitcast <8 x i1> %3 to i8
; Ordered-equal (fcmp oeq) test on <4 x float> with the right-hand side loaded
; as <2 x i64> from %__b and bitcast to <4 x float>. The <4 x i1> result is
; widened to 8 lanes with zeros and bitcast to i8. The VLX run expects the load
; folded into a 128-bit vcmpeqps. The NoVLX run expects a separate vmovaps
; load, a zmm-register compare, and kshiftlw/kshiftrw by 12 to keep only the
; low 4 mask bits.
19397 define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
19398 ; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem:
19399 ; VLX: # %bb.0: # %entry
19400 ; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
19401 ; VLX-NEXT: kmovd %k0, %eax
19402 ; VLX-NEXT: # kill: def $al killed $al killed $eax
19405 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem:
19406 ; NoVLX: # %bb.0: # %entry
19407 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19408 ; NoVLX-NEXT: vmovaps (%rdi), %xmm1
19409 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19410 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19411 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19412 ; NoVLX-NEXT: kmovw %k0, %eax
19413 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
19414 ; NoVLX-NEXT: vzeroupper
19417 %0 = bitcast <2 x i64> %__a to <4 x float>
19418 %load = load <2 x i64>, ptr %__b
19419 %1 = bitcast <2 x i64> %load to <4 x float>
19420 %2 = fcmp oeq <4 x float> %0, %1
19421 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19422 %4 = bitcast <8 x i1> %3 to i8
; Broadcast ordered-equal test: a scalar float loaded from %__b is splatted to
; <4 x float> (insertelement into lane 0, then an all-zero shuffle mask) and
; compared (fcmp oeq) against %__a. The <4 x i1> result is widened to 8 lanes
; with zeros and bitcast to i8. The VLX run expects vcmpeqps with a {1to4}
; broadcast operand on xmm. The NoVLX run expects a {1to16} broadcast on zmm
; followed by kshiftlw/kshiftrw by 12 to keep only the low 4 mask bits.
19426 define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
19427 ; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b:
19428 ; VLX: # %bb.0: # %entry
19429 ; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
19430 ; VLX-NEXT: kmovd %k0, %eax
19431 ; VLX-NEXT: # kill: def $al killed $al killed $eax
19434 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b:
19435 ; NoVLX: # %bb.0: # %entry
19436 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19437 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
19438 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19439 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19440 ; NoVLX-NEXT: kmovw %k0, %eax
19441 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
19442 ; NoVLX-NEXT: vzeroupper
19445 %0 = bitcast <2 x i64> %__a to <4 x float>
19446 %load = load float, ptr %__b
19447 %vec = insertelement <4 x float> undef, float %load, i32 0
19448 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19449 %2 = fcmp oeq <4 x float> %0, %1
19450 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19451 %4 = bitcast <8 x i1> %3 to i8
; Write-masked ordered-equal test on <4 x float>: the fcmp oeq result of the
; two register arguments is ANDed with the i4 mask %__u (bitcast to <4 x i1>),
; widened to 8 lanes with zeros and bitcast to i8. The VLX run expects a
; 128-bit vcmpeqps with {%k1} write-masking. The NoVLX run expects the masked
; compare on zmm registers followed by kshiftlw/kshiftrw by 12 to keep only
; the low 4 mask bits.
19455 define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19456 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask:
19457 ; VLX: # %bb.0: # %entry
19458 ; VLX-NEXT: kmovd %edi, %k1
19459 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
19460 ; VLX-NEXT: kmovd %k0, %eax
19461 ; VLX-NEXT: # kill: def $al killed $al killed $eax
19464 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask:
19465 ; NoVLX: # %bb.0: # %entry
19466 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
19467 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19468 ; NoVLX-NEXT: kmovw %edi, %k1
19469 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19470 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19471 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19472 ; NoVLX-NEXT: kmovw %k0, %eax
19473 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
19474 ; NoVLX-NEXT: vzeroupper
19477 %0 = bitcast <2 x i64> %__a to <4 x float>
19478 %1 = bitcast <2 x i64> %__b to <4 x float>
19479 %2 = fcmp oeq <4 x float> %0, %1
19480 %3 = bitcast i4 %__u to <4 x i1>
19481 %4 = and <4 x i1> %2, %3
19482 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19483 %6 = bitcast <8 x i1> %5 to i8
; Write-masked ordered-equal test on <4 x float> with the right-hand side
; loaded as <2 x i64> from %__b. The fcmp oeq result is ANDed with the i4 mask
; %__u, widened to 8 lanes with zeros and bitcast to i8. The VLX run expects
; the load folded into a masked 128-bit vcmpeqps. The NoVLX run expects a
; separate vmovaps load, a masked zmm compare, and kshiftlw/kshiftrw by 12 to
; keep only the low 4 mask bits.
19487 define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
19488 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem:
19489 ; VLX: # %bb.0: # %entry
19490 ; VLX-NEXT: kmovd %edi, %k1
19491 ; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
19492 ; VLX-NEXT: kmovd %k0, %eax
19493 ; VLX-NEXT: # kill: def $al killed $al killed $eax
19496 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem:
19497 ; NoVLX: # %bb.0: # %entry
19498 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19499 ; NoVLX-NEXT: kmovw %edi, %k1
19500 ; NoVLX-NEXT: vmovaps (%rsi), %xmm1
19501 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19502 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19503 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19504 ; NoVLX-NEXT: kmovw %k0, %eax
19505 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
19506 ; NoVLX-NEXT: vzeroupper
19509 %0 = bitcast <2 x i64> %__a to <4 x float>
19510 %load = load <2 x i64>, ptr %__b
19511 %1 = bitcast <2 x i64> %load to <4 x float>
19512 %2 = fcmp oeq <4 x float> %0, %1
19513 %3 = bitcast i4 %__u to <4 x i1>
19514 %4 = and <4 x i1> %2, %3
19515 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19516 %6 = bitcast <8 x i1> %5 to i8
; Write-masked broadcast ordered-equal test: a scalar float loaded from %__b
; is splatted to <4 x float>, compared (fcmp oeq) against %__a, and ANDed with
; the i4 mask %__u. The result is widened to 8 lanes with zeros and bitcast to
; i8. The VLX run expects a masked vcmpeqps with a {1to4} broadcast on xmm.
; The NoVLX run expects a masked {1to16} broadcast compare on zmm followed by
; kshiftlw/kshiftrw by 12 to keep only the low 4 mask bits.
19520 define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
19521 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b:
19522 ; VLX: # %bb.0: # %entry
19523 ; VLX-NEXT: kmovd %edi, %k1
19524 ; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
19525 ; VLX-NEXT: kmovd %k0, %eax
19526 ; VLX-NEXT: # kill: def $al killed $al killed $eax
19529 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b:
19530 ; NoVLX: # %bb.0: # %entry
19531 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19532 ; NoVLX-NEXT: kmovw %edi, %k1
19533 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
19534 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19535 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19536 ; NoVLX-NEXT: kmovw %k0, %eax
19537 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
19538 ; NoVLX-NEXT: vzeroupper
19541 %0 = bitcast <2 x i64> %__a to <4 x float>
19542 %load = load float, ptr %__b
19543 %vec = insertelement <4 x float> undef, float %load, i32 0
19544 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19545 %2 = fcmp oeq <4 x float> %0, %1
19546 %3 = bitcast i4 %__u to <4 x i1>
19547 %4 = and <4 x i1> %2, %3
19548 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19549 %6 = bitcast <8 x i1> %5 to i8
; Ordered-equal (fcmp oeq) test on <4 x float> whose <4 x i1> result is
; widened to 16 lanes and bitcast to i16. Every shuffle index above 3 selects
; from the zeroinitializer operand, so only the low 4 bits can be set. The VLX
; run expects a 128-bit vcmpeqps into a mask register. The NoVLX run expects
; the compare on zmm registers followed by kshiftlw/kshiftrw by 12 to keep
; only the low 4 mask bits.
19555 define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19556 ; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask:
19557 ; VLX: # %bb.0: # %entry
19558 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
19559 ; VLX-NEXT: kmovd %k0, %eax
19560 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19563 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask:
19564 ; NoVLX: # %bb.0: # %entry
19565 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
19566 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19567 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19568 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19569 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19570 ; NoVLX-NEXT: kmovw %k0, %eax
19571 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19572 ; NoVLX-NEXT: vzeroupper
19575 %0 = bitcast <2 x i64> %__a to <4 x float>
19576 %1 = bitcast <2 x i64> %__b to <4 x float>
19577 %2 = fcmp oeq <4 x float> %0, %1
19578 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19579 %4 = bitcast <16 x i1> %3 to i16
; Ordered-equal (fcmp oeq) test on <4 x float> with the right-hand side loaded
; as <2 x i64> from %__b and bitcast to <4 x float>. The <4 x i1> result is
; widened to 16 lanes (indices above 3 select from zeroinitializer) and
; bitcast to i16. The VLX run expects the load folded into a 128-bit vcmpeqps.
; The NoVLX run expects a separate vmovaps load, a zmm compare, and
; kshiftlw/kshiftrw by 12 to keep only the low 4 mask bits.
19583 define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
19584 ; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem:
19585 ; VLX: # %bb.0: # %entry
19586 ; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
19587 ; VLX-NEXT: kmovd %k0, %eax
19588 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19591 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem:
19592 ; NoVLX: # %bb.0: # %entry
19593 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19594 ; NoVLX-NEXT: vmovaps (%rdi), %xmm1
19595 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19596 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19597 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19598 ; NoVLX-NEXT: kmovw %k0, %eax
19599 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19600 ; NoVLX-NEXT: vzeroupper
19603 %0 = bitcast <2 x i64> %__a to <4 x float>
19604 %load = load <2 x i64>, ptr %__b
19605 %1 = bitcast <2 x i64> %load to <4 x float>
19606 %2 = fcmp oeq <4 x float> %0, %1
19607 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19608 %4 = bitcast <16 x i1> %3 to i16
; Broadcast ordered-equal test: a scalar float loaded from %__b is splatted to
; <4 x float> (insertelement into lane 0, then an all-zero shuffle mask) and
; compared (fcmp oeq) against %__a. The <4 x i1> result is widened to 16 lanes
; with zeros and bitcast to i16. The VLX run expects vcmpeqps with a {1to4}
; broadcast on xmm. The NoVLX run expects a {1to16} broadcast on zmm followed
; by kshiftlw/kshiftrw by 12 to keep only the low 4 mask bits.
19612 define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
19613 ; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b:
19614 ; VLX: # %bb.0: # %entry
19615 ; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
19616 ; VLX-NEXT: kmovd %k0, %eax
19617 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19620 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b:
19621 ; NoVLX: # %bb.0: # %entry
19622 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19623 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
19624 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19625 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19626 ; NoVLX-NEXT: kmovw %k0, %eax
19627 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19628 ; NoVLX-NEXT: vzeroupper
19631 %0 = bitcast <2 x i64> %__a to <4 x float>
19632 %load = load float, ptr %__b
19633 %vec = insertelement <4 x float> undef, float %load, i32 0
19634 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19635 %2 = fcmp oeq <4 x float> %0, %1
19636 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19637 %4 = bitcast <16 x i1> %3 to i16
; Write-masked ordered-equal test on <4 x float>: the fcmp oeq result of the
; two register arguments is ANDed with the i4 mask %__u (bitcast to <4 x i1>),
; widened to 16 lanes (indices above 3 select from zeroinitializer) and
; bitcast to i16. The VLX run expects a 128-bit vcmpeqps with {%k1}
; write-masking. The NoVLX run expects the masked compare on zmm registers
; followed by kshiftlw/kshiftrw by 12 to keep only the low 4 mask bits.
19641 define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19642 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask:
19643 ; VLX: # %bb.0: # %entry
19644 ; VLX-NEXT: kmovd %edi, %k1
19645 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
19646 ; VLX-NEXT: kmovd %k0, %eax
19647 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19650 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask:
19651 ; NoVLX: # %bb.0: # %entry
19652 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
19653 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19654 ; NoVLX-NEXT: kmovw %edi, %k1
19655 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19656 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19657 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19658 ; NoVLX-NEXT: kmovw %k0, %eax
19659 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19660 ; NoVLX-NEXT: vzeroupper
19663 %0 = bitcast <2 x i64> %__a to <4 x float>
19664 %1 = bitcast <2 x i64> %__b to <4 x float>
19665 %2 = fcmp oeq <4 x float> %0, %1
19666 %3 = bitcast i4 %__u to <4 x i1>
19667 %4 = and <4 x i1> %2, %3
19668 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19669 %6 = bitcast <16 x i1> %5 to i16
; Write-masked ordered-equal test on <4 x float> with the right-hand side
; loaded as <2 x i64> from %__b. The fcmp oeq result is ANDed with the i4 mask
; %__u, widened to 16 lanes with zeros and bitcast to i16. The VLX run expects
; the load folded into a masked 128-bit vcmpeqps. The NoVLX run expects a
; separate vmovaps load, a masked zmm compare, and kshiftlw/kshiftrw by 12 to
; keep only the low 4 mask bits.
19673 define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
19674 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem:
19675 ; VLX: # %bb.0: # %entry
19676 ; VLX-NEXT: kmovd %edi, %k1
19677 ; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
19678 ; VLX-NEXT: kmovd %k0, %eax
19679 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19682 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem:
19683 ; NoVLX: # %bb.0: # %entry
19684 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19685 ; NoVLX-NEXT: kmovw %edi, %k1
19686 ; NoVLX-NEXT: vmovaps (%rsi), %xmm1
19687 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19688 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19689 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19690 ; NoVLX-NEXT: kmovw %k0, %eax
19691 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19692 ; NoVLX-NEXT: vzeroupper
19695 %0 = bitcast <2 x i64> %__a to <4 x float>
19696 %load = load <2 x i64>, ptr %__b
19697 %1 = bitcast <2 x i64> %load to <4 x float>
19698 %2 = fcmp oeq <4 x float> %0, %1
19699 %3 = bitcast i4 %__u to <4 x i1>
19700 %4 = and <4 x i1> %2, %3
19701 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19702 %6 = bitcast <16 x i1> %5 to i16
; Write-masked broadcast ordered-equal test: a scalar float loaded from %__b
; is splatted to <4 x float>, compared (fcmp oeq) against %__a, and ANDed with
; the i4 mask %__u. The result is widened to 16 lanes with zeros and bitcast
; to i16. The VLX run expects a masked vcmpeqps with a {1to4} broadcast on
; xmm. The NoVLX run expects a masked {1to16} broadcast compare on zmm
; followed by kshiftlw/kshiftrw by 12 to keep only the low 4 mask bits.
19706 define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
19707 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b:
19708 ; VLX: # %bb.0: # %entry
19709 ; VLX-NEXT: kmovd %edi, %k1
19710 ; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
19711 ; VLX-NEXT: kmovd %k0, %eax
19712 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19715 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b:
19716 ; NoVLX: # %bb.0: # %entry
19717 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19718 ; NoVLX-NEXT: kmovw %edi, %k1
19719 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
19720 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19721 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19722 ; NoVLX-NEXT: kmovw %k0, %eax
19723 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19724 ; NoVLX-NEXT: vzeroupper
19727 %0 = bitcast <2 x i64> %__a to <4 x float>
19728 %load = load float, ptr %__b
19729 %vec = insertelement <4 x float> undef, float %load, i32 0
19730 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19731 %2 = fcmp oeq <4 x float> %0, %1
19732 %3 = bitcast i4 %__u to <4 x i1>
19733 %4 = and <4 x i1> %2, %3
19734 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19735 %6 = bitcast <16 x i1> %5 to i16
; Unmasked ordered-equal compare of two <4 x float> register operands.
; The 4-bit result is zero-extended to 32 lanes and bitcast to i32.
; The AVX512F-only checks clear mask bits 4-15 with a kshiftlw/kshiftrw pair.
19741 define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19742 ; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask:
19743 ; VLX: # %bb.0: # %entry
19744 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
19745 ; VLX-NEXT: kmovd %k0, %eax
19748 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask:
19749 ; NoVLX: # %bb.0: # %entry
19750 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
19751 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19752 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19753 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19754 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19755 ; NoVLX-NEXT: kmovw %k0, %eax
19756 ; NoVLX-NEXT: vzeroupper
19759 %0 = bitcast <2 x i64> %__a to <4 x float>
19760 %1 = bitcast <2 x i64> %__b to <4 x float>
19761 %2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-31 are all zero.
19762 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19763 %4 = bitcast <32 x i1> %3 to i32
; Unmasked ordered-equal compare of <4 x float>; the second operand is a full
; vector load from %__b. The 4-bit result is zero-extended to 32 lanes and
; bitcast to i32.
19767 define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
19768 ; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem:
19769 ; VLX: # %bb.0: # %entry
19770 ; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
19771 ; VLX-NEXT: kmovd %k0, %eax
19774 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem:
19775 ; NoVLX: # %bb.0: # %entry
19776 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19777 ; NoVLX-NEXT: vmovaps (%rdi), %xmm1
19778 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19779 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19780 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19781 ; NoVLX-NEXT: kmovw %k0, %eax
19782 ; NoVLX-NEXT: vzeroupper
19785 %0 = bitcast <2 x i64> %__a to <4 x float>
19786 %load = load <2 x i64>, ptr %__b
19787 %1 = bitcast <2 x i64> %load to <4 x float>
19788 %2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-31 are all zero.
19789 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19790 %4 = bitcast <32 x i1> %3 to i32
; Unmasked ordered-equal compare of <4 x float> against a scalar float loaded
; from %__b and splatted to every lane. The checks expect the load to fold
; into an embedded-broadcast memory operand ({1to4} or {1to16}).
; The 4-bit result is zero-extended to 32 lanes and bitcast to i32.
19794 define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
19795 ; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b:
19796 ; VLX: # %bb.0: # %entry
19797 ; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
19798 ; VLX-NEXT: kmovd %k0, %eax
19801 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b:
19802 ; NoVLX: # %bb.0: # %entry
19803 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19804 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
19805 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19806 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19807 ; NoVLX-NEXT: kmovw %k0, %eax
19808 ; NoVLX-NEXT: vzeroupper
19811 %0 = bitcast <2 x i64> %__a to <4 x float>
; Splat: insert the loaded float into lane 0, then shuffle with an all-zero
; index mask so every lane holds that value.
19812 %load = load float, ptr %__b
19813 %vec = insertelement <4 x float> undef, float %load, i32 0
19814 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19815 %2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-31 are all zero.
19816 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19817 %4 = bitcast <32 x i1> %3 to i32
; Masked ordered-equal compare of two <4 x float> register operands.
; The i4 mask %__u is ANDed with the compare result, which is then
; zero-extended to 32 lanes and bitcast to i32.
; The checks expect the AND to fold into the compare's {%k1} write-mask.
19821 define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19822 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask:
19823 ; VLX: # %bb.0: # %entry
19824 ; VLX-NEXT: kmovd %edi, %k1
19825 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
19826 ; VLX-NEXT: kmovd %k0, %eax
19829 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask:
19830 ; NoVLX: # %bb.0: # %entry
19831 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
19832 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19833 ; NoVLX-NEXT: kmovw %edi, %k1
19834 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19835 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19836 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19837 ; NoVLX-NEXT: kmovw %k0, %eax
19838 ; NoVLX-NEXT: vzeroupper
19841 %0 = bitcast <2 x i64> %__a to <4 x float>
19842 %1 = bitcast <2 x i64> %__b to <4 x float>
19843 %2 = fcmp oeq <4 x float> %0, %1
19844 %3 = bitcast i4 %__u to <4 x i1>
19845 %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-31 are all zero.
19846 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19847 %6 = bitcast <32 x i1> %5 to i32
; Masked ordered-equal compare of <4 x float>; the second operand is a full
; vector load from %__b. The i4 mask %__u is ANDed with the compare result,
; which is then zero-extended to 32 lanes and bitcast to i32.
19851 define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
19852 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem:
19853 ; VLX: # %bb.0: # %entry
19854 ; VLX-NEXT: kmovd %edi, %k1
19855 ; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
19856 ; VLX-NEXT: kmovd %k0, %eax
19859 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem:
19860 ; NoVLX: # %bb.0: # %entry
19861 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19862 ; NoVLX-NEXT: kmovw %edi, %k1
19863 ; NoVLX-NEXT: vmovaps (%rsi), %xmm1
19864 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19865 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19866 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19867 ; NoVLX-NEXT: kmovw %k0, %eax
19868 ; NoVLX-NEXT: vzeroupper
19871 %0 = bitcast <2 x i64> %__a to <4 x float>
19872 %load = load <2 x i64>, ptr %__b
19873 %1 = bitcast <2 x i64> %load to <4 x float>
19874 %2 = fcmp oeq <4 x float> %0, %1
19875 %3 = bitcast i4 %__u to <4 x i1>
19876 %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-31 are all zero.
19877 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19878 %6 = bitcast <32 x i1> %5 to i32
; Masked ordered-equal compare of <4 x float> against a scalar float loaded
; from %__b and splatted to every lane. The checks expect the load to fold
; into an embedded-broadcast memory operand ({1to4} or {1to16}).
; The masked result is zero-extended to 32 lanes and bitcast to i32.
19882 define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
19883 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b:
19884 ; VLX: # %bb.0: # %entry
19885 ; VLX-NEXT: kmovd %edi, %k1
19886 ; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
19887 ; VLX-NEXT: kmovd %k0, %eax
19890 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b:
19891 ; NoVLX: # %bb.0: # %entry
19892 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19893 ; NoVLX-NEXT: kmovw %edi, %k1
19894 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
19895 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19896 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19897 ; NoVLX-NEXT: kmovw %k0, %eax
19898 ; NoVLX-NEXT: vzeroupper
19901 %0 = bitcast <2 x i64> %__a to <4 x float>
; Splat: insert the loaded float into lane 0, then shuffle with an all-zero
; index mask so every lane holds that value.
19902 %load = load float, ptr %__b
19903 %vec = insertelement <4 x float> undef, float %load, i32 0
19904 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19905 %2 = fcmp oeq <4 x float> %0, %1
19906 %3 = bitcast i4 %__u to <4 x i1>
19907 %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-31 are all zero.
19908 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19909 %6 = bitcast <32 x i1> %5 to i32
; Unmasked ordered-equal compare of two <4 x float> register operands.
; The 4-bit result is zero-extended to 64 lanes and bitcast to i64.
; The AVX512F-only checks clear mask bits 4-15 with a kshiftlw/kshiftrw pair
; and read the mask with a 16-bit kmovw.
19915 define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19916 ; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask:
19917 ; VLX: # %bb.0: # %entry
19918 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
19919 ; VLX-NEXT: kmovq %k0, %rax
19922 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask:
19923 ; NoVLX: # %bb.0: # %entry
19924 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
19925 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19926 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19927 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19928 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19929 ; NoVLX-NEXT: kmovw %k0, %eax
19930 ; NoVLX-NEXT: vzeroupper
19933 %0 = bitcast <2 x i64> %__a to <4 x float>
19934 %1 = bitcast <2 x i64> %__b to <4 x float>
19935 %2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-63 are all zero.
19936 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19937 %4 = bitcast <64 x i1> %3 to i64
; Unmasked ordered-equal compare of <4 x float>; the second operand is a full
; vector load from %__b. The 4-bit result is zero-extended to 64 lanes and
; bitcast to i64.
19941 define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
19942 ; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem:
19943 ; VLX: # %bb.0: # %entry
19944 ; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
19945 ; VLX-NEXT: kmovq %k0, %rax
19948 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem:
19949 ; NoVLX: # %bb.0: # %entry
19950 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19951 ; NoVLX-NEXT: vmovaps (%rdi), %xmm1
19952 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19953 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19954 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19955 ; NoVLX-NEXT: kmovw %k0, %eax
19956 ; NoVLX-NEXT: vzeroupper
19959 %0 = bitcast <2 x i64> %__a to <4 x float>
19960 %load = load <2 x i64>, ptr %__b
19961 %1 = bitcast <2 x i64> %load to <4 x float>
19962 %2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-63 are all zero.
19963 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19964 %4 = bitcast <64 x i1> %3 to i64
; Unmasked ordered-equal compare of <4 x float> against a scalar float loaded
; from %__b and splatted to every lane. The checks expect the load to fold
; into an embedded-broadcast memory operand ({1to4} or {1to16}).
; The 4-bit result is zero-extended to 64 lanes and bitcast to i64.
19968 define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
19969 ; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b:
19970 ; VLX: # %bb.0: # %entry
19971 ; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
19972 ; VLX-NEXT: kmovq %k0, %rax
19975 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b:
19976 ; NoVLX: # %bb.0: # %entry
19977 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19978 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
19979 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19980 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19981 ; NoVLX-NEXT: kmovw %k0, %eax
19982 ; NoVLX-NEXT: vzeroupper
19985 %0 = bitcast <2 x i64> %__a to <4 x float>
; Splat: insert the loaded float into lane 0, then shuffle with an all-zero
; index mask so every lane holds that value.
19986 %load = load float, ptr %__b
19987 %vec = insertelement <4 x float> undef, float %load, i32 0
19988 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19989 %2 = fcmp oeq <4 x float> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-63 are all zero.
19990 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19991 %4 = bitcast <64 x i1> %3 to i64
; Masked ordered-equal compare of two <4 x float> register operands.
; The i4 mask %__u is ANDed with the compare result, which is then
; zero-extended to 64 lanes and bitcast to i64.
; The checks expect the AND to fold into the compare's {%k1} write-mask.
19995 define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19996 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask:
19997 ; VLX: # %bb.0: # %entry
19998 ; VLX-NEXT: kmovd %edi, %k1
19999 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
20000 ; VLX-NEXT: kmovq %k0, %rax
20003 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask:
20004 ; NoVLX: # %bb.0: # %entry
20005 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
20006 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20007 ; NoVLX-NEXT: kmovw %edi, %k1
20008 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20009 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20010 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20011 ; NoVLX-NEXT: kmovw %k0, %eax
20012 ; NoVLX-NEXT: vzeroupper
20015 %0 = bitcast <2 x i64> %__a to <4 x float>
20016 %1 = bitcast <2 x i64> %__b to <4 x float>
20017 %2 = fcmp oeq <4 x float> %0, %1
20018 %3 = bitcast i4 %__u to <4 x i1>
20019 %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-63 are all zero.
20020 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20021 %6 = bitcast <64 x i1> %5 to i64
; Masked ordered-equal compare of <4 x float>; the second operand is a full
; vector load from %__b. The i4 mask %__u is ANDed with the compare result,
; which is then zero-extended to 64 lanes and bitcast to i64.
20025 define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
20026 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem:
20027 ; VLX: # %bb.0: # %entry
20028 ; VLX-NEXT: kmovd %edi, %k1
20029 ; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
20030 ; VLX-NEXT: kmovq %k0, %rax
20033 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem:
20034 ; NoVLX: # %bb.0: # %entry
20035 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20036 ; NoVLX-NEXT: kmovw %edi, %k1
20037 ; NoVLX-NEXT: vmovaps (%rsi), %xmm1
20038 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20039 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20040 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20041 ; NoVLX-NEXT: kmovw %k0, %eax
20042 ; NoVLX-NEXT: vzeroupper
20045 %0 = bitcast <2 x i64> %__a to <4 x float>
20046 %load = load <2 x i64>, ptr %__b
20047 %1 = bitcast <2 x i64> %load to <4 x float>
20048 %2 = fcmp oeq <4 x float> %0, %1
20049 %3 = bitcast i4 %__u to <4 x i1>
20050 %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-63 are all zero.
20051 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20052 %6 = bitcast <64 x i1> %5 to i64
; Masked ordered-equal compare of <4 x float> against a scalar float loaded
; from %__b and splatted to every lane. The checks expect the load to fold
; into an embedded-broadcast memory operand ({1to4} or {1to16}).
; The masked result is zero-extended to 64 lanes and bitcast to i64.
20056 define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
20057 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b:
20058 ; VLX: # %bb.0: # %entry
20059 ; VLX-NEXT: kmovd %edi, %k1
20060 ; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
20061 ; VLX-NEXT: kmovq %k0, %rax
20064 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b:
20065 ; NoVLX: # %bb.0: # %entry
20066 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20067 ; NoVLX-NEXT: kmovw %edi, %k1
20068 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
20069 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20070 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20071 ; NoVLX-NEXT: kmovw %k0, %eax
20072 ; NoVLX-NEXT: vzeroupper
20075 %0 = bitcast <2 x i64> %__a to <4 x float>
; Splat: insert the loaded float into lane 0, then shuffle with an all-zero
; index mask so every lane holds that value.
20076 %load = load float, ptr %__b
20077 %vec = insertelement <4 x float> undef, float %load, i32 0
20078 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
20079 %2 = fcmp oeq <4 x float> %0, %1
20080 %3 = bitcast i4 %__u to <4 x i1>
20081 %4 = and <4 x i1> %2, %3
; Indices 0-3 select the masked compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-63 are all zero.
20082 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20083 %6 = bitcast <64 x i1> %5 to i64
; Unmasked ordered-equal compare of two <8 x float> register operands.
; The 8-bit result is zero-extended to 16 lanes and bitcast to i16.
; The AVX512F-only checks clear mask bits 8-15 with a kshiftlw/kshiftrw pair.
20089 define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20090 ; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
20091 ; VLX: # %bb.0: # %entry
20092 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
20093 ; VLX-NEXT: kmovd %k0, %eax
20094 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
20095 ; VLX-NEXT: vzeroupper
20098 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
20099 ; NoVLX: # %bb.0: # %entry
20100 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
20101 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20102 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20103 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20104 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20105 ; NoVLX-NEXT: kmovw %k0, %eax
20106 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20107 ; NoVLX-NEXT: vzeroupper
20110 %0 = bitcast <4 x i64> %__a to <8 x float>
20111 %1 = bitcast <4 x i64> %__b to <8 x float>
20112 %2 = fcmp oeq <8 x float> %0, %1
; Indices 0-7 select the compare bits; indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-15 are all zero.
20113 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20114 %4 = bitcast <16 x i1> %3 to i16
; Unmasked ordered-equal compare of <8 x float>; the second operand is a full
; vector load from %__b. The 8-bit result is zero-extended to 16 lanes and
; bitcast to i16.
20118 define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
20119 ; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
20120 ; VLX: # %bb.0: # %entry
20121 ; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0
20122 ; VLX-NEXT: kmovd %k0, %eax
20123 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
20124 ; VLX-NEXT: vzeroupper
20127 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
20128 ; NoVLX: # %bb.0: # %entry
20129 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20130 ; NoVLX-NEXT: vmovaps (%rdi), %ymm1
20131 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20132 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20133 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20134 ; NoVLX-NEXT: kmovw %k0, %eax
20135 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20136 ; NoVLX-NEXT: vzeroupper
20139 %0 = bitcast <4 x i64> %__a to <8 x float>
20140 %load = load <4 x i64>, ptr %__b
20141 %1 = bitcast <4 x i64> %load to <8 x float>
20142 %2 = fcmp oeq <8 x float> %0, %1
; Indices 0-7 select the compare bits; indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-15 are all zero.
20143 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20144 %4 = bitcast <16 x i1> %3 to i16
; Unmasked ordered-equal compare of <8 x float> against a scalar float loaded
; from %__b and splatted to every lane. The checks expect the load to fold
; into an embedded-broadcast memory operand ({1to8} or {1to16}).
; The 8-bit result is zero-extended to 16 lanes and bitcast to i16.
20148 define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
20149 ; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
20150 ; VLX: # %bb.0: # %entry
20151 ; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
20152 ; VLX-NEXT: kmovd %k0, %eax
20153 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
20154 ; VLX-NEXT: vzeroupper
20157 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
20158 ; NoVLX: # %bb.0: # %entry
20159 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20160 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
20161 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20162 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20163 ; NoVLX-NEXT: kmovw %k0, %eax
20164 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20165 ; NoVLX-NEXT: vzeroupper
20168 %0 = bitcast <4 x i64> %__a to <8 x float>
; Splat: insert the loaded float into lane 0, then shuffle with an all-zero
; index mask so every lane holds that value.
20169 %load = load float, ptr %__b
20170 %vec = insertelement <8 x float> undef, float %load, i32 0
20171 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20172 %2 = fcmp oeq <8 x float> %0, %1
; Indices 0-7 select the compare bits; indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-15 are all zero.
20173 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20174 %4 = bitcast <16 x i1> %3 to i16
; Masked ordered-equal compare of two <8 x float> register operands.
; The i8 mask %__u is ANDed with the compare result, which is then
; zero-extended to 16 lanes and bitcast to i16.
; The checks expect the AND to fold into the compare's {%k1} write-mask.
20178 define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20179 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask:
20180 ; VLX: # %bb.0: # %entry
20181 ; VLX-NEXT: kmovd %edi, %k1
20182 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1}
20183 ; VLX-NEXT: kmovd %k0, %eax
20184 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
20185 ; VLX-NEXT: vzeroupper
20188 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask:
20189 ; NoVLX: # %bb.0: # %entry
20190 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
20191 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20192 ; NoVLX-NEXT: kmovw %edi, %k1
20193 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20194 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20195 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20196 ; NoVLX-NEXT: kmovw %k0, %eax
20197 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20198 ; NoVLX-NEXT: vzeroupper
20201 %0 = bitcast <4 x i64> %__a to <8 x float>
20202 %1 = bitcast <4 x i64> %__b to <8 x float>
20203 %2 = fcmp oeq <8 x float> %0, %1
20204 %3 = bitcast i8 %__u to <8 x i1>
20205 %4 = and <8 x i1> %2, %3
; Indices 0-7 select the masked compare bits; indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-15 are all zero.
20206 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20207 %6 = bitcast <16 x i1> %5 to i16
; Masked ordered-equal compare of <8 x float>; the second operand is a full
; vector load from %__b. The i8 mask %__u is ANDed with the compare result,
; which is then zero-extended to 16 lanes and bitcast to i16.
20211 define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
20212 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem:
20213 ; VLX: # %bb.0: # %entry
20214 ; VLX-NEXT: kmovd %edi, %k1
20215 ; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1}
20216 ; VLX-NEXT: kmovd %k0, %eax
20217 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
20218 ; VLX-NEXT: vzeroupper
20221 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem:
20222 ; NoVLX: # %bb.0: # %entry
20223 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20224 ; NoVLX-NEXT: vmovaps (%rsi), %ymm1
20225 ; NoVLX-NEXT: kmovw %edi, %k1
20226 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20227 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20228 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20229 ; NoVLX-NEXT: kmovw %k0, %eax
20230 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20231 ; NoVLX-NEXT: vzeroupper
20234 %0 = bitcast <4 x i64> %__a to <8 x float>
20235 %load = load <4 x i64>, ptr %__b
20236 %1 = bitcast <4 x i64> %load to <8 x float>
20237 %2 = fcmp oeq <8 x float> %0, %1
20238 %3 = bitcast i8 %__u to <8 x i1>
20239 %4 = and <8 x i1> %2, %3
; Indices 0-7 select the masked compare bits; indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-15 are all zero.
20240 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20241 %6 = bitcast <16 x i1> %5 to i16
; Masked ordered-equal compare of <8 x float> against a scalar float loaded
; from %__b and splatted to every lane. The checks expect the load to fold
; into an embedded-broadcast memory operand ({1to8} or {1to16}).
; The masked result is zero-extended to 16 lanes and bitcast to i16.
20245 define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
20246 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b:
20247 ; VLX: # %bb.0: # %entry
20248 ; VLX-NEXT: kmovd %edi, %k1
20249 ; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
20250 ; VLX-NEXT: kmovd %k0, %eax
20251 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
20252 ; VLX-NEXT: vzeroupper
20255 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b:
20256 ; NoVLX: # %bb.0: # %entry
20257 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20258 ; NoVLX-NEXT: kmovw %edi, %k1
20259 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
20260 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20261 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20262 ; NoVLX-NEXT: kmovw %k0, %eax
20263 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20264 ; NoVLX-NEXT: vzeroupper
20267 %0 = bitcast <4 x i64> %__a to <8 x float>
; Splat: insert the loaded float into lane 0, then shuffle with an all-zero
; index mask so every lane holds that value.
20268 %load = load float, ptr %__b
20269 %vec = insertelement <8 x float> undef, float %load, i32 0
20270 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20271 %2 = fcmp oeq <8 x float> %0, %1
20272 %3 = bitcast i8 %__u to <8 x i1>
20273 %4 = and <8 x i1> %2, %3
; Indices 0-7 select the masked compare bits; indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-15 are all zero.
20274 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20275 %6 = bitcast <16 x i1> %5 to i16
; Unmasked ordered-equal compare of two <8 x float> register operands.
; The 8-bit result is zero-extended to 32 lanes and bitcast to i32.
; The AVX512F-only checks clear mask bits 8-15 with a kshiftlw/kshiftrw pair.
20281 define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20282 ; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask:
20283 ; VLX: # %bb.0: # %entry
20284 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
20285 ; VLX-NEXT: kmovd %k0, %eax
20286 ; VLX-NEXT: vzeroupper
20289 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask:
20290 ; NoVLX: # %bb.0: # %entry
20291 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
20292 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20293 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20294 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20295 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20296 ; NoVLX-NEXT: kmovw %k0, %eax
20297 ; NoVLX-NEXT: vzeroupper
20300 %0 = bitcast <4 x i64> %__a to <8 x float>
20301 %1 = bitcast <4 x i64> %__b to <8 x float>
20302 %2 = fcmp oeq <8 x float> %0, %1
; Indices 0-7 select the compare bits; indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-31 are all zero.
20303 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20304 %4 = bitcast <32 x i1> %3 to i32
; Unmasked ordered-equal compare of <8 x float>; the second operand is a full
; vector load from %__b. The 8-bit result is zero-extended to 32 lanes and
; bitcast to i32.
20308 define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
20309 ; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem:
20310 ; VLX: # %bb.0: # %entry
20311 ; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0
20312 ; VLX-NEXT: kmovd %k0, %eax
20313 ; VLX-NEXT: vzeroupper
20316 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem:
20317 ; NoVLX: # %bb.0: # %entry
20318 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20319 ; NoVLX-NEXT: vmovaps (%rdi), %ymm1
20320 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20321 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20322 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20323 ; NoVLX-NEXT: kmovw %k0, %eax
20324 ; NoVLX-NEXT: vzeroupper
20327 %0 = bitcast <4 x i64> %__a to <8 x float>
20328 %load = load <4 x i64>, ptr %__b
20329 %1 = bitcast <4 x i64> %load to <8 x float>
20330 %2 = fcmp oeq <8 x float> %0, %1
; Indices 0-7 select the compare bits; indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-31 are all zero.
20331 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20332 %4 = bitcast <32 x i1> %3 to i32
; Unmasked ordered-equal compare of <8 x float> against a scalar float loaded
; from %__b and splatted to every lane. The checks expect the load to fold
; into an embedded-broadcast memory operand ({1to8} or {1to16}).
; The 8-bit result is zero-extended to 32 lanes and bitcast to i32.
20336 define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
20337 ; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b:
20338 ; VLX: # %bb.0: # %entry
20339 ; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
20340 ; VLX-NEXT: kmovd %k0, %eax
20341 ; VLX-NEXT: vzeroupper
20344 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b:
20345 ; NoVLX: # %bb.0: # %entry
20346 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20347 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
20348 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20349 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20350 ; NoVLX-NEXT: kmovw %k0, %eax
20351 ; NoVLX-NEXT: vzeroupper
20354 %0 = bitcast <4 x i64> %__a to <8 x float>
; Splat: insert the loaded float into lane 0, then shuffle with an all-zero
; index mask so every lane holds that value.
20355 %load = load float, ptr %__b
20356 %vec = insertelement <8 x float> undef, float %load, i32 0
20357 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20358 %2 = fcmp oeq <8 x float> %0, %1
; Indices 0-7 select the compare bits; indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-31 are all zero.
20359 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20360 %4 = bitcast <32 x i1> %3 to i32
; Masked ordered-equal compare of two <8 x float> register operands.
; The i8 mask %__u is ANDed with the compare result, which is then
; zero-extended to 32 lanes and bitcast to i32.
; The checks expect the AND to fold into the compare's {%k1} write-mask.
20364 define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20365 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask:
20366 ; VLX: # %bb.0: # %entry
20367 ; VLX-NEXT: kmovd %edi, %k1
20368 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1}
20369 ; VLX-NEXT: kmovd %k0, %eax
20370 ; VLX-NEXT: vzeroupper
20373 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask:
20374 ; NoVLX: # %bb.0: # %entry
20375 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
20376 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20377 ; NoVLX-NEXT: kmovw %edi, %k1
20378 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20379 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20380 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20381 ; NoVLX-NEXT: kmovw %k0, %eax
20382 ; NoVLX-NEXT: vzeroupper
20385 %0 = bitcast <4 x i64> %__a to <8 x float>
20386 %1 = bitcast <4 x i64> %__b to <8 x float>
20387 %2 = fcmp oeq <8 x float> %0, %1
20388 %3 = bitcast i8 %__u to <8 x i1>
20389 %4 = and <8 x i1> %2, %3
; Indices 0-7 select the masked compare bits; indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-31 are all zero.
20390 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20391 %6 = bitcast <32 x i1> %5 to i32
; Masked ordered-equal compare of <8 x float>; the second operand is a full
; vector load from %__b. The i8 mask %__u is ANDed with the compare result,
; which is then zero-extended to 32 lanes and bitcast to i32.
20395 define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
20396 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem:
20397 ; VLX: # %bb.0: # %entry
20398 ; VLX-NEXT: kmovd %edi, %k1
20399 ; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1}
20400 ; VLX-NEXT: kmovd %k0, %eax
20401 ; VLX-NEXT: vzeroupper
20404 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem:
20405 ; NoVLX: # %bb.0: # %entry
20406 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20407 ; NoVLX-NEXT: vmovaps (%rsi), %ymm1
20408 ; NoVLX-NEXT: kmovw %edi, %k1
20409 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20410 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20411 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20412 ; NoVLX-NEXT: kmovw %k0, %eax
20413 ; NoVLX-NEXT: vzeroupper
20416 %0 = bitcast <4 x i64> %__a to <8 x float>
20417 %load = load <4 x i64>, ptr %__b
20418 %1 = bitcast <4 x i64> %load to <8 x float>
20419 %2 = fcmp oeq <8 x float> %0, %1
20420 %3 = bitcast i8 %__u to <8 x i1>
20421 %4 = and <8 x i1> %2, %3
; Indices 0-7 select the masked compare bits; indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-31 are all zero.
20422 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20423 %6 = bitcast <32 x i1> %5 to i32
; Masked ordered-equal compare of an <8 x float> register operand against a
; single float loaded from %__b and splatted to all 8 lanes (insertelement into
; lane 0 plus an all-zero-index shufflevector). The result is ANDed with %__u
; and widened to a 32-bit mask whose lanes 8-31 come from the zeroinitializer
; shuffle operand.
; The VLX run expects an embedded {1to8} broadcast on the ymm compare; the NoVLX
; run expects a {1to16} broadcast on a zmm compare followed by a
; kshiftlw/kshiftrw pair on the resulting mask.
20427 define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
20428 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b:
20429 ; VLX: # %bb.0: # %entry
20430 ; VLX-NEXT: kmovd %edi, %k1
20431 ; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
20432 ; VLX-NEXT: kmovd %k0, %eax
20433 ; VLX-NEXT: vzeroupper
20436 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b:
20437 ; NoVLX: # %bb.0: # %entry
20438 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20439 ; NoVLX-NEXT: kmovw %edi, %k1
20440 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
20441 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20442 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20443 ; NoVLX-NEXT: kmovw %k0, %eax
20444 ; NoVLX-NEXT: vzeroupper
20447 %0 = bitcast <4 x i64> %__a to <8 x float>
20448 %load = load float, ptr %__b
20449 %vec = insertelement <8 x float> undef, float %load, i32 0
20450 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20451 %2 = fcmp oeq <8 x float> %0, %1
20452 %3 = bitcast i8 %__u to <8 x i1>
20453 %4 = and <8 x i1> %2, %3
20454 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20455 %6 = bitcast <32 x i1> %5 to i32
; Unmasked ordered-equal compare of two <8 x float> vectors (passed as
; <4 x i64>), with the 8-lane result widened to a 64-bit mask.
; The shufflevector takes lanes 0-7 from the fcmp result and every other lane
; from the zeroinitializer operand (indices 8-15), so bits 8-63 of the i64 are
; zero.
; The VLX run expects a ymm compare read back with kmovq; the NoVLX run expects
; the operands widened to zmm, a kshiftlw/kshiftrw pair on the mask, and kmovw.
20461 define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20462 ; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask:
20463 ; VLX: # %bb.0: # %entry
20464 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
20465 ; VLX-NEXT: kmovq %k0, %rax
20466 ; VLX-NEXT: vzeroupper
20469 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask:
20470 ; NoVLX: # %bb.0: # %entry
20471 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
20472 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20473 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20474 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20475 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20476 ; NoVLX-NEXT: kmovw %k0, %eax
20477 ; NoVLX-NEXT: vzeroupper
20480 %0 = bitcast <4 x i64> %__a to <8 x float>
20481 %1 = bitcast <4 x i64> %__b to <8 x float>
20482 %2 = fcmp oeq <8 x float> %0, %1
20483 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20484 %4 = bitcast <64 x i1> %3 to i64
; Unmasked ordered-equal compare of an <8 x float> register operand against a
; vector loaded from %__b (loaded as <4 x i64>, then bitcast), with the 8-lane
; result widened to a 64-bit mask whose lanes 8-63 come from the zeroinitializer
; shuffle operand.
; The VLX run expects the load folded into the ymm compare and kmovq; the NoVLX
; run expects a separate vmovaps load, a zmm compare, a kshiftlw/kshiftrw pair
; on the mask, and kmovw.
20488 define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
20489 ; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem:
20490 ; VLX: # %bb.0: # %entry
20491 ; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0
20492 ; VLX-NEXT: kmovq %k0, %rax
20493 ; VLX-NEXT: vzeroupper
20496 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem:
20497 ; NoVLX: # %bb.0: # %entry
20498 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20499 ; NoVLX-NEXT: vmovaps (%rdi), %ymm1
20500 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20501 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20502 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20503 ; NoVLX-NEXT: kmovw %k0, %eax
20504 ; NoVLX-NEXT: vzeroupper
20507 %0 = bitcast <4 x i64> %__a to <8 x float>
20508 %load = load <4 x i64>, ptr %__b
20509 %1 = bitcast <4 x i64> %load to <8 x float>
20510 %2 = fcmp oeq <8 x float> %0, %1
20511 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20512 %4 = bitcast <64 x i1> %3 to i64
; Unmasked ordered-equal compare of an <8 x float> register operand against a
; single float loaded from %__b and splatted to all 8 lanes, with the 8-lane
; result widened to a 64-bit mask whose lanes 8-63 come from the zeroinitializer
; shuffle operand.
; The VLX run expects an embedded {1to8} broadcast on the ymm compare and kmovq;
; the NoVLX run expects a {1to16} broadcast on a zmm compare, a
; kshiftlw/kshiftrw pair on the mask, and kmovw.
20516 define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
20517 ; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b:
20518 ; VLX: # %bb.0: # %entry
20519 ; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
20520 ; VLX-NEXT: kmovq %k0, %rax
20521 ; VLX-NEXT: vzeroupper
20524 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b:
20525 ; NoVLX: # %bb.0: # %entry
20526 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20527 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
20528 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20529 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20530 ; NoVLX-NEXT: kmovw %k0, %eax
20531 ; NoVLX-NEXT: vzeroupper
20534 %0 = bitcast <4 x i64> %__a to <8 x float>
20535 %load = load float, ptr %__b
20536 %vec = insertelement <8 x float> undef, float %load, i32 0
20537 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20538 %2 = fcmp oeq <8 x float> %0, %1
20539 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20540 %4 = bitcast <64 x i1> %3 to i64
; Masked ordered-equal compare of two <8 x float> vectors (passed as <4 x i64>),
; with the 8-lane result ANDed with %__u (as <8 x i1>) and widened to a 64-bit
; mask whose lanes 8-63 come from the zeroinitializer shuffle operand.
; The VLX run expects a ymm compare predicated by k1 and kmovq; the NoVLX run
; expects the operands widened to zmm, a kshiftlw/kshiftrw pair on the mask,
; and kmovw.
20544 define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20545 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask:
20546 ; VLX: # %bb.0: # %entry
20547 ; VLX-NEXT: kmovd %edi, %k1
20548 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1}
20549 ; VLX-NEXT: kmovq %k0, %rax
20550 ; VLX-NEXT: vzeroupper
20553 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask:
20554 ; NoVLX: # %bb.0: # %entry
20555 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
20556 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20557 ; NoVLX-NEXT: kmovw %edi, %k1
20558 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20559 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20560 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20561 ; NoVLX-NEXT: kmovw %k0, %eax
20562 ; NoVLX-NEXT: vzeroupper
20565 %0 = bitcast <4 x i64> %__a to <8 x float>
20566 %1 = bitcast <4 x i64> %__b to <8 x float>
20567 %2 = fcmp oeq <8 x float> %0, %1
20568 %3 = bitcast i8 %__u to <8 x i1>
20569 %4 = and <8 x i1> %2, %3
20570 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20571 %6 = bitcast <64 x i1> %5 to i64
; Masked ordered-equal compare of an <8 x float> register operand against a
; vector loaded from %__b (loaded as <4 x i64>, then bitcast). The 8-lane result
; is ANDed with %__u and widened to a 64-bit mask whose lanes 8-63 come from the
; zeroinitializer shuffle operand.
; The VLX run expects the load folded into the predicated ymm compare and kmovq;
; the NoVLX run expects a separate vmovaps load, a predicated zmm compare, a
; kshiftlw/kshiftrw pair on the mask, and kmovw.
20575 define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
20576 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem:
20577 ; VLX: # %bb.0: # %entry
20578 ; VLX-NEXT: kmovd %edi, %k1
20579 ; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1}
20580 ; VLX-NEXT: kmovq %k0, %rax
20581 ; VLX-NEXT: vzeroupper
20584 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem:
20585 ; NoVLX: # %bb.0: # %entry
20586 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20587 ; NoVLX-NEXT: vmovaps (%rsi), %ymm1
20588 ; NoVLX-NEXT: kmovw %edi, %k1
20589 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20590 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20591 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20592 ; NoVLX-NEXT: kmovw %k0, %eax
20593 ; NoVLX-NEXT: vzeroupper
20596 %0 = bitcast <4 x i64> %__a to <8 x float>
20597 %load = load <4 x i64>, ptr %__b
20598 %1 = bitcast <4 x i64> %load to <8 x float>
20599 %2 = fcmp oeq <8 x float> %0, %1
20600 %3 = bitcast i8 %__u to <8 x i1>
20601 %4 = and <8 x i1> %2, %3
20602 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20603 %6 = bitcast <64 x i1> %5 to i64
; Masked ordered-equal compare of an <8 x float> register operand against a
; single float loaded from %__b and splatted to all 8 lanes. The 8-lane result
; is ANDed with %__u and widened to a 64-bit mask whose lanes 8-63 come from the
; zeroinitializer shuffle operand.
; The VLX run expects a predicated ymm compare with an embedded {1to8} broadcast
; and kmovq; the NoVLX run expects a predicated zmm compare with a {1to16}
; broadcast, a kshiftlw/kshiftrw pair on the mask, and kmovw.
20607 define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
20608 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b:
20609 ; VLX: # %bb.0: # %entry
20610 ; VLX-NEXT: kmovd %edi, %k1
20611 ; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
20612 ; VLX-NEXT: kmovq %k0, %rax
20613 ; VLX-NEXT: vzeroupper
20616 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b:
20617 ; NoVLX: # %bb.0: # %entry
20618 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20619 ; NoVLX-NEXT: kmovw %edi, %k1
20620 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
20621 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20622 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20623 ; NoVLX-NEXT: kmovw %k0, %eax
20624 ; NoVLX-NEXT: vzeroupper
20627 %0 = bitcast <4 x i64> %__a to <8 x float>
20628 %load = load float, ptr %__b
20629 %vec = insertelement <8 x float> undef, float %load, i32 0
20630 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20631 %2 = fcmp oeq <8 x float> %0, %1
20632 %3 = bitcast i8 %__u to <8 x i1>
20633 %4 = and <8 x i1> %2, %3
20634 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20635 %6 = bitcast <64 x i1> %5 to i64
; Unmasked ordered-equal compare of two <16 x float> vectors (passed as
; <8 x i64>), with the 16-lane result widened to a 32-bit mask.
; The shufflevector takes lanes 0-15 from the fcmp result and lanes 16-31 from
; the zeroinitializer operand, so the upper 16 bits of the i32 are zero.
; Both runs expect the same zmm compare; they differ only in the mask move
; (kmovd for the VLX run, kmovw for the NoVLX run).
20641 define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
20642 ; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask:
20643 ; VLX: # %bb.0: # %entry
20644 ; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20645 ; VLX-NEXT: kmovd %k0, %eax
20646 ; VLX-NEXT: vzeroupper
20649 ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask:
20650 ; NoVLX: # %bb.0: # %entry
20651 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20652 ; NoVLX-NEXT: kmovw %k0, %eax
20653 ; NoVLX-NEXT: vzeroupper
20656 %0 = bitcast <8 x i64> %__a to <16 x float>
20657 %1 = bitcast <8 x i64> %__b to <16 x float>
20658 %2 = fcmp oeq <16 x float> %0, %1
20659 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
20660 %4 = bitcast <32 x i1> %3 to i32
; Unmasked ordered-equal compare of a <16 x float> register operand against a
; vector loaded from %__b (loaded as <8 x i64>, then bitcast), with the 16-lane
; result widened to a 32-bit mask whose lanes 16-31 come from the
; zeroinitializer shuffle operand.
; Both runs expect the load folded into the zmm compare; they differ only in
; the mask move (kmovd for the VLX run, kmovw for the NoVLX run).
20664 define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
20665 ; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem:
20666 ; VLX: # %bb.0: # %entry
20667 ; VLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
20668 ; VLX-NEXT: kmovd %k0, %eax
20669 ; VLX-NEXT: vzeroupper
20672 ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem:
20673 ; NoVLX: # %bb.0: # %entry
20674 ; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
20675 ; NoVLX-NEXT: kmovw %k0, %eax
20676 ; NoVLX-NEXT: vzeroupper
20679 %0 = bitcast <8 x i64> %__a to <16 x float>
20680 %load = load <8 x i64>, ptr %__b
20681 %1 = bitcast <8 x i64> %load to <16 x float>
20682 %2 = fcmp oeq <16 x float> %0, %1
20683 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
20684 %4 = bitcast <32 x i1> %3 to i32
; Unmasked ordered-equal compare of a <16 x float> register operand against a
; single float loaded from %__b and splatted to all 16 lanes, with the 16-lane
; result widened to a 32-bit mask whose lanes 16-31 come from the
; zeroinitializer shuffle operand.
; Both runs expect a zmm compare with an embedded {1to16} broadcast; they differ
; only in the mask move (kmovd for the VLX run, kmovw for the NoVLX run).
20688 define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
20689 ; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b:
20690 ; VLX: # %bb.0: # %entry
20691 ; VLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
20692 ; VLX-NEXT: kmovd %k0, %eax
20693 ; VLX-NEXT: vzeroupper
20696 ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b:
20697 ; NoVLX: # %bb.0: # %entry
20698 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
20699 ; NoVLX-NEXT: kmovw %k0, %eax
20700 ; NoVLX-NEXT: vzeroupper
20703 %0 = bitcast <8 x i64> %__a to <16 x float>
20704 %load = load float, ptr %__b
20705 %vec = insertelement <16 x float> undef, float %load, i32 0
20706 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20707 %2 = fcmp oeq <16 x float> %0, %1
20708 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
20709 %4 = bitcast <32 x i1> %3 to i32
; Masked ordered-equal compare of two <16 x float> vectors (passed as
; <8 x i64>). The 16-lane result is ANDed with %__u (as <16 x i1>) and widened
; to a 32-bit mask whose lanes 16-31 come from the zeroinitializer shuffle
; operand.
; The VLX run expects the mask applied as a k1 predicate on the compare; the
; NoVLX run expects an unpredicated compare with the mask applied afterwards by
; a scalar andl against %edi.
20713 define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
20714 ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask:
20715 ; VLX: # %bb.0: # %entry
20716 ; VLX-NEXT: kmovd %edi, %k1
20717 ; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20718 ; VLX-NEXT: kmovd %k0, %eax
20719 ; VLX-NEXT: vzeroupper
20722 ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask:
20723 ; NoVLX: # %bb.0: # %entry
20724 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20725 ; NoVLX-NEXT: kmovw %k0, %eax
20726 ; NoVLX-NEXT: andl %edi, %eax
20727 ; NoVLX-NEXT: vzeroupper
20730 %0 = bitcast <8 x i64> %__a to <16 x float>
20731 %1 = bitcast <8 x i64> %__b to <16 x float>
20732 %2 = fcmp oeq <16 x float> %0, %1
20733 %3 = bitcast i16 %__u to <16 x i1>
20734 %4 = and <16 x i1> %2, %3
20735 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
20736 %6 = bitcast <32 x i1> %5 to i32
; Masked ordered-equal compare of a <16 x float> register operand against a
; vector loaded from %__b (loaded as <8 x i64>, then bitcast). The 16-lane
; result is ANDed with %__u and widened to a 32-bit mask whose lanes 16-31 come
; from the zeroinitializer shuffle operand.
; Both runs expect the load folded into the zmm compare. The VLX run applies the
; mask as a k1 predicate; the NoVLX run applies it afterwards with a scalar andl
; against %edi.
20740 define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
20741 ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem:
20742 ; VLX: # %bb.0: # %entry
20743 ; VLX-NEXT: kmovd %edi, %k1
20744 ; VLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1}
20745 ; VLX-NEXT: kmovd %k0, %eax
20746 ; VLX-NEXT: vzeroupper
20749 ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem:
20750 ; NoVLX: # %bb.0: # %entry
20751 ; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0
20752 ; NoVLX-NEXT: kmovw %k0, %eax
20753 ; NoVLX-NEXT: andl %edi, %eax
20754 ; NoVLX-NEXT: vzeroupper
20757 %0 = bitcast <8 x i64> %__a to <16 x float>
20758 %load = load <8 x i64>, ptr %__b
20759 %1 = bitcast <8 x i64> %load to <16 x float>
20760 %2 = fcmp oeq <16 x float> %0, %1
20761 %3 = bitcast i16 %__u to <16 x i1>
20762 %4 = and <16 x i1> %2, %3
20763 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
20764 %6 = bitcast <32 x i1> %5 to i32
; Masked ordered-equal compare of a <16 x float> register operand against a
; single float loaded from %__b and splatted to all 16 lanes. The 16-lane result
; is ANDed with %__u and widened to a 32-bit mask whose lanes 16-31 come from
; the zeroinitializer shuffle operand.
; Both runs expect a zmm compare with an embedded {1to16} broadcast. The VLX run
; applies the mask as a k1 predicate; the NoVLX run applies it afterwards with a
; scalar andl against %edi.
20768 define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
20769 ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b:
20770 ; VLX: # %bb.0: # %entry
20771 ; VLX-NEXT: kmovd %edi, %k1
20772 ; VLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
20773 ; VLX-NEXT: kmovd %k0, %eax
20774 ; VLX-NEXT: vzeroupper
20777 ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b:
20778 ; NoVLX: # %bb.0: # %entry
20779 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0
20780 ; NoVLX-NEXT: kmovw %k0, %eax
20781 ; NoVLX-NEXT: andl %edi, %eax
20782 ; NoVLX-NEXT: vzeroupper
20785 %0 = bitcast <8 x i64> %__a to <16 x float>
20786 %load = load float, ptr %__b
20787 %vec = insertelement <16 x float> undef, float %load, i32 0
20788 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20789 %2 = fcmp oeq <16 x float> %0, %1
20790 %3 = bitcast i16 %__u to <16 x i1>
20791 %4 = and <16 x i1> %2, %3
20792 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
20793 %6 = bitcast <32 x i1> %5 to i32
; Intrinsic-based 512-bit float compare: calls
; @llvm.x86.avx512.mask.cmp.ps.512 with predicate immediate 2, an all-true
; <16 x i1> mask and a trailing i32 8, then bitcasts the <16 x i1> result to i16
; and zero-extends it to i32.
; Both runs share one set of checks, which expect vcmpleps with {sae}; the
; trailing i32 8 presumably selects that {sae} form.
; NOTE(review): the function name says "oeq", but immediate 2 is checked as
; vcmpleps -- confirm whether the name or the immediate is the intended one.
20799 define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
20800 ; CHECK-LABEL: test_vcmpoeqps_v16i1_v32i1_sae_mask:
20801 ; CHECK: # %bb.0: # %entry
20802 ; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
20803 ; CHECK-NEXT: kmovw %k0, %eax
20804 ; CHECK-NEXT: vzeroupper
20807 %0 = bitcast <8 x i64> %__a to <16 x float>
20808 %1 = bitcast <8 x i64> %__b to <16 x float>
20809 %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
20810 %3 = bitcast <16 x i1> %2 to i16
20811 %4 = zext i16 %3 to i32
; Masked variant of the intrinsic-based 512-bit float compare: the intrinsic is
; still called with an all-true mask (predicate immediate 2, trailing i32 8),
; and %__u is applied afterwards by an IR-level AND on <16 x i1>. The result is
; bitcast to i16 and zero-extended to i32.
; Both runs expect vcmpleps with {sae} followed by a scalar andl against %edi;
; they differ only in the mask move (kmovd for the VLX run, kmovw for NoVLX).
; NOTE(review): the function name says "oeq", but immediate 2 is checked as
; vcmpleps -- confirm whether the name or the immediate is the intended one.
20815 define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_sae_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
20816 ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_sae_mask:
20817 ; VLX: # %bb.0: # %entry
20818 ; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
20819 ; VLX-NEXT: kmovd %k0, %eax
20820 ; VLX-NEXT: andl %edi, %eax
20821 ; VLX-NEXT: vzeroupper
20824 ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_sae_mask:
20825 ; NoVLX: # %bb.0: # %entry
20826 ; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
20827 ; NoVLX-NEXT: kmovw %k0, %eax
20828 ; NoVLX-NEXT: andl %edi, %eax
20829 ; NoVLX-NEXT: vzeroupper
20832 %0 = bitcast <8 x i64> %__a to <16 x float>
20833 %1 = bitcast <8 x i64> %__b to <16 x float>
20834 %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
20835 %3 = bitcast i16 %__u to <16 x i1>
20836 %4 = and <16 x i1> %2, %3
20837 %5 = bitcast <16 x i1> %4 to i16
20838 %6 = zext i16 %5 to i32
; Unmasked ordered-equal compare of two <16 x float> vectors (passed as
; <8 x i64>), with the 16-lane result widened to a 64-bit mask.
; The shufflevector takes lanes 0-15 from the fcmp result and every other lane
; from the zeroinitializer operand (indices 16-31), so bits 16-63 of the i64 are
; zero.
; Both runs expect the same zmm compare; they differ only in the mask move
; (kmovq for the VLX run, kmovw for the NoVLX run).
20844 define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
20845 ; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask:
20846 ; VLX: # %bb.0: # %entry
20847 ; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20848 ; VLX-NEXT: kmovq %k0, %rax
20849 ; VLX-NEXT: vzeroupper
20852 ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask:
20853 ; NoVLX: # %bb.0: # %entry
20854 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20855 ; NoVLX-NEXT: kmovw %k0, %eax
20856 ; NoVLX-NEXT: vzeroupper
20859 %0 = bitcast <8 x i64> %__a to <16 x float>
20860 %1 = bitcast <8 x i64> %__b to <16 x float>
20861 %2 = fcmp oeq <16 x float> %0, %1
20862 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
20863 %4 = bitcast <64 x i1> %3 to i64
; Unmasked ordered-equal compare of a <16 x float> register operand against a
; vector loaded from %__b (loaded as <8 x i64>, then bitcast), with the 16-lane
; result widened to a 64-bit mask whose lanes 16-63 come from the
; zeroinitializer shuffle operand.
; Both runs expect the load folded into the zmm compare; they differ only in
; the mask move (kmovq for the VLX run, kmovw for the NoVLX run).
20867 define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
20868 ; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem:
20869 ; VLX: # %bb.0: # %entry
20870 ; VLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
20871 ; VLX-NEXT: kmovq %k0, %rax
20872 ; VLX-NEXT: vzeroupper
20875 ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem:
20876 ; NoVLX: # %bb.0: # %entry
20877 ; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
20878 ; NoVLX-NEXT: kmovw %k0, %eax
20879 ; NoVLX-NEXT: vzeroupper
20882 %0 = bitcast <8 x i64> %__a to <16 x float>
20883 %load = load <8 x i64>, ptr %__b
20884 %1 = bitcast <8 x i64> %load to <16 x float>
20885 %2 = fcmp oeq <16 x float> %0, %1
20886 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
20887 %4 = bitcast <64 x i1> %3 to i64
; Unmasked ordered-equal compare of a <16 x float> register operand against a
; single float loaded from %__b and splatted to all 16 lanes, with the 16-lane
; result widened to a 64-bit mask whose lanes 16-63 come from the
; zeroinitializer shuffle operand.
; Both runs expect a zmm compare with an embedded {1to16} broadcast; they differ
; only in the mask move (kmovq for the VLX run, kmovw for the NoVLX run).
20891 define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
20892 ; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b:
20893 ; VLX: # %bb.0: # %entry
20894 ; VLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
20895 ; VLX-NEXT: kmovq %k0, %rax
20896 ; VLX-NEXT: vzeroupper
20899 ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b:
20900 ; NoVLX: # %bb.0: # %entry
20901 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
20902 ; NoVLX-NEXT: kmovw %k0, %eax
20903 ; NoVLX-NEXT: vzeroupper
20906 %0 = bitcast <8 x i64> %__a to <16 x float>
20907 %load = load float, ptr %__b
20908 %vec = insertelement <16 x float> undef, float %load, i32 0
20909 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20910 %2 = fcmp oeq <16 x float> %0, %1
20911 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
20912 %4 = bitcast <64 x i1> %3 to i64
; Masked ordered-equal compare of two <16 x float> vectors (passed as
; <8 x i64>). The 16-lane result is ANDed with %__u (as <16 x i1>) and widened
; to a 64-bit mask whose lanes 16-63 come from the zeroinitializer shuffle
; operand.
; The VLX run expects the mask applied as a k1 predicate and the result read
; with kmovq; the NoVLX run expects an unpredicated compare, kmovw, and the mask
; applied afterwards by a scalar andl against %edi.
20916 define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
20917 ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask:
20918 ; VLX: # %bb.0: # %entry
20919 ; VLX-NEXT: kmovd %edi, %k1
20920 ; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20921 ; VLX-NEXT: kmovq %k0, %rax
20922 ; VLX-NEXT: vzeroupper
20925 ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask:
20926 ; NoVLX: # %bb.0: # %entry
20927 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20928 ; NoVLX-NEXT: kmovw %k0, %eax
20929 ; NoVLX-NEXT: andl %edi, %eax
20930 ; NoVLX-NEXT: vzeroupper
20933 %0 = bitcast <8 x i64> %__a to <16 x float>
20934 %1 = bitcast <8 x i64> %__b to <16 x float>
20935 %2 = fcmp oeq <16 x float> %0, %1
20936 %3 = bitcast i16 %__u to <16 x i1>
20937 %4 = and <16 x i1> %2, %3
20938 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
20939 %6 = bitcast <64 x i1> %5 to i64
; Masked ordered-equal compare of a <16 x float> register operand against a
; vector loaded from %__b (loaded as <8 x i64>, then bitcast). The 16-lane
; result is ANDed with %__u and widened to a 64-bit mask whose lanes 16-63 come
; from the zeroinitializer shuffle operand.
; Both runs expect the load folded into the zmm compare. The VLX run applies the
; mask as a k1 predicate and reads the result with kmovq; the NoVLX run uses
; kmovw and applies the mask afterwards with a scalar andl against %edi.
20943 define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
20944 ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem:
20945 ; VLX: # %bb.0: # %entry
20946 ; VLX-NEXT: kmovd %edi, %k1
20947 ; VLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1}
20948 ; VLX-NEXT: kmovq %k0, %rax
20949 ; VLX-NEXT: vzeroupper
20952 ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem:
20953 ; NoVLX: # %bb.0: # %entry
20954 ; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0
20955 ; NoVLX-NEXT: kmovw %k0, %eax
20956 ; NoVLX-NEXT: andl %edi, %eax
20957 ; NoVLX-NEXT: vzeroupper
20960 %0 = bitcast <8 x i64> %__a to <16 x float>
20961 %load = load <8 x i64>, ptr %__b
20962 %1 = bitcast <8 x i64> %load to <16 x float>
20963 %2 = fcmp oeq <16 x float> %0, %1
20964 %3 = bitcast i16 %__u to <16 x i1>
20965 %4 = and <16 x i1> %2, %3
20966 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
20967 %6 = bitcast <64 x i1> %5 to i64
; Masked ordered-equal compare of a <16 x float> register operand against a
; single float loaded from %__b and splatted to all 16 lanes. The 16-lane result
; is ANDed with %__u and widened to a 64-bit mask whose lanes 16-63 come from
; the zeroinitializer shuffle operand.
; Both runs expect a zmm compare with an embedded {1to16} broadcast. The VLX run
; applies the mask as a k1 predicate and reads the result with kmovq; the NoVLX
; run uses kmovw and applies the mask afterwards with a scalar andl against
; %edi.
20971 define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
20972 ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b:
20973 ; VLX: # %bb.0: # %entry
20974 ; VLX-NEXT: kmovd %edi, %k1
20975 ; VLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
20976 ; VLX-NEXT: kmovq %k0, %rax
20977 ; VLX-NEXT: vzeroupper
20980 ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b:
20981 ; NoVLX: # %bb.0: # %entry
20982 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0
20983 ; NoVLX-NEXT: kmovw %k0, %eax
20984 ; NoVLX-NEXT: andl %edi, %eax
20985 ; NoVLX-NEXT: vzeroupper
20988 %0 = bitcast <8 x i64> %__a to <16 x float>
20989 %load = load float, ptr %__b
20990 %vec = insertelement <16 x float> undef, float %load, i32 0
20991 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20992 %2 = fcmp oeq <16 x float> %0, %1
20993 %3 = bitcast i16 %__u to <16 x i1>
20994 %4 = and <16 x i1> %2, %3
20995 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
20996 %6 = bitcast <64 x i1> %5 to i64
; Intrinsic-based 512-bit float compare: calls
; @llvm.x86.avx512.mask.cmp.ps.512 with predicate immediate 2, an all-true
; <16 x i1> mask and a trailing i32 8, then bitcasts the <16 x i1> result to i16
; and zero-extends it to i64.
; Both runs share one set of checks, which expect vcmpleps with {sae} and a
; kmovw into %eax; the trailing i32 8 presumably selects that {sae} form.
; NOTE(review): the function name says "oeq", but immediate 2 is checked as
; vcmpleps -- confirm whether the name or the immediate is the intended one.
21002 define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
21003 ; CHECK-LABEL: test_vcmpoeqps_v16i1_v64i1_sae_mask:
21004 ; CHECK: # %bb.0: # %entry
21005 ; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
21006 ; CHECK-NEXT: kmovw %k0, %eax
21007 ; CHECK-NEXT: vzeroupper
21010 %0 = bitcast <8 x i64> %__a to <16 x float>
21011 %1 = bitcast <8 x i64> %__b to <16 x float>
21012 %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
21013 %3 = bitcast <16 x i1> %2 to i16
21014 %4 = zext i16 %3 to i64
; Masked variant of the intrinsic-based 512-bit float compare: the intrinsic is
; still called with an all-true mask (predicate immediate 2, trailing i32 8),
; and %__u is applied afterwards by an IR-level AND on <16 x i1>. The result is
; bitcast to i16 and zero-extended to i64.
; Both runs expect vcmpleps with {sae} followed by a scalar andl against %edi;
; they differ only in the mask move (kmovd for the VLX run, kmovw for NoVLX).
; NOTE(review): the function name says "oeq", but immediate 2 is checked as
; vcmpleps -- confirm whether the name or the immediate is the intended one.
21018 define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_sae_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
21019 ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_sae_mask:
21020 ; VLX: # %bb.0: # %entry
21021 ; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
21022 ; VLX-NEXT: kmovd %k0, %eax
21023 ; VLX-NEXT: andl %edi, %eax
21024 ; VLX-NEXT: vzeroupper
21027 ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_sae_mask:
21028 ; NoVLX: # %bb.0: # %entry
21029 ; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
21030 ; NoVLX-NEXT: kmovw %k0, %eax
21031 ; NoVLX-NEXT: andl %edi, %eax
21032 ; NoVLX-NEXT: vzeroupper
21035 %0 = bitcast <8 x i64> %__a to <16 x float>
21036 %1 = bitcast <8 x i64> %__b to <16 x float>
21037 %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
21038 %3 = bitcast i16 %__u to <16 x i1>
21039 %4 = and <16 x i1> %2, %3
21040 %5 = bitcast <16 x i1> %4 to i16
21041 %6 = zext i16 %5 to i64
; Declaration of the 512-bit packed-double mask-compare intrinsic: two
; <8 x double> operands, an i32, an <8 x i1> and a trailing i32, returning
; <8 x i1>.
21047 declare <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, <8 x i1>, i32)
; Unmasked ordered-equal compare of two <2 x double> vectors (passed as
; <2 x i64>), with the 2-lane result widened to a 4-bit mask.
; The shufflevector takes lanes 0-1 from the fcmp result and lanes 2-3 from the
; zeroinitializer operand, so the upper two bits of the i4 are zero.
; The VLX run expects an xmm compare read back with kmovb; the NoVLX run expects
; the operands widened to zmm, kmovw, and an andl $3 on the scalar result.
21048 define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21049 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
21050 ; VLX: # %bb.0: # %entry
21051 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
21052 ; VLX-NEXT: kmovb %k0, %eax
21055 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
21056 ; NoVLX: # %bb.0: # %entry
21057 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21058 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21059 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21060 ; NoVLX-NEXT: kmovw %k0, %eax
21061 ; NoVLX-NEXT: andl $3, %eax
21062 ; NoVLX-NEXT: vzeroupper
21065 %0 = bitcast <2 x i64> %__a to <2 x double>
21066 %1 = bitcast <2 x i64> %__b to <2 x double>
21067 %2 = fcmp oeq <2 x double> %0, %1
21068 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21069 %4 = bitcast <4 x i1> %3 to i4
; fcmp oeq of %__a against a <2 x i64> vector loaded from %__b (both bitcast to
; <2 x double>). The <2 x i1> result is zero-padded to <4 x i1> by the shuffle
; with zeroinitializer and returned as an i4 bitmask.
; The VLX checks expect the load folded into vcmpeqpd. The NoVLX checks expect
; a separate vmovapd load, a zmm compare, and `andl $3` on the result.
21073 define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
21074 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem:
21075 ; VLX: # %bb.0: # %entry
21076 ; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
21077 ; VLX-NEXT: kmovb %k0, %eax
21080 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem:
21081 ; NoVLX: # %bb.0: # %entry
21082 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21083 ; NoVLX-NEXT: vmovapd (%rdi), %xmm1
21084 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21085 ; NoVLX-NEXT: kmovw %k0, %eax
21086 ; NoVLX-NEXT: andl $3, %eax
21087 ; NoVLX-NEXT: vzeroupper
21090 %0 = bitcast <2 x i64> %__a to <2 x double>
21091 %load = load <2 x i64>, ptr %__b
21092 %1 = bitcast <2 x i64> %load to <2 x double>
21093 %2 = fcmp oeq <2 x double> %0, %1
21094 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21095 %4 = bitcast <4 x i1> %3 to i4
; fcmp oeq of %__a against a scalar double loaded from %__b and splatted to
; both lanes (insertelement + shuffle <0, 0>). The <2 x i1> result is
; zero-padded to <4 x i1> and returned as an i4 bitmask.
; The checks expect a broadcast memory operand on the compare: {1to2} on xmm
; in the VLX run, {1to8} on zmm plus `andl $3` in the NoVLX run.
21099 define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
21100 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
21101 ; VLX: # %bb.0: # %entry
21102 ; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
21103 ; VLX-NEXT: kmovb %k0, %eax
21106 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
21107 ; NoVLX: # %bb.0: # %entry
21108 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21109 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
21110 ; NoVLX-NEXT: kmovw %k0, %eax
21111 ; NoVLX-NEXT: andl $3, %eax
21112 ; NoVLX-NEXT: vzeroupper
21115 %0 = bitcast <2 x i64> %__a to <2 x double>
21116 %load = load double, ptr %__b
21117 %vec = insertelement <2 x double> undef, double %load, i32 0
21118 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21119 %2 = fcmp oeq <2 x double> %0, %1
21120 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21121 %4 = bitcast <4 x i1> %3 to i4
; fcmp oeq on two <2 x double> register operands, ANDed with the i2 mask %__u
; (bitcast to <2 x i1>). The result is zero-padded to <4 x i1> by the shuffle
; with zeroinitializer and returned as an i4 bitmask.
; The checks expect %edi moved into k1 and used as the compare's write-mask;
; the NoVLX run additionally widens to zmm and applies `andl $3`.
21125 define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21126 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask:
21127 ; VLX: # %bb.0: # %entry
21128 ; VLX-NEXT: kmovd %edi, %k1
21129 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
21130 ; VLX-NEXT: kmovb %k0, %eax
21133 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask:
21134 ; NoVLX: # %bb.0: # %entry
21135 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21136 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21137 ; NoVLX-NEXT: kmovw %edi, %k1
21138 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21139 ; NoVLX-NEXT: kmovw %k0, %eax
21140 ; NoVLX-NEXT: andl $3, %eax
21141 ; NoVLX-NEXT: vzeroupper
21144 %0 = bitcast <2 x i64> %__a to <2 x double>
21145 %1 = bitcast <2 x i64> %__b to <2 x double>
21146 %2 = fcmp oeq <2 x double> %0, %1
21147 %3 = bitcast i2 %__u to <2 x i1>
21148 %4 = and <2 x i1> %2, %3
21149 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21150 %6 = bitcast <4 x i1> %5 to i4
; fcmp oeq of %__a against a <2 x i64> vector loaded from %__b, ANDed with the
; i2 mask %__u (bitcast to <2 x i1>). The result is zero-padded to <4 x i1>
; and returned as an i4 bitmask.
; The checks expect %edi in k1 as the compare's write-mask. The VLX run folds
; the load from (%rsi); the NoVLX run loads with vmovapd, compares on zmm and
; applies `andl $3`.
21154 define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
21155 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem:
21156 ; VLX: # %bb.0: # %entry
21157 ; VLX-NEXT: kmovd %edi, %k1
21158 ; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
21159 ; VLX-NEXT: kmovb %k0, %eax
21162 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem:
21163 ; NoVLX: # %bb.0: # %entry
21164 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21165 ; NoVLX-NEXT: kmovw %edi, %k1
21166 ; NoVLX-NEXT: vmovapd (%rsi), %xmm1
21167 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21168 ; NoVLX-NEXT: kmovw %k0, %eax
21169 ; NoVLX-NEXT: andl $3, %eax
21170 ; NoVLX-NEXT: vzeroupper
21173 %0 = bitcast <2 x i64> %__a to <2 x double>
21174 %load = load <2 x i64>, ptr %__b
21175 %1 = bitcast <2 x i64> %load to <2 x double>
21176 %2 = fcmp oeq <2 x double> %0, %1
21177 %3 = bitcast i2 %__u to <2 x i1>
21178 %4 = and <2 x i1> %2, %3
21179 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21180 %6 = bitcast <4 x i1> %5 to i4
; fcmp oeq of %__a against a scalar double loaded from %__b and splatted to
; both lanes, ANDed with the i2 mask %__u (bitcast to <2 x i1>). The result is
; zero-padded to <4 x i1> and returned as an i4 bitmask.
; The checks expect %edi in k1 as write-mask and a broadcast memory operand:
; {1to2} on xmm in the VLX run, {1to8} on zmm plus `andl $3` in the NoVLX run.
21184 define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
21185 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
21186 ; VLX: # %bb.0: # %entry
21187 ; VLX-NEXT: kmovd %edi, %k1
21188 ; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
21189 ; VLX-NEXT: kmovb %k0, %eax
21192 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
21193 ; NoVLX: # %bb.0: # %entry
21194 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21195 ; NoVLX-NEXT: kmovw %edi, %k1
21196 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
21197 ; NoVLX-NEXT: kmovw %k0, %eax
21198 ; NoVLX-NEXT: andl $3, %eax
21199 ; NoVLX-NEXT: vzeroupper
21202 %0 = bitcast <2 x i64> %__a to <2 x double>
21203 %load = load double, ptr %__b
21204 %vec = insertelement <2 x double> undef, double %load, i32 0
21205 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21206 %2 = fcmp oeq <2 x double> %0, %1
21207 %3 = bitcast i2 %__u to <2 x i1>
21208 %4 = and <2 x i1> %2, %3
21209 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
21210 %6 = bitcast <4 x i1> %5 to i4
; fcmp oeq on two <2 x double> register operands. The <2 x i1> result is
; widened to <8 x i1>: shuffle indices 0 and 1 keep the compare lanes, every
; other index (2 or 3) selects a zeroinitializer lane. Returned as an i8.
; The VLX checks expect an xmm compare into k0. The NoVLX checks expect a zmm
; compare followed by kshiftlw/kshiftrw by 14, leaving only the two low bits
; of the 16-bit mask.
21216 define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21217 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask:
21218 ; VLX: # %bb.0: # %entry
21219 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
21220 ; VLX-NEXT: kmovd %k0, %eax
21221 ; VLX-NEXT: # kill: def $al killed $al killed $eax
21224 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask:
21225 ; NoVLX: # %bb.0: # %entry
21226 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21227 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21228 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21229 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21230 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21231 ; NoVLX-NEXT: kmovw %k0, %eax
21232 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
21233 ; NoVLX-NEXT: vzeroupper
21236 %0 = bitcast <2 x i64> %__a to <2 x double>
21237 %1 = bitcast <2 x i64> %__b to <2 x double>
21238 %2 = fcmp oeq <2 x double> %0, %1
21239 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21240 %4 = bitcast <8 x i1> %3 to i8
; fcmp oeq of %__a against a <2 x i64> vector loaded from %__b (both bitcast to
; <2 x double>). The <2 x i1> result is zero-padded to <8 x i1> by the shuffle
; with zeroinitializer and returned as an i8 bitmask.
; The VLX checks expect the load folded into vcmpeqpd. The NoVLX checks expect
; a vmovapd load, a zmm compare, and kshiftlw/kshiftrw by 14 to drop the
; upper mask bits.
21244 define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
21245 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem:
21246 ; VLX: # %bb.0: # %entry
21247 ; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
21248 ; VLX-NEXT: kmovd %k0, %eax
21249 ; VLX-NEXT: # kill: def $al killed $al killed $eax
21252 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem:
21253 ; NoVLX: # %bb.0: # %entry
21254 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21255 ; NoVLX-NEXT: vmovapd (%rdi), %xmm1
21256 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21257 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21258 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21259 ; NoVLX-NEXT: kmovw %k0, %eax
21260 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
21261 ; NoVLX-NEXT: vzeroupper
21264 %0 = bitcast <2 x i64> %__a to <2 x double>
21265 %load = load <2 x i64>, ptr %__b
21266 %1 = bitcast <2 x i64> %load to <2 x double>
21267 %2 = fcmp oeq <2 x double> %0, %1
21268 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21269 %4 = bitcast <8 x i1> %3 to i8
; fcmp oeq of %__a against a scalar double loaded from %__b and splatted to
; both lanes (insertelement + shuffle <0, 0>). The <2 x i1> result is
; zero-padded to <8 x i1> and returned as an i8 bitmask.
; The checks expect a broadcast memory operand: {1to2} on xmm in the VLX run,
; {1to8} on zmm followed by kshiftlw/kshiftrw by 14 in the NoVLX run.
21273 define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
21274 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
21275 ; VLX: # %bb.0: # %entry
21276 ; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
21277 ; VLX-NEXT: kmovd %k0, %eax
21278 ; VLX-NEXT: # kill: def $al killed $al killed $eax
21281 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
21282 ; NoVLX: # %bb.0: # %entry
21283 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21284 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
21285 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21286 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21287 ; NoVLX-NEXT: kmovw %k0, %eax
21288 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
21289 ; NoVLX-NEXT: vzeroupper
21292 %0 = bitcast <2 x i64> %__a to <2 x double>
21293 %load = load double, ptr %__b
21294 %vec = insertelement <2 x double> undef, double %load, i32 0
21295 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21296 %2 = fcmp oeq <2 x double> %0, %1
21297 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21298 %4 = bitcast <8 x i1> %3 to i8
; fcmp oeq on two <2 x double> register operands, ANDed with the i2 mask %__u
; (bitcast to <2 x i1>). The result is zero-padded to <8 x i1> by the shuffle
; with zeroinitializer and returned as an i8 bitmask.
; The checks expect %edi in k1 as the compare's write-mask; the NoVLX run also
; widens to zmm and clears the upper mask bits with kshiftlw/kshiftrw by 14.
21302 define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21303 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask:
21304 ; VLX: # %bb.0: # %entry
21305 ; VLX-NEXT: kmovd %edi, %k1
21306 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
21307 ; VLX-NEXT: kmovd %k0, %eax
21308 ; VLX-NEXT: # kill: def $al killed $al killed $eax
21311 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask:
21312 ; NoVLX: # %bb.0: # %entry
21313 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21314 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21315 ; NoVLX-NEXT: kmovw %edi, %k1
21316 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21317 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21318 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21319 ; NoVLX-NEXT: kmovw %k0, %eax
21320 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
21321 ; NoVLX-NEXT: vzeroupper
21324 %0 = bitcast <2 x i64> %__a to <2 x double>
21325 %1 = bitcast <2 x i64> %__b to <2 x double>
21326 %2 = fcmp oeq <2 x double> %0, %1
21327 %3 = bitcast i2 %__u to <2 x i1>
21328 %4 = and <2 x i1> %2, %3
21329 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21330 %6 = bitcast <8 x i1> %5 to i8
; fcmp oeq of %__a against a <2 x i64> vector loaded from %__b, ANDed with the
; i2 mask %__u (bitcast to <2 x i1>). The result is zero-padded to <8 x i1>
; and returned as an i8 bitmask.
; The checks expect %edi in k1 as write-mask. The VLX run folds the load from
; (%rsi); the NoVLX run loads with vmovapd, compares on zmm and clears the
; upper mask bits with kshiftlw/kshiftrw by 14.
21334 define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
21335 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem:
21336 ; VLX: # %bb.0: # %entry
21337 ; VLX-NEXT: kmovd %edi, %k1
21338 ; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
21339 ; VLX-NEXT: kmovd %k0, %eax
21340 ; VLX-NEXT: # kill: def $al killed $al killed $eax
21343 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem:
21344 ; NoVLX: # %bb.0: # %entry
21345 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21346 ; NoVLX-NEXT: kmovw %edi, %k1
21347 ; NoVLX-NEXT: vmovapd (%rsi), %xmm1
21348 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21349 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21350 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21351 ; NoVLX-NEXT: kmovw %k0, %eax
21352 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
21353 ; NoVLX-NEXT: vzeroupper
21356 %0 = bitcast <2 x i64> %__a to <2 x double>
21357 %load = load <2 x i64>, ptr %__b
21358 %1 = bitcast <2 x i64> %load to <2 x double>
21359 %2 = fcmp oeq <2 x double> %0, %1
21360 %3 = bitcast i2 %__u to <2 x i1>
21361 %4 = and <2 x i1> %2, %3
21362 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21363 %6 = bitcast <8 x i1> %5 to i8
; fcmp oeq of %__a against a scalar double loaded from %__b and splatted to
; both lanes, ANDed with the i2 mask %__u (bitcast to <2 x i1>). The result is
; zero-padded to <8 x i1> and returned as an i8 bitmask.
; The checks expect %edi in k1 as write-mask and a broadcast memory operand:
; {1to2} on xmm in the VLX run, {1to8} on zmm plus kshiftlw/kshiftrw by 14 in
; the NoVLX run.
21367 define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
21368 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
21369 ; VLX: # %bb.0: # %entry
21370 ; VLX-NEXT: kmovd %edi, %k1
21371 ; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
21372 ; VLX-NEXT: kmovd %k0, %eax
21373 ; VLX-NEXT: # kill: def $al killed $al killed $eax
21376 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
21377 ; NoVLX: # %bb.0: # %entry
21378 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21379 ; NoVLX-NEXT: kmovw %edi, %k1
21380 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
21381 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21382 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21383 ; NoVLX-NEXT: kmovw %k0, %eax
21384 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
21385 ; NoVLX-NEXT: vzeroupper
21388 %0 = bitcast <2 x i64> %__a to <2 x double>
21389 %load = load double, ptr %__b
21390 %vec = insertelement <2 x double> undef, double %load, i32 0
21391 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21392 %2 = fcmp oeq <2 x double> %0, %1
21393 %3 = bitcast i2 %__u to <2 x i1>
21394 %4 = and <2 x i1> %2, %3
21395 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21396 %6 = bitcast <8 x i1> %5 to i8
; fcmp oeq on two <2 x double> register operands. The <2 x i1> result is
; widened to <16 x i1>: shuffle indices 0 and 1 keep the compare lanes, every
; other index (2 or 3) selects a zeroinitializer lane. Returned as an i16.
; The VLX checks expect an xmm compare into k0. The NoVLX checks expect a zmm
; compare followed by kshiftlw/kshiftrw by 14, leaving only the two low bits.
21402 define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21403 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
21404 ; VLX: # %bb.0: # %entry
21405 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
21406 ; VLX-NEXT: kmovd %k0, %eax
21407 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
21410 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
21411 ; NoVLX: # %bb.0: # %entry
21412 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21413 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21414 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21415 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21416 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21417 ; NoVLX-NEXT: kmovw %k0, %eax
21418 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
21419 ; NoVLX-NEXT: vzeroupper
21422 %0 = bitcast <2 x i64> %__a to <2 x double>
21423 %1 = bitcast <2 x i64> %__b to <2 x double>
21424 %2 = fcmp oeq <2 x double> %0, %1
21425 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21426 %4 = bitcast <16 x i1> %3 to i16
; fcmp oeq of %__a against a <2 x i64> vector loaded from %__b (both bitcast to
; <2 x double>). The <2 x i1> result is zero-padded to <16 x i1> by the shuffle
; with zeroinitializer and returned as an i16 bitmask.
; The VLX checks expect the load folded into vcmpeqpd. The NoVLX checks expect
; a vmovapd load, a zmm compare, and kshiftlw/kshiftrw by 14.
21430 define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
21431 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
21432 ; VLX: # %bb.0: # %entry
21433 ; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
21434 ; VLX-NEXT: kmovd %k0, %eax
21435 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
21438 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
21439 ; NoVLX: # %bb.0: # %entry
21440 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21441 ; NoVLX-NEXT: vmovapd (%rdi), %xmm1
21442 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21443 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21444 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21445 ; NoVLX-NEXT: kmovw %k0, %eax
21446 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
21447 ; NoVLX-NEXT: vzeroupper
21450 %0 = bitcast <2 x i64> %__a to <2 x double>
21451 %load = load <2 x i64>, ptr %__b
21452 %1 = bitcast <2 x i64> %load to <2 x double>
21453 %2 = fcmp oeq <2 x double> %0, %1
21454 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21455 %4 = bitcast <16 x i1> %3 to i16
; fcmp oeq of %__a against a scalar double loaded from %__b and splatted to
; both lanes (insertelement + shuffle <0, 0>). The <2 x i1> result is
; zero-padded to <16 x i1> and returned as an i16 bitmask.
; The checks expect a broadcast memory operand: {1to2} on xmm in the VLX run,
; {1to8} on zmm followed by kshiftlw/kshiftrw by 14 in the NoVLX run.
21459 define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
21460 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
21461 ; VLX: # %bb.0: # %entry
21462 ; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
21463 ; VLX-NEXT: kmovd %k0, %eax
21464 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
21467 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
21468 ; NoVLX: # %bb.0: # %entry
21469 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21470 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
21471 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21472 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21473 ; NoVLX-NEXT: kmovw %k0, %eax
21474 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
21475 ; NoVLX-NEXT: vzeroupper
21478 %0 = bitcast <2 x i64> %__a to <2 x double>
21479 %load = load double, ptr %__b
21480 %vec = insertelement <2 x double> undef, double %load, i32 0
21481 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21482 %2 = fcmp oeq <2 x double> %0, %1
21483 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21484 %4 = bitcast <16 x i1> %3 to i16
; fcmp oeq on two <2 x double> register operands, ANDed with the i2 mask %__u
; (bitcast to <2 x i1>). The result is zero-padded to <16 x i1> by the shuffle
; with zeroinitializer and returned as an i16 bitmask.
; The checks expect %edi in k1 as the compare's write-mask; the NoVLX run also
; widens to zmm and clears the upper mask bits with kshiftlw/kshiftrw by 14.
21488 define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21489 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask:
21490 ; VLX: # %bb.0: # %entry
21491 ; VLX-NEXT: kmovd %edi, %k1
21492 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
21493 ; VLX-NEXT: kmovd %k0, %eax
21494 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
21497 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask:
21498 ; NoVLX: # %bb.0: # %entry
21499 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21500 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21501 ; NoVLX-NEXT: kmovw %edi, %k1
21502 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21503 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21504 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21505 ; NoVLX-NEXT: kmovw %k0, %eax
21506 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
21507 ; NoVLX-NEXT: vzeroupper
21510 %0 = bitcast <2 x i64> %__a to <2 x double>
21511 %1 = bitcast <2 x i64> %__b to <2 x double>
21512 %2 = fcmp oeq <2 x double> %0, %1
21513 %3 = bitcast i2 %__u to <2 x i1>
21514 %4 = and <2 x i1> %2, %3
21515 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21516 %6 = bitcast <16 x i1> %5 to i16
; fcmp oeq of %__a against a <2 x i64> vector loaded from %__b, ANDed with the
; i2 mask %__u (bitcast to <2 x i1>). The result is zero-padded to <16 x i1>
; and returned as an i16 bitmask.
; The checks expect %edi in k1 as write-mask. The VLX run folds the load from
; (%rsi); the NoVLX run loads with vmovapd, compares on zmm and clears the
; upper mask bits with kshiftlw/kshiftrw by 14.
21520 define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
21521 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem:
21522 ; VLX: # %bb.0: # %entry
21523 ; VLX-NEXT: kmovd %edi, %k1
21524 ; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
21525 ; VLX-NEXT: kmovd %k0, %eax
21526 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
21529 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem:
21530 ; NoVLX: # %bb.0: # %entry
21531 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21532 ; NoVLX-NEXT: kmovw %edi, %k1
21533 ; NoVLX-NEXT: vmovapd (%rsi), %xmm1
21534 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21535 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21536 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21537 ; NoVLX-NEXT: kmovw %k0, %eax
21538 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
21539 ; NoVLX-NEXT: vzeroupper
21542 %0 = bitcast <2 x i64> %__a to <2 x double>
21543 %load = load <2 x i64>, ptr %__b
21544 %1 = bitcast <2 x i64> %load to <2 x double>
21545 %2 = fcmp oeq <2 x double> %0, %1
21546 %3 = bitcast i2 %__u to <2 x i1>
21547 %4 = and <2 x i1> %2, %3
21548 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21549 %6 = bitcast <16 x i1> %5 to i16
; fcmp oeq of %__a against a scalar double loaded from %__b and splatted to
; both lanes, ANDed with the i2 mask %__u (bitcast to <2 x i1>). The result is
; zero-padded to <16 x i1> and returned as an i16 bitmask.
; The checks expect %edi in k1 as write-mask and a broadcast memory operand:
; {1to2} on xmm in the VLX run, {1to8} on zmm plus kshiftlw/kshiftrw by 14 in
; the NoVLX run.
21553 define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
21554 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
21555 ; VLX: # %bb.0: # %entry
21556 ; VLX-NEXT: kmovd %edi, %k1
21557 ; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
21558 ; VLX-NEXT: kmovd %k0, %eax
21559 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
21562 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
21563 ; NoVLX: # %bb.0: # %entry
21564 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21565 ; NoVLX-NEXT: kmovw %edi, %k1
21566 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
21567 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21568 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21569 ; NoVLX-NEXT: kmovw %k0, %eax
21570 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
21571 ; NoVLX-NEXT: vzeroupper
21574 %0 = bitcast <2 x i64> %__a to <2 x double>
21575 %load = load double, ptr %__b
21576 %vec = insertelement <2 x double> undef, double %load, i32 0
21577 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21578 %2 = fcmp oeq <2 x double> %0, %1
21579 %3 = bitcast i2 %__u to <2 x i1>
21580 %4 = and <2 x i1> %2, %3
21581 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21582 %6 = bitcast <16 x i1> %5 to i16
; fcmp oeq on two <2 x double> register operands. The <2 x i1> result is
; widened to <32 x i1>: shuffle indices 0 and 1 keep the compare lanes, every
; other index (2 or 3) selects a zeroinitializer lane. Returned as an i32.
; The VLX checks expect an xmm compare into k0 and kmovd. The NoVLX checks
; expect a zmm compare followed by kshiftlw/kshiftrw by 14 and kmovw.
21588 define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21589 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
21590 ; VLX: # %bb.0: # %entry
21591 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
21592 ; VLX-NEXT: kmovd %k0, %eax
21595 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
21596 ; NoVLX: # %bb.0: # %entry
21597 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21598 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21599 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21600 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21601 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21602 ; NoVLX-NEXT: kmovw %k0, %eax
21603 ; NoVLX-NEXT: vzeroupper
21606 %0 = bitcast <2 x i64> %__a to <2 x double>
21607 %1 = bitcast <2 x i64> %__b to <2 x double>
21608 %2 = fcmp oeq <2 x double> %0, %1
21609 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21610 %4 = bitcast <32 x i1> %3 to i32
; fcmp oeq of %__a against a <2 x i64> vector loaded from %__b (both bitcast to
; <2 x double>). The <2 x i1> result is zero-padded to <32 x i1> by the shuffle
; with zeroinitializer and returned as an i32 bitmask.
; The VLX checks expect the load folded into vcmpeqpd. The NoVLX checks expect
; a vmovapd load, a zmm compare, and kshiftlw/kshiftrw by 14.
21614 define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
21615 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
21616 ; VLX: # %bb.0: # %entry
21617 ; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
21618 ; VLX-NEXT: kmovd %k0, %eax
21621 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
21622 ; NoVLX: # %bb.0: # %entry
21623 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21624 ; NoVLX-NEXT: vmovapd (%rdi), %xmm1
21625 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21626 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21627 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21628 ; NoVLX-NEXT: kmovw %k0, %eax
21629 ; NoVLX-NEXT: vzeroupper
21632 %0 = bitcast <2 x i64> %__a to <2 x double>
21633 %load = load <2 x i64>, ptr %__b
21634 %1 = bitcast <2 x i64> %load to <2 x double>
21635 %2 = fcmp oeq <2 x double> %0, %1
21636 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21637 %4 = bitcast <32 x i1> %3 to i32
; fcmp oeq of %__a against a scalar double loaded from %__b and splatted to
; both lanes (insertelement + shuffle <0, 0>). The <2 x i1> result is
; zero-padded to <32 x i1> and returned as an i32 bitmask.
; The checks expect a broadcast memory operand: {1to2} on xmm in the VLX run,
; {1to8} on zmm followed by kshiftlw/kshiftrw by 14 in the NoVLX run.
21641 define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
21642 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
21643 ; VLX: # %bb.0: # %entry
21644 ; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
21645 ; VLX-NEXT: kmovd %k0, %eax
21648 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
21649 ; NoVLX: # %bb.0: # %entry
21650 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21651 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
21652 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21653 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21654 ; NoVLX-NEXT: kmovw %k0, %eax
21655 ; NoVLX-NEXT: vzeroupper
21658 %0 = bitcast <2 x i64> %__a to <2 x double>
21659 %load = load double, ptr %__b
21660 %vec = insertelement <2 x double> undef, double %load, i32 0
21661 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21662 %2 = fcmp oeq <2 x double> %0, %1
21663 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21664 %4 = bitcast <32 x i1> %3 to i32
; fcmp oeq on two <2 x double> register operands, ANDed with the i2 mask %__u
; (bitcast to <2 x i1>). The result is zero-padded to <32 x i1> by the shuffle
; with zeroinitializer and returned as an i32 bitmask.
; The checks expect %edi in k1 as the compare's write-mask; the NoVLX run also
; widens to zmm and clears the upper mask bits with kshiftlw/kshiftrw by 14.
21668 define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21669 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask:
21670 ; VLX: # %bb.0: # %entry
21671 ; VLX-NEXT: kmovd %edi, %k1
21672 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
21673 ; VLX-NEXT: kmovd %k0, %eax
21676 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask:
21677 ; NoVLX: # %bb.0: # %entry
21678 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21679 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21680 ; NoVLX-NEXT: kmovw %edi, %k1
21681 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21682 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21683 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21684 ; NoVLX-NEXT: kmovw %k0, %eax
21685 ; NoVLX-NEXT: vzeroupper
21688 %0 = bitcast <2 x i64> %__a to <2 x double>
21689 %1 = bitcast <2 x i64> %__b to <2 x double>
21690 %2 = fcmp oeq <2 x double> %0, %1
21691 %3 = bitcast i2 %__u to <2 x i1>
21692 %4 = and <2 x i1> %2, %3
21693 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21694 %6 = bitcast <32 x i1> %5 to i32
; fcmp oeq of %__a against a <2 x i64> vector loaded from %__b, ANDed with the
; i2 mask %__u (bitcast to <2 x i1>). The result is zero-padded to <32 x i1>
; and returned as an i32 bitmask.
; The checks expect %edi in k1 as write-mask. The VLX run folds the load from
; (%rsi); the NoVLX run loads with vmovapd, compares on zmm and clears the
; upper mask bits with kshiftlw/kshiftrw by 14.
21698 define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
21699 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem:
21700 ; VLX: # %bb.0: # %entry
21701 ; VLX-NEXT: kmovd %edi, %k1
21702 ; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
21703 ; VLX-NEXT: kmovd %k0, %eax
21706 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem:
21707 ; NoVLX: # %bb.0: # %entry
21708 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21709 ; NoVLX-NEXT: kmovw %edi, %k1
21710 ; NoVLX-NEXT: vmovapd (%rsi), %xmm1
21711 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21712 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21713 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21714 ; NoVLX-NEXT: kmovw %k0, %eax
21715 ; NoVLX-NEXT: vzeroupper
21718 %0 = bitcast <2 x i64> %__a to <2 x double>
21719 %load = load <2 x i64>, ptr %__b
21720 %1 = bitcast <2 x i64> %load to <2 x double>
21721 %2 = fcmp oeq <2 x double> %0, %1
21722 %3 = bitcast i2 %__u to <2 x i1>
21723 %4 = and <2 x i1> %2, %3
21724 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21725 %6 = bitcast <32 x i1> %5 to i32
; fcmp oeq of %__a against a scalar double loaded from %__b and splatted to
; both lanes, ANDed with the i2 mask %__u (bitcast to <2 x i1>). The result is
; zero-padded to <32 x i1> and returned as an i32 bitmask.
; The checks expect %edi in k1 as write-mask and a broadcast memory operand:
; {1to2} on xmm in the VLX run, {1to8} on zmm plus kshiftlw/kshiftrw by 14 in
; the NoVLX run.
21729 define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
21730 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
21731 ; VLX: # %bb.0: # %entry
21732 ; VLX-NEXT: kmovd %edi, %k1
21733 ; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
21734 ; VLX-NEXT: kmovd %k0, %eax
21737 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
21738 ; NoVLX: # %bb.0: # %entry
21739 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21740 ; NoVLX-NEXT: kmovw %edi, %k1
21741 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
21742 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21743 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21744 ; NoVLX-NEXT: kmovw %k0, %eax
21745 ; NoVLX-NEXT: vzeroupper
21748 %0 = bitcast <2 x i64> %__a to <2 x double>
21749 %load = load double, ptr %__b
21750 %vec = insertelement <2 x double> undef, double %load, i32 0
21751 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21752 %2 = fcmp oeq <2 x double> %0, %1
21753 %3 = bitcast i2 %__u to <2 x i1>
21754 %4 = and <2 x i1> %2, %3
21755 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21756 %6 = bitcast <32 x i1> %5 to i32
21762 define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21763 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
21764 ; VLX: # %bb.0: # %entry
21765 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
21766 ; VLX-NEXT: kmovq %k0, %rax
21769 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
21770 ; NoVLX: # %bb.0: # %entry
21771 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21772 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21773 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21774 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21775 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21776 ; NoVLX-NEXT: kmovw %k0, %eax
21777 ; NoVLX-NEXT: vzeroupper
21780 %0 = bitcast <2 x i64> %__a to <2 x double>
21781 %1 = bitcast <2 x i64> %__b to <2 x double>
21782 %2 = fcmp oeq <2 x double> %0, %1
21783 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21784 %4 = bitcast <64 x i1> %3 to i64
21788 define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
21789 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
21790 ; VLX: # %bb.0: # %entry
21791 ; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
21792 ; VLX-NEXT: kmovq %k0, %rax
21795 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
21796 ; NoVLX: # %bb.0: # %entry
21797 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21798 ; NoVLX-NEXT: vmovapd (%rdi), %xmm1
21799 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21800 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21801 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21802 ; NoVLX-NEXT: kmovw %k0, %eax
21803 ; NoVLX-NEXT: vzeroupper
21806 %0 = bitcast <2 x i64> %__a to <2 x double>
21807 %load = load <2 x i64>, ptr %__b
21808 %1 = bitcast <2 x i64> %load to <2 x double>
21809 %2 = fcmp oeq <2 x double> %0, %1
21810 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21811 %4 = bitcast <64 x i1> %3 to i64
21815 define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
21816 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
21817 ; VLX: # %bb.0: # %entry
21818 ; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
21819 ; VLX-NEXT: kmovq %k0, %rax
21822 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
21823 ; NoVLX: # %bb.0: # %entry
21824 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21825 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
21826 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21827 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21828 ; NoVLX-NEXT: kmovw %k0, %eax
21829 ; NoVLX-NEXT: vzeroupper
21832 %0 = bitcast <2 x i64> %__a to <2 x double>
21833 %load = load double, ptr %__b
21834 %vec = insertelement <2 x double> undef, double %load, i32 0
21835 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21836 %2 = fcmp oeq <2 x double> %0, %1
21837 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21838 %4 = bitcast <64 x i1> %3 to i64
21842 define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21843 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask:
21844 ; VLX: # %bb.0: # %entry
21845 ; VLX-NEXT: kmovd %edi, %k1
21846 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
21847 ; VLX-NEXT: kmovq %k0, %rax
21850 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask:
21851 ; NoVLX: # %bb.0: # %entry
21852 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21853 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21854 ; NoVLX-NEXT: kmovw %edi, %k1
21855 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21856 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21857 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21858 ; NoVLX-NEXT: kmovw %k0, %eax
21859 ; NoVLX-NEXT: vzeroupper
21862 %0 = bitcast <2 x i64> %__a to <2 x double>
21863 %1 = bitcast <2 x i64> %__b to <2 x double>
21864 %2 = fcmp oeq <2 x double> %0, %1
21865 %3 = bitcast i2 %__u to <2 x i1>
21866 %4 = and <2 x i1> %2, %3
21867 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21868 %6 = bitcast <64 x i1> %5 to i64
21872 define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
21873 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem:
21874 ; VLX: # %bb.0: # %entry
21875 ; VLX-NEXT: kmovd %edi, %k1
21876 ; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
21877 ; VLX-NEXT: kmovq %k0, %rax
21880 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem:
21881 ; NoVLX: # %bb.0: # %entry
21882 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21883 ; NoVLX-NEXT: kmovw %edi, %k1
21884 ; NoVLX-NEXT: vmovapd (%rsi), %xmm1
21885 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21886 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21887 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21888 ; NoVLX-NEXT: kmovw %k0, %eax
21889 ; NoVLX-NEXT: vzeroupper
21892 %0 = bitcast <2 x i64> %__a to <2 x double>
21893 %load = load <2 x i64>, ptr %__b
21894 %1 = bitcast <2 x i64> %load to <2 x double>
21895 %2 = fcmp oeq <2 x double> %0, %1
21896 %3 = bitcast i2 %__u to <2 x i1>
21897 %4 = and <2 x i1> %2, %3
21898 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21899 %6 = bitcast <64 x i1> %5 to i64
21903 define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
21904 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
21905 ; VLX: # %bb.0: # %entry
21906 ; VLX-NEXT: kmovd %edi, %k1
21907 ; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
21908 ; VLX-NEXT: kmovq %k0, %rax
21911 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
21912 ; NoVLX: # %bb.0: # %entry
21913 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21914 ; NoVLX-NEXT: kmovw %edi, %k1
21915 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
21916 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21917 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21918 ; NoVLX-NEXT: kmovw %k0, %eax
21919 ; NoVLX-NEXT: vzeroupper
21922 %0 = bitcast <2 x i64> %__a to <2 x double>
21923 %load = load double, ptr %__b
21924 %vec = insertelement <2 x double> undef, double %load, i32 0
21925 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21926 %2 = fcmp oeq <2 x double> %0, %1
21927 %3 = bitcast i2 %__u to <2 x i1>
21928 %4 = and <2 x i1> %2, %3
21929 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21930 %6 = bitcast <64 x i1> %5 to i64
; Unmasked ordered-equal compare of two <4 x double> register operands. The
; <4 x i1> result is widened to <8 x i1> by a shufflevector whose indices 4-7
; select lanes of the zeroinitializer operand, then bitcast to i8.
; With AVX512VL a single ymm vcmpeqpd into %k0 is expected; with only AVX512F
; the operands are treated as zmm and a kshiftlw/kshiftrw pair by 12 keeps
; only the low four mask bits.
21936 define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
21937 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask:
21938 ; VLX: # %bb.0: # %entry
21939 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
21940 ; VLX-NEXT: kmovd %k0, %eax
21941 ; VLX-NEXT: # kill: def $al killed $al killed $eax
21942 ; VLX-NEXT: vzeroupper
21945 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask:
21946 ; NoVLX: # %bb.0: # %entry
21947 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
21948 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
21949 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21950 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
21951 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
21952 ; NoVLX-NEXT: kmovw %k0, %eax
21953 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
21954 ; NoVLX-NEXT: vzeroupper
21957 %0 = bitcast <4 x i64> %__a to <4 x double>
21958 %1 = bitcast <4 x i64> %__b to <4 x double>
21959 %2 = fcmp oeq <4 x double> %0, %1
21960 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
21961 %4 = bitcast <8 x i1> %3 to i8
; Unmasked ordered-equal compare of a <4 x double> register operand against a
; full 256-bit vector loaded from %__b. The <4 x i1> result is widened to
; <8 x i1> (indices 4-7 select zeroinitializer lanes) and bitcast to i8.
; With AVX512VL the load is expected to fold into a ymm vcmpeqpd; with only
; AVX512F the load stays a separate vmovapd, the compare runs on zmm, and a
; kshiftlw/kshiftrw pair by 12 keeps only the low four mask bits.
21965 define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
21966 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem:
21967 ; VLX: # %bb.0: # %entry
21968 ; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
21969 ; VLX-NEXT: kmovd %k0, %eax
21970 ; VLX-NEXT: # kill: def $al killed $al killed $eax
21971 ; VLX-NEXT: vzeroupper
21974 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem:
21975 ; NoVLX: # %bb.0: # %entry
21976 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
21977 ; NoVLX-NEXT: vmovapd (%rdi), %ymm1
21978 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21979 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
21980 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
21981 ; NoVLX-NEXT: kmovw %k0, %eax
21982 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
21983 ; NoVLX-NEXT: vzeroupper
21986 %0 = bitcast <4 x i64> %__a to <4 x double>
21987 %load = load <4 x i64>, ptr %__b
21988 %1 = bitcast <4 x i64> %load to <4 x double>
21989 %2 = fcmp oeq <4 x double> %0, %1
21990 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
21991 %4 = bitcast <8 x i1> %3 to i8
; Unmasked ordered-equal compare of a <4 x double> register operand against a
; scalar double loaded from %__b and splatted to all four lanes. The
; <4 x i1> result is widened to <8 x i1> (indices 4-7 select zeroinitializer
; lanes) and bitcast to i8.
; With AVX512VL the splat is expected to fold as a {1to4} broadcast operand;
; with only AVX512F it folds as {1to8} on a zmm compare, followed by a
; kshiftlw/kshiftrw pair by 12.
21995 define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
21996 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
21997 ; VLX: # %bb.0: # %entry
21998 ; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
21999 ; VLX-NEXT: kmovd %k0, %eax
22000 ; VLX-NEXT: # kill: def $al killed $al killed $eax
22001 ; VLX-NEXT: vzeroupper
22004 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
22005 ; NoVLX: # %bb.0: # %entry
22006 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22007 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22008 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22009 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22010 ; NoVLX-NEXT: kmovw %k0, %eax
22011 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
22012 ; NoVLX-NEXT: vzeroupper
22015 %0 = bitcast <4 x i64> %__a to <4 x double>
22016 %load = load double, ptr %__b
22017 %vec = insertelement <4 x double> undef, double %load, i32 0
22018 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22019 %2 = fcmp oeq <4 x double> %0, %1
22020 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22021 %4 = bitcast <8 x i1> %3 to i8
; Masked ordered-equal compare of two <4 x double> register operands. The
; <4 x i1> compare result is ANDed with the i4 argument %__u (bitcast to
; <4 x i1>), widened to <8 x i1> (indices 4-7 select zeroinitializer lanes),
; and bitcast to i8.
; The AND is expected to become a {%k1} write-mask on vcmpeqpd after %edi is
; moved into %k1. With only AVX512F the compare runs on zmm and a
; kshiftlw/kshiftrw pair by 12 keeps only the low four mask bits.
22025 define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22026 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask:
22027 ; VLX: # %bb.0: # %entry
22028 ; VLX-NEXT: kmovd %edi, %k1
22029 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
22030 ; VLX-NEXT: kmovd %k0, %eax
22031 ; VLX-NEXT: # kill: def $al killed $al killed $eax
22032 ; VLX-NEXT: vzeroupper
22035 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask:
22036 ; NoVLX: # %bb.0: # %entry
22037 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22038 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22039 ; NoVLX-NEXT: kmovw %edi, %k1
22040 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22041 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22042 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22043 ; NoVLX-NEXT: kmovw %k0, %eax
22044 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
22045 ; NoVLX-NEXT: vzeroupper
22048 %0 = bitcast <4 x i64> %__a to <4 x double>
22049 %1 = bitcast <4 x i64> %__b to <4 x double>
22050 %2 = fcmp oeq <4 x double> %0, %1
22051 %3 = bitcast i4 %__u to <4 x i1>
22052 %4 = and <4 x i1> %2, %3
22053 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22054 %6 = bitcast <8 x i1> %5 to i8
; Masked ordered-equal compare of a <4 x double> register operand against a
; full 256-bit vector loaded from %__b. The compare result is ANDed with the
; i4 argument %__u (bitcast to <4 x i1>), widened to <8 x i1> (indices 4-7
; select zeroinitializer lanes), and bitcast to i8.
; With AVX512VL both the load and the mask are expected to fold into a single
; ymm vcmpeqpd with a {%k1} write-mask; with only AVX512F the load is a
; separate vmovapd, the compare runs on zmm, and a kshiftlw/kshiftrw pair by
; 12 keeps only the low four mask bits.
22058 define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
22059 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem:
22060 ; VLX: # %bb.0: # %entry
22061 ; VLX-NEXT: kmovd %edi, %k1
22062 ; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
22063 ; VLX-NEXT: kmovd %k0, %eax
22064 ; VLX-NEXT: # kill: def $al killed $al killed $eax
22065 ; VLX-NEXT: vzeroupper
22068 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem:
22069 ; NoVLX: # %bb.0: # %entry
22070 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22071 ; NoVLX-NEXT: kmovw %edi, %k1
22072 ; NoVLX-NEXT: vmovapd (%rsi), %ymm1
22073 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22074 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22075 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22076 ; NoVLX-NEXT: kmovw %k0, %eax
22077 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
22078 ; NoVLX-NEXT: vzeroupper
22081 %0 = bitcast <4 x i64> %__a to <4 x double>
22082 %load = load <4 x i64>, ptr %__b
22083 %1 = bitcast <4 x i64> %load to <4 x double>
22084 %2 = fcmp oeq <4 x double> %0, %1
22085 %3 = bitcast i4 %__u to <4 x i1>
22086 %4 = and <4 x i1> %2, %3
22087 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22088 %6 = bitcast <8 x i1> %5 to i8
; Masked ordered-equal compare of a <4 x double> register operand against a
; scalar double loaded from %__b and splatted to all four lanes. The compare
; result is ANDed with the i4 argument %__u (bitcast to <4 x i1>), widened to
; <8 x i1> (indices 4-7 select zeroinitializer lanes), and bitcast to i8.
; With AVX512VL the splat is expected to fold as a {1to4} broadcast operand
; under a {%k1} write-mask; with only AVX512F it folds as {1to8} on a zmm
; compare, followed by a kshiftlw/kshiftrw pair by 12.
22092 define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
22093 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
22094 ; VLX: # %bb.0: # %entry
22095 ; VLX-NEXT: kmovd %edi, %k1
22096 ; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
22097 ; VLX-NEXT: kmovd %k0, %eax
22098 ; VLX-NEXT: # kill: def $al killed $al killed $eax
22099 ; VLX-NEXT: vzeroupper
22102 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
22103 ; NoVLX: # %bb.0: # %entry
22104 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22105 ; NoVLX-NEXT: kmovw %edi, %k1
22106 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
22107 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22108 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22109 ; NoVLX-NEXT: kmovw %k0, %eax
22110 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
22111 ; NoVLX-NEXT: vzeroupper
22114 %0 = bitcast <4 x i64> %__a to <4 x double>
22115 %load = load double, ptr %__b
22116 %vec = insertelement <4 x double> undef, double %load, i32 0
22117 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22118 %2 = fcmp oeq <4 x double> %0, %1
22119 %3 = bitcast i4 %__u to <4 x i1>
22120 %4 = and <4 x i1> %2, %3
22121 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22122 %6 = bitcast <8 x i1> %5 to i8
; Unmasked ordered-equal compare of two <4 x double> register operands. The
; <4 x i1> result is widened to <16 x i1> by a shufflevector in which every
; index above 3 selects a lane of the zeroinitializer operand, then bitcast
; to i16.
; With AVX512VL a single ymm vcmpeqpd into %k0 is expected; with only AVX512F
; the operands are treated as zmm and a kshiftlw/kshiftrw pair by 12 keeps
; only the low four mask bits.
22128 define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22129 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask:
22130 ; VLX: # %bb.0: # %entry
22131 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
22132 ; VLX-NEXT: kmovd %k0, %eax
22133 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22134 ; VLX-NEXT: vzeroupper
22137 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask:
22138 ; NoVLX: # %bb.0: # %entry
22139 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22140 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22141 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22142 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22143 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22144 ; NoVLX-NEXT: kmovw %k0, %eax
22145 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22146 ; NoVLX-NEXT: vzeroupper
22149 %0 = bitcast <4 x i64> %__a to <4 x double>
22150 %1 = bitcast <4 x i64> %__b to <4 x double>
22151 %2 = fcmp oeq <4 x double> %0, %1
22152 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22153 %4 = bitcast <16 x i1> %3 to i16
; Unmasked ordered-equal compare of a <4 x double> register operand against a
; full 256-bit vector loaded from %__b. The <4 x i1> result is widened to
; <16 x i1> (every index above 3 selects a zeroinitializer lane) and bitcast
; to i16.
; With AVX512VL the load is expected to fold into a ymm vcmpeqpd; with only
; AVX512F the load stays a separate vmovapd, the compare runs on zmm, and a
; kshiftlw/kshiftrw pair by 12 keeps only the low four mask bits.
22157 define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
22158 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem:
22159 ; VLX: # %bb.0: # %entry
22160 ; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
22161 ; VLX-NEXT: kmovd %k0, %eax
22162 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22163 ; VLX-NEXT: vzeroupper
22166 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem:
22167 ; NoVLX: # %bb.0: # %entry
22168 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22169 ; NoVLX-NEXT: vmovapd (%rdi), %ymm1
22170 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22171 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22172 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22173 ; NoVLX-NEXT: kmovw %k0, %eax
22174 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22175 ; NoVLX-NEXT: vzeroupper
22178 %0 = bitcast <4 x i64> %__a to <4 x double>
22179 %load = load <4 x i64>, ptr %__b
22180 %1 = bitcast <4 x i64> %load to <4 x double>
22181 %2 = fcmp oeq <4 x double> %0, %1
22182 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22183 %4 = bitcast <16 x i1> %3 to i16
; Unmasked ordered-equal compare of a <4 x double> register operand against a
; scalar double loaded from %__b and splatted to all four lanes. The
; <4 x i1> result is widened to <16 x i1> (every index above 3 selects a
; zeroinitializer lane) and bitcast to i16.
; With AVX512VL the splat is expected to fold as a {1to4} broadcast operand;
; with only AVX512F it folds as {1to8} on a zmm compare, followed by a
; kshiftlw/kshiftrw pair by 12.
22187 define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
22188 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
22189 ; VLX: # %bb.0: # %entry
22190 ; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
22191 ; VLX-NEXT: kmovd %k0, %eax
22192 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22193 ; VLX-NEXT: vzeroupper
22196 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
22197 ; NoVLX: # %bb.0: # %entry
22198 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22199 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22200 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22201 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22202 ; NoVLX-NEXT: kmovw %k0, %eax
22203 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22204 ; NoVLX-NEXT: vzeroupper
22207 %0 = bitcast <4 x i64> %__a to <4 x double>
22208 %load = load double, ptr %__b
22209 %vec = insertelement <4 x double> undef, double %load, i32 0
22210 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22211 %2 = fcmp oeq <4 x double> %0, %1
22212 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22213 %4 = bitcast <16 x i1> %3 to i16
; Masked ordered-equal compare of two <4 x double> register operands. The
; <4 x i1> compare result is ANDed with the i4 argument %__u (bitcast to
; <4 x i1>), widened to <16 x i1> (every index above 3 selects a
; zeroinitializer lane), and bitcast to i16.
; The AND is expected to become a {%k1} write-mask on vcmpeqpd after %edi is
; moved into %k1. With only AVX512F the compare runs on zmm and a
; kshiftlw/kshiftrw pair by 12 keeps only the low four mask bits.
22217 define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22218 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask:
22219 ; VLX: # %bb.0: # %entry
22220 ; VLX-NEXT: kmovd %edi, %k1
22221 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
22222 ; VLX-NEXT: kmovd %k0, %eax
22223 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22224 ; VLX-NEXT: vzeroupper
22227 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask:
22228 ; NoVLX: # %bb.0: # %entry
22229 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22230 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22231 ; NoVLX-NEXT: kmovw %edi, %k1
22232 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22233 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22234 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22235 ; NoVLX-NEXT: kmovw %k0, %eax
22236 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22237 ; NoVLX-NEXT: vzeroupper
22240 %0 = bitcast <4 x i64> %__a to <4 x double>
22241 %1 = bitcast <4 x i64> %__b to <4 x double>
22242 %2 = fcmp oeq <4 x double> %0, %1
22243 %3 = bitcast i4 %__u to <4 x i1>
22244 %4 = and <4 x i1> %2, %3
22245 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22246 %6 = bitcast <16 x i1> %5 to i16
; Masked ordered-equal compare of a <4 x double> register operand against a
; full 256-bit vector loaded from %__b. The compare result is ANDed with the
; i4 argument %__u (bitcast to <4 x i1>), widened to <16 x i1> (every index
; above 3 selects a zeroinitializer lane), and bitcast to i16.
; With AVX512VL both the load and the mask are expected to fold into a single
; ymm vcmpeqpd with a {%k1} write-mask; with only AVX512F the load is a
; separate vmovapd, the compare runs on zmm, and a kshiftlw/kshiftrw pair by
; 12 keeps only the low four mask bits.
22250 define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
22251 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem:
22252 ; VLX: # %bb.0: # %entry
22253 ; VLX-NEXT: kmovd %edi, %k1
22254 ; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
22255 ; VLX-NEXT: kmovd %k0, %eax
22256 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22257 ; VLX-NEXT: vzeroupper
22260 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem:
22261 ; NoVLX: # %bb.0: # %entry
22262 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22263 ; NoVLX-NEXT: kmovw %edi, %k1
22264 ; NoVLX-NEXT: vmovapd (%rsi), %ymm1
22265 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22266 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22267 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22268 ; NoVLX-NEXT: kmovw %k0, %eax
22269 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22270 ; NoVLX-NEXT: vzeroupper
22273 %0 = bitcast <4 x i64> %__a to <4 x double>
22274 %load = load <4 x i64>, ptr %__b
22275 %1 = bitcast <4 x i64> %load to <4 x double>
22276 %2 = fcmp oeq <4 x double> %0, %1
22277 %3 = bitcast i4 %__u to <4 x i1>
22278 %4 = and <4 x i1> %2, %3
22279 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22280 %6 = bitcast <16 x i1> %5 to i16
; Masked ordered-equal compare of a <4 x double> register operand against a
; scalar double loaded from %__b and splatted to all four lanes. The compare
; result is ANDed with the i4 argument %__u (bitcast to <4 x i1>), widened to
; <16 x i1> (every index above 3 selects a zeroinitializer lane), and bitcast
; to i16.
; With AVX512VL the splat is expected to fold as a {1to4} broadcast operand
; under a {%k1} write-mask; with only AVX512F it folds as {1to8} on a zmm
; compare, followed by a kshiftlw/kshiftrw pair by 12.
22284 define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
22285 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
22286 ; VLX: # %bb.0: # %entry
22287 ; VLX-NEXT: kmovd %edi, %k1
22288 ; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
22289 ; VLX-NEXT: kmovd %k0, %eax
22290 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22291 ; VLX-NEXT: vzeroupper
22294 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
22295 ; NoVLX: # %bb.0: # %entry
22296 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22297 ; NoVLX-NEXT: kmovw %edi, %k1
22298 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
22299 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22300 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22301 ; NoVLX-NEXT: kmovw %k0, %eax
22302 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22303 ; NoVLX-NEXT: vzeroupper
22306 %0 = bitcast <4 x i64> %__a to <4 x double>
22307 %load = load double, ptr %__b
22308 %vec = insertelement <4 x double> undef, double %load, i32 0
22309 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22310 %2 = fcmp oeq <4 x double> %0, %1
22311 %3 = bitcast i4 %__u to <4 x i1>
22312 %4 = and <4 x i1> %2, %3
22313 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22314 %6 = bitcast <16 x i1> %5 to i16
; Unmasked ordered-equal compare of two <4 x double> register operands. The
; <4 x i1> result is widened to <32 x i1> by a shufflevector in which every
; index above 3 selects a lane of the zeroinitializer operand, then bitcast
; to i32.
; With AVX512VL a single ymm vcmpeqpd into %k0 is expected; with only AVX512F
; the operands are treated as zmm and a kshiftlw/kshiftrw pair by 12 keeps
; only the low four mask bits.
22320 define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22321 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask:
22322 ; VLX: # %bb.0: # %entry
22323 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
22324 ; VLX-NEXT: kmovd %k0, %eax
22325 ; VLX-NEXT: vzeroupper
22328 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask:
22329 ; NoVLX: # %bb.0: # %entry
22330 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22331 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22332 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22333 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22334 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22335 ; NoVLX-NEXT: kmovw %k0, %eax
22336 ; NoVLX-NEXT: vzeroupper
22339 %0 = bitcast <4 x i64> %__a to <4 x double>
22340 %1 = bitcast <4 x i64> %__b to <4 x double>
22341 %2 = fcmp oeq <4 x double> %0, %1
22342 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22343 %4 = bitcast <32 x i1> %3 to i32
; Unmasked ordered-equal compare of a <4 x double> register operand against a
; full 256-bit vector loaded from %__b. The <4 x i1> result is widened to
; <32 x i1> (every index above 3 selects a zeroinitializer lane) and bitcast
; to i32.
; With AVX512VL the load is expected to fold into a ymm vcmpeqpd; with only
; AVX512F the load stays a separate vmovapd, the compare runs on zmm, and a
; kshiftlw/kshiftrw pair by 12 keeps only the low four mask bits.
22347 define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
22348 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem:
22349 ; VLX: # %bb.0: # %entry
22350 ; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
22351 ; VLX-NEXT: kmovd %k0, %eax
22352 ; VLX-NEXT: vzeroupper
22355 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem:
22356 ; NoVLX: # %bb.0: # %entry
22357 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22358 ; NoVLX-NEXT: vmovapd (%rdi), %ymm1
22359 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22360 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22361 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22362 ; NoVLX-NEXT: kmovw %k0, %eax
22363 ; NoVLX-NEXT: vzeroupper
22366 %0 = bitcast <4 x i64> %__a to <4 x double>
22367 %load = load <4 x i64>, ptr %__b
22368 %1 = bitcast <4 x i64> %load to <4 x double>
22369 %2 = fcmp oeq <4 x double> %0, %1
22370 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22371 %4 = bitcast <32 x i1> %3 to i32
; Unmasked ordered-equal compare of a <4 x double> register operand against a
; scalar double loaded from %__b and splatted to all four lanes. The
; <4 x i1> result is widened to <32 x i1> (every index above 3 selects a
; zeroinitializer lane) and bitcast to i32.
; With AVX512VL the splat is expected to fold as a {1to4} broadcast operand;
; with only AVX512F it folds as {1to8} on a zmm compare, followed by a
; kshiftlw/kshiftrw pair by 12.
22375 define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
22376 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
22377 ; VLX: # %bb.0: # %entry
22378 ; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
22379 ; VLX-NEXT: kmovd %k0, %eax
22380 ; VLX-NEXT: vzeroupper
22383 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
22384 ; NoVLX: # %bb.0: # %entry
22385 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22386 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22387 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22388 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22389 ; NoVLX-NEXT: kmovw %k0, %eax
22390 ; NoVLX-NEXT: vzeroupper
22393 %0 = bitcast <4 x i64> %__a to <4 x double>
22394 %load = load double, ptr %__b
22395 %vec = insertelement <4 x double> undef, double %load, i32 0
22396 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22397 %2 = fcmp oeq <4 x double> %0, %1
22398 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22399 %4 = bitcast <32 x i1> %3 to i32
; Masked ordered-equal compare of two <4 x double> register operands. The
; <4 x i1> compare result is ANDed with the i4 argument %__u (bitcast to
; <4 x i1>), widened to <32 x i1> (every index above 3 selects a
; zeroinitializer lane), and bitcast to i32.
; The AND is expected to become a {%k1} write-mask on vcmpeqpd after %edi is
; moved into %k1. With only AVX512F the compare runs on zmm and a
; kshiftlw/kshiftrw pair by 12 keeps only the low four mask bits.
22403 define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22404 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask:
22405 ; VLX: # %bb.0: # %entry
22406 ; VLX-NEXT: kmovd %edi, %k1
22407 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
22408 ; VLX-NEXT: kmovd %k0, %eax
22409 ; VLX-NEXT: vzeroupper
22412 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask:
22413 ; NoVLX: # %bb.0: # %entry
22414 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22415 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22416 ; NoVLX-NEXT: kmovw %edi, %k1
22417 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22418 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22419 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22420 ; NoVLX-NEXT: kmovw %k0, %eax
22421 ; NoVLX-NEXT: vzeroupper
22424 %0 = bitcast <4 x i64> %__a to <4 x double>
22425 %1 = bitcast <4 x i64> %__b to <4 x double>
22426 %2 = fcmp oeq <4 x double> %0, %1
22427 %3 = bitcast i4 %__u to <4 x i1>
22428 %4 = and <4 x i1> %2, %3
22429 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22430 %6 = bitcast <32 x i1> %5 to i32
; Masked ordered-equal compare of a <4 x double> register operand against a
; full 256-bit vector loaded from %__b. The compare result is ANDed with the
; i4 argument %__u (bitcast to <4 x i1>), widened to <32 x i1> (every index
; above 3 selects a zeroinitializer lane), and bitcast to i32.
; With AVX512VL both the load and the mask are expected to fold into a single
; ymm vcmpeqpd with a {%k1} write-mask; with only AVX512F the load is a
; separate vmovapd, the compare runs on zmm, and a kshiftlw/kshiftrw pair by
; 12 keeps only the low four mask bits.
22434 define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
22435 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem:
22436 ; VLX: # %bb.0: # %entry
22437 ; VLX-NEXT: kmovd %edi, %k1
22438 ; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
22439 ; VLX-NEXT: kmovd %k0, %eax
22440 ; VLX-NEXT: vzeroupper
22443 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem:
22444 ; NoVLX: # %bb.0: # %entry
22445 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22446 ; NoVLX-NEXT: kmovw %edi, %k1
22447 ; NoVLX-NEXT: vmovapd (%rsi), %ymm1
22448 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22449 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22450 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22451 ; NoVLX-NEXT: kmovw %k0, %eax
22452 ; NoVLX-NEXT: vzeroupper
22455 %0 = bitcast <4 x i64> %__a to <4 x double>
22456 %load = load <4 x i64>, ptr %__b
22457 %1 = bitcast <4 x i64> %load to <4 x double>
22458 %2 = fcmp oeq <4 x double> %0, %1
22459 %3 = bitcast i4 %__u to <4 x i1>
22460 %4 = and <4 x i1> %2, %3
22461 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22462 %6 = bitcast <32 x i1> %5 to i32
; Masked ordered-equal compare of a <4 x double> register operand against a
; scalar double loaded from %__b and splatted to all four lanes. The compare
; result is ANDed with the i4 argument %__u (bitcast to <4 x i1>), widened to
; <32 x i1> (every index above 3 selects a zeroinitializer lane), and bitcast
; to i32.
; With AVX512VL the splat is expected to fold as a {1to4} broadcast operand
; under a {%k1} write-mask; with only AVX512F it folds as {1to8} on a zmm
; compare, followed by a kshiftlw/kshiftrw pair by 12.
22466 define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
22467 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
22468 ; VLX: # %bb.0: # %entry
22469 ; VLX-NEXT: kmovd %edi, %k1
22470 ; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
22471 ; VLX-NEXT: kmovd %k0, %eax
22472 ; VLX-NEXT: vzeroupper
22475 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
22476 ; NoVLX: # %bb.0: # %entry
22477 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22478 ; NoVLX-NEXT: kmovw %edi, %k1
22479 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
22480 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22481 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22482 ; NoVLX-NEXT: kmovw %k0, %eax
22483 ; NoVLX-NEXT: vzeroupper
22486 %0 = bitcast <4 x i64> %__a to <4 x double>
22487 %load = load double, ptr %__b
22488 %vec = insertelement <4 x double> undef, double %load, i32 0
22489 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22490 %2 = fcmp oeq <4 x double> %0, %1
22491 %3 = bitcast i4 %__u to <4 x i1>
22492 %4 = and <4 x i1> %2, %3
22493 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22494 %6 = bitcast <32 x i1> %5 to i32
; Unmasked ordered-equal compare of two <4 x double> register operands. The
; <4 x i1> result is widened to <64 x i1> by a shufflevector in which every
; index above 3 selects a lane of the zeroinitializer operand, then bitcast
; to i64.
; With AVX512VL a single ymm vcmpeqpd into %k0 is expected, read out with
; kmovq; with only AVX512F the operands are treated as zmm, a
; kshiftlw/kshiftrw pair by 12 keeps only the low four mask bits, and the
; mask is read out with kmovw into %eax.
22500 define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22501 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
22502 ; VLX: # %bb.0: # %entry
22503 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
22504 ; VLX-NEXT: kmovq %k0, %rax
22505 ; VLX-NEXT: vzeroupper
22508 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
22509 ; NoVLX: # %bb.0: # %entry
22510 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22511 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22512 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22513 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22514 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22515 ; NoVLX-NEXT: kmovw %k0, %eax
22516 ; NoVLX-NEXT: vzeroupper
22519 %0 = bitcast <4 x i64> %__a to <4 x double>
22520 %1 = bitcast <4 x i64> %__b to <4 x double>
22521 %2 = fcmp oeq <4 x double> %0, %1
22522 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22523 %4 = bitcast <64 x i1> %3 to i64
; Unmasked ordered-equal compare of a <4 x double> register operand against a
; full 256-bit vector loaded from %__b. The <4 x i1> result is widened to
; <64 x i1> (every index above 3 selects a zeroinitializer lane) and bitcast
; to i64.
; With AVX512VL the load is expected to fold into a ymm vcmpeqpd; with only
; AVX512F the load stays a separate vmovapd, the compare runs on zmm, and a
; kshiftlw/kshiftrw pair by 12 keeps only the low four mask bits.
22527 define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
22528 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem:
22529 ; VLX: # %bb.0: # %entry
22530 ; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
22531 ; VLX-NEXT: kmovq %k0, %rax
22532 ; VLX-NEXT: vzeroupper
22535 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem:
22536 ; NoVLX: # %bb.0: # %entry
22537 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22538 ; NoVLX-NEXT: vmovapd (%rdi), %ymm1
22539 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22540 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22541 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22542 ; NoVLX-NEXT: kmovw %k0, %eax
22543 ; NoVLX-NEXT: vzeroupper
22546 %0 = bitcast <4 x i64> %__a to <4 x double>
22547 %load = load <4 x i64>, ptr %__b
22548 %1 = bitcast <4 x i64> %load to <4 x double>
22549 %2 = fcmp oeq <4 x double> %0, %1
22550 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22551 %4 = bitcast <64 x i1> %3 to i64
; Broadcast variant: a single double is loaded from %__b and splatted to
; <4 x double> (insertelement into lane 0, then an all-zero shuffle mask)
; before the fcmp oeq. The <4 x i1> result is zero-padded to 64 lanes and
; bitcast to i64. The VLX run expects a {1to4} broadcast memory operand on
; the ymm compare; the NoVLX run expects a {1to8} broadcast on a zmm compare
; followed by the kshiftlw/kshiftrw $12 pair.
22555 define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
22556 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
22557 ; VLX: # %bb.0: # %entry
22558 ; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
22559 ; VLX-NEXT: kmovq %k0, %rax
22560 ; VLX-NEXT: vzeroupper
22563 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
22564 ; NoVLX: # %bb.0: # %entry
22565 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22566 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22567 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22568 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22569 ; NoVLX-NEXT: kmovw %k0, %eax
22570 ; NoVLX-NEXT: vzeroupper
22573 %0 = bitcast <4 x i64> %__a to <4 x double>
22574 %load = load double, ptr %__b
22575 %vec = insertelement <4 x double> undef, double %load, i32 0
22576 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22577 %2 = fcmp oeq <4 x double> %0, %1
22578 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22579 %4 = bitcast <64 x i1> %3 to i64
; Masked register-register variant: the i4 argument %__u is bitcast to
; <4 x i1> and ANDed with the fcmp oeq result before the zero-padding shuffle
; to 64 lanes and the bitcast to i64. The VLX run expects %edi moved into %k1
; and used as the write-mask of a ymm vcmpeqpd; the NoVLX run expects the same
; masking on a zmm compare, followed by the kshiftlw/kshiftrw $12 pair.
22583 define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22584 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask:
22585 ; VLX: # %bb.0: # %entry
22586 ; VLX-NEXT: kmovd %edi, %k1
22587 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
22588 ; VLX-NEXT: kmovq %k0, %rax
22589 ; VLX-NEXT: vzeroupper
22592 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask:
22593 ; NoVLX: # %bb.0: # %entry
22594 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22595 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22596 ; NoVLX-NEXT: kmovw %edi, %k1
22597 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22598 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22599 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22600 ; NoVLX-NEXT: kmovw %k0, %eax
22601 ; NoVLX-NEXT: vzeroupper
22604 %0 = bitcast <4 x i64> %__a to <4 x double>
22605 %1 = bitcast <4 x i64> %__b to <4 x double>
22606 %2 = fcmp oeq <4 x double> %0, %1
22607 %3 = bitcast i4 %__u to <4 x i1>
22608 %4 = and <4 x i1> %2, %3
22609 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22610 %6 = bitcast <64 x i1> %5 to i64
; Masked memory variant: the second operand is a <4 x i64> vector loaded
; from %__b, and the fcmp oeq result is ANDed with %__u (bitcast from i4 to
; <4 x i1>) before being zero-padded to 64 lanes and bitcast to i64. The VLX
; run expects the load folded into a ymm vcmpeqpd under {%k1}; the NoVLX run
; expects a separate vmovapd load, a masked zmm compare, and the
; kshiftlw/kshiftrw $12 pair.
22614 define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
22615 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem:
22616 ; VLX: # %bb.0: # %entry
22617 ; VLX-NEXT: kmovd %edi, %k1
22618 ; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
22619 ; VLX-NEXT: kmovq %k0, %rax
22620 ; VLX-NEXT: vzeroupper
22623 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem:
22624 ; NoVLX: # %bb.0: # %entry
22625 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22626 ; NoVLX-NEXT: kmovw %edi, %k1
22627 ; NoVLX-NEXT: vmovapd (%rsi), %ymm1
22628 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22629 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22630 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22631 ; NoVLX-NEXT: kmovw %k0, %eax
22632 ; NoVLX-NEXT: vzeroupper
22635 %0 = bitcast <4 x i64> %__a to <4 x double>
22636 %load = load <4 x i64>, ptr %__b
22637 %1 = bitcast <4 x i64> %load to <4 x double>
22638 %2 = fcmp oeq <4 x double> %0, %1
22639 %3 = bitcast i4 %__u to <4 x i1>
22640 %4 = and <4 x i1> %2, %3
22641 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22642 %6 = bitcast <64 x i1> %5 to i64
; Masked broadcast variant: a scalar double loaded from %__b is splatted to
; <4 x double>, compared with fcmp oeq, and the result is ANDed with %__u
; (bitcast from i4 to <4 x i1>) before the zero-padding shuffle to 64 lanes
; and the bitcast to i64. The VLX run expects a {1to4} broadcast operand on a
; ymm compare under {%k1}; the NoVLX run expects a {1to8} broadcast on a zmm
; compare under {%k1}, followed by the kshiftlw/kshiftrw $12 pair.
22646 define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
22647 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
22648 ; VLX: # %bb.0: # %entry
22649 ; VLX-NEXT: kmovd %edi, %k1
22650 ; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
22651 ; VLX-NEXT: kmovq %k0, %rax
22652 ; VLX-NEXT: vzeroupper
22655 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
22656 ; NoVLX: # %bb.0: # %entry
22657 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22658 ; NoVLX-NEXT: kmovw %edi, %k1
22659 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
22660 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22661 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22662 ; NoVLX-NEXT: kmovw %k0, %eax
22663 ; NoVLX-NEXT: vzeroupper
22666 %0 = bitcast <4 x i64> %__a to <4 x double>
22667 %load = load double, ptr %__b
22668 %vec = insertelement <4 x double> undef, double %load, i32 0
22669 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22670 %2 = fcmp oeq <4 x double> %0, %1
22671 %3 = bitcast i4 %__u to <4 x i1>
22672 %4 = and <4 x i1> %2, %3
22673 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22674 %6 = bitcast <64 x i1> %5 to i64
; Unmasked register-register fcmp oeq of eight doubles. The <8 x i1> result
; is widened to 16 lanes by a shufflevector whose indices 8-15 select from
; the zeroinitializer operand, then bitcast to i16. Both runs expect a single
; zmm vcmpeqpd into %k0; they differ only in the mask move (kmovd for the VLX
; run, kmovw for the NoVLX run).
22680 define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
22681 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
22682 ; VLX: # %bb.0: # %entry
22683 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22684 ; VLX-NEXT: kmovd %k0, %eax
22685 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22686 ; VLX-NEXT: vzeroupper
22689 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
22690 ; NoVLX: # %bb.0: # %entry
22691 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22692 ; NoVLX-NEXT: kmovw %k0, %eax
22693 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22694 ; NoVLX-NEXT: vzeroupper
22697 %0 = bitcast <8 x i64> %__a to <8 x double>
22698 %1 = bitcast <8 x i64> %__b to <8 x double>
22699 %2 = fcmp oeq <8 x double> %0, %1
22700 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22701 %4 = bitcast <16 x i1> %3 to i16
; Unmasked memory variant: the second operand is an <8 x i64> vector loaded
; from %__b. The <8 x i1> fcmp oeq result is zero-padded to 16 lanes and
; bitcast to i16. Both runs expect the load folded into the zmm vcmpeqpd as a
; (%rdi) memory operand.
22705 define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
22706 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
22707 ; VLX: # %bb.0: # %entry
22708 ; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
22709 ; VLX-NEXT: kmovd %k0, %eax
22710 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22711 ; VLX-NEXT: vzeroupper
22714 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
22715 ; NoVLX: # %bb.0: # %entry
22716 ; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
22717 ; NoVLX-NEXT: kmovw %k0, %eax
22718 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22719 ; NoVLX-NEXT: vzeroupper
22722 %0 = bitcast <8 x i64> %__a to <8 x double>
22723 %load = load <8 x i64>, ptr %__b
22724 %1 = bitcast <8 x i64> %load to <8 x double>
22725 %2 = fcmp oeq <8 x double> %0, %1
22726 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22727 %4 = bitcast <16 x i1> %3 to i16
; Unmasked broadcast variant: a scalar double loaded from %__b is splatted to
; <8 x double> (insertelement into lane 0, then an all-zero shuffle mask)
; before the fcmp oeq. The result is zero-padded to 16 lanes and bitcast to
; i16. Both runs expect a {1to8} broadcast memory operand on the zmm compare.
22731 define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
22732 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
22733 ; VLX: # %bb.0: # %entry
22734 ; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22735 ; VLX-NEXT: kmovd %k0, %eax
22736 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22737 ; VLX-NEXT: vzeroupper
22740 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
22741 ; NoVLX: # %bb.0: # %entry
22742 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22743 ; NoVLX-NEXT: kmovw %k0, %eax
22744 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22745 ; NoVLX-NEXT: vzeroupper
22748 %0 = bitcast <8 x i64> %__a to <8 x double>
22749 %load = load double, ptr %__b
22750 %vec = insertelement <8 x double> undef, double %load, i32 0
22751 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
22752 %2 = fcmp oeq <8 x double> %0, %1
22753 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22754 %4 = bitcast <16 x i1> %3 to i16
; Masked register-register variant: the i8 argument %__u is bitcast to
; <8 x i1> and ANDed with the fcmp oeq result, which is then zero-padded to
; 16 lanes and bitcast to i16. Both runs expect %edi moved into %k1 and used
; as the {%k1} write-mask of the zmm vcmpeqpd.
22758 define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
22759 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask:
22760 ; VLX: # %bb.0: # %entry
22761 ; VLX-NEXT: kmovd %edi, %k1
22762 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22763 ; VLX-NEXT: kmovd %k0, %eax
22764 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22765 ; VLX-NEXT: vzeroupper
22768 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask:
22769 ; NoVLX: # %bb.0: # %entry
22770 ; NoVLX-NEXT: kmovw %edi, %k1
22771 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22772 ; NoVLX-NEXT: kmovw %k0, %eax
22773 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22774 ; NoVLX-NEXT: vzeroupper
22777 %0 = bitcast <8 x i64> %__a to <8 x double>
22778 %1 = bitcast <8 x i64> %__b to <8 x double>
22779 %2 = fcmp oeq <8 x double> %0, %1
22780 %3 = bitcast i8 %__u to <8 x i1>
22781 %4 = and <8 x i1> %2, %3
22782 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22783 %6 = bitcast <16 x i1> %5 to i16
; Masked memory variant: the second operand is an <8 x i64> vector loaded
; from %__b, and the fcmp oeq result is ANDed with %__u (bitcast from i8 to
; <8 x i1>) before being zero-padded to 16 lanes and bitcast to i16. Both
; runs expect the load folded into the zmm vcmpeqpd as a (%rsi) operand under
; the {%k1} write-mask taken from %edi.
22787 define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
22788 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem:
22789 ; VLX: # %bb.0: # %entry
22790 ; VLX-NEXT: kmovd %edi, %k1
22791 ; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
22792 ; VLX-NEXT: kmovd %k0, %eax
22793 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22794 ; VLX-NEXT: vzeroupper
22797 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem:
22798 ; NoVLX: # %bb.0: # %entry
22799 ; NoVLX-NEXT: kmovw %edi, %k1
22800 ; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
22801 ; NoVLX-NEXT: kmovw %k0, %eax
22802 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22803 ; NoVLX-NEXT: vzeroupper
22806 %0 = bitcast <8 x i64> %__a to <8 x double>
22807 %load = load <8 x i64>, ptr %__b
22808 %1 = bitcast <8 x i64> %load to <8 x double>
22809 %2 = fcmp oeq <8 x double> %0, %1
22810 %3 = bitcast i8 %__u to <8 x i1>
22811 %4 = and <8 x i1> %2, %3
22812 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22813 %6 = bitcast <16 x i1> %5 to i16
; Masked broadcast variant: a scalar double loaded from %__b is splatted to
; <8 x double>, compared with fcmp oeq, and the result is ANDed with %__u
; (bitcast from i8 to <8 x i1>) before being zero-padded to 16 lanes and
; bitcast to i16. Both runs expect a (%rsi){1to8} broadcast operand on the
; zmm compare under the {%k1} write-mask taken from %edi.
22817 define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
22818 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
22819 ; VLX: # %bb.0: # %entry
22820 ; VLX-NEXT: kmovd %edi, %k1
22821 ; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
22822 ; VLX-NEXT: kmovd %k0, %eax
22823 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22824 ; VLX-NEXT: vzeroupper
22827 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
22828 ; NoVLX: # %bb.0: # %entry
22829 ; NoVLX-NEXT: kmovw %edi, %k1
22830 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
22831 ; NoVLX-NEXT: kmovw %k0, %eax
22832 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22833 ; NoVLX-NEXT: vzeroupper
22836 %0 = bitcast <8 x i64> %__a to <8 x double>
22837 %load = load double, ptr %__b
22838 %vec = insertelement <8 x double> undef, double %load, i32 0
22839 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
22840 %2 = fcmp oeq <8 x double> %0, %1
22841 %3 = bitcast i8 %__u to <8 x i1>
22842 %4 = and <8 x i1> %2, %3
22843 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22844 %6 = bitcast <16 x i1> %5 to i16
; SAE variant: instead of fcmp, this calls @llvm.x86.avx512.mask.cmp.pd.512
; with predicate operand i32 2, an all-true <8 x i1> mask, and a trailing
; i32 8 operand; the <8 x i1> result is bitcast to i8 and zero-extended to
; i16. Both runs expect a {sae} zmm compare followed by a mask move and a
; movzbl of %al.
; NOTE(review): the function name says "oeq", but predicate 2 is matched by
; the checks as vcmplepd - confirm the name/predicate mismatch is intended.
22850 define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
22851 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask:
22852 ; VLX: # %bb.0: # %entry
22853 ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
22854 ; VLX-NEXT: kmovd %k0, %eax
22855 ; VLX-NEXT: movzbl %al, %eax
22856 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22857 ; VLX-NEXT: vzeroupper
22860 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask:
22861 ; NoVLX: # %bb.0: # %entry
22862 ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
22863 ; NoVLX-NEXT: kmovw %k0, %eax
22864 ; NoVLX-NEXT: movzbl %al, %eax
22865 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22866 ; NoVLX-NEXT: vzeroupper
22869 %0 = bitcast <8 x i64> %__a to <8 x double>
22870 %1 = bitcast <8 x i64> %__b to <8 x double>
22871 %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
22872 %3 = bitcast <8 x i1> %2 to i8
22873 %4 = zext i8 %3 to i16
; Masked SAE variant: calls @llvm.x86.avx512.mask.cmp.pd.512 with predicate
; operand i32 2, an all-true intrinsic mask, and a trailing i32 8 operand,
; then ANDs the <8 x i1> result with %__u (bitcast from i8) before the
; bitcast to i8 and zext to i16. Both runs expect the user mask to be applied
; in a general-purpose register (andb %dil, %al) after an unmasked {sae}
; compare, rather than through a {%k1} write-mask.
; NOTE(review): the function name says "oeq", but predicate 2 is matched by
; the checks as vcmplepd - confirm the name/predicate mismatch is intended.
22877 define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
22878 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask:
22879 ; VLX: # %bb.0: # %entry
22880 ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
22881 ; VLX-NEXT: kmovd %k0, %eax
22882 ; VLX-NEXT: andb %dil, %al
22883 ; VLX-NEXT: movzbl %al, %eax
22884 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22885 ; VLX-NEXT: vzeroupper
22888 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask:
22889 ; NoVLX: # %bb.0: # %entry
22890 ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
22891 ; NoVLX-NEXT: kmovw %k0, %eax
22892 ; NoVLX-NEXT: andb %dil, %al
22893 ; NoVLX-NEXT: movzbl %al, %eax
22894 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22895 ; NoVLX-NEXT: vzeroupper
22898 %0 = bitcast <8 x i64> %__a to <8 x double>
22899 %1 = bitcast <8 x i64> %__b to <8 x double>
22900 %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
22901 %3 = bitcast i8 %__u to <8 x i1>
22902 %4 = and <8 x i1> %2, %3
22903 %5 = bitcast <8 x i1> %4 to i8
22904 %6 = zext i8 %5 to i16
; Unmasked register-register fcmp oeq of eight doubles. The <8 x i1> result
; is widened to 32 lanes by a shufflevector whose indices 8-15 select from
; the zeroinitializer operand, then bitcast to i32. Both runs expect a single
; zmm vcmpeqpd into %k0 followed by a mask move to %eax (kmovd for the VLX
; run, kmovw for the NoVLX run).
22910 define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
22911 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
22912 ; VLX: # %bb.0: # %entry
22913 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22914 ; VLX-NEXT: kmovd %k0, %eax
22915 ; VLX-NEXT: vzeroupper
22918 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
22919 ; NoVLX: # %bb.0: # %entry
22920 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22921 ; NoVLX-NEXT: kmovw %k0, %eax
22922 ; NoVLX-NEXT: vzeroupper
22925 %0 = bitcast <8 x i64> %__a to <8 x double>
22926 %1 = bitcast <8 x i64> %__b to <8 x double>
22927 %2 = fcmp oeq <8 x double> %0, %1
22928 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22929 %4 = bitcast <32 x i1> %3 to i32
; Unmasked memory variant: the second operand is an <8 x i64> vector loaded
; from %__b. The <8 x i1> fcmp oeq result is zero-padded to 32 lanes and
; bitcast to i32. Both runs expect the load folded into the zmm vcmpeqpd as a
; (%rdi) memory operand.
22933 define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
22934 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
22935 ; VLX: # %bb.0: # %entry
22936 ; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
22937 ; VLX-NEXT: kmovd %k0, %eax
22938 ; VLX-NEXT: vzeroupper
22941 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
22942 ; NoVLX: # %bb.0: # %entry
22943 ; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
22944 ; NoVLX-NEXT: kmovw %k0, %eax
22945 ; NoVLX-NEXT: vzeroupper
22948 %0 = bitcast <8 x i64> %__a to <8 x double>
22949 %load = load <8 x i64>, ptr %__b
22950 %1 = bitcast <8 x i64> %load to <8 x double>
22951 %2 = fcmp oeq <8 x double> %0, %1
22952 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22953 %4 = bitcast <32 x i1> %3 to i32
; Unmasked broadcast variant: a scalar double loaded from %__b is splatted to
; <8 x double> (insertelement into lane 0, then an all-zero shuffle mask)
; before the fcmp oeq. The result is zero-padded to 32 lanes and bitcast to
; i32. Both runs expect a (%rdi){1to8} broadcast operand on the zmm compare.
22957 define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
22958 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
22959 ; VLX: # %bb.0: # %entry
22960 ; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22961 ; VLX-NEXT: kmovd %k0, %eax
22962 ; VLX-NEXT: vzeroupper
22965 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
22966 ; NoVLX: # %bb.0: # %entry
22967 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
22968 ; NoVLX-NEXT: kmovw %k0, %eax
22969 ; NoVLX-NEXT: vzeroupper
22972 %0 = bitcast <8 x i64> %__a to <8 x double>
22973 %load = load double, ptr %__b
22974 %vec = insertelement <8 x double> undef, double %load, i32 0
22975 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
22976 %2 = fcmp oeq <8 x double> %0, %1
22977 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
22978 %4 = bitcast <32 x i1> %3 to i32
; Masked register-register variant: the i8 argument %__u is bitcast to
; <8 x i1> and ANDed with the fcmp oeq result, which is then zero-padded to
; 32 lanes and bitcast to i32. Both runs expect %edi moved into %k1 and used
; as the {%k1} write-mask of the zmm vcmpeqpd.
22982 define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
22983 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask:
22984 ; VLX: # %bb.0: # %entry
22985 ; VLX-NEXT: kmovd %edi, %k1
22986 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22987 ; VLX-NEXT: kmovd %k0, %eax
22988 ; VLX-NEXT: vzeroupper
22991 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask:
22992 ; NoVLX: # %bb.0: # %entry
22993 ; NoVLX-NEXT: kmovw %edi, %k1
22994 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22995 ; NoVLX-NEXT: kmovw %k0, %eax
22996 ; NoVLX-NEXT: vzeroupper
22999 %0 = bitcast <8 x i64> %__a to <8 x double>
23000 %1 = bitcast <8 x i64> %__b to <8 x double>
23001 %2 = fcmp oeq <8 x double> %0, %1
23002 %3 = bitcast i8 %__u to <8 x i1>
23003 %4 = and <8 x i1> %2, %3
23004 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23005 %6 = bitcast <32 x i1> %5 to i32
; Masked memory variant: the second operand is an <8 x i64> vector loaded
; from %__b, and the fcmp oeq result is ANDed with %__u (bitcast from i8 to
; <8 x i1>) before being zero-padded to 32 lanes and bitcast to i32. Both
; runs expect the load folded into the zmm vcmpeqpd as a (%rsi) operand under
; the {%k1} write-mask taken from %edi.
23009 define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
23010 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem:
23011 ; VLX: # %bb.0: # %entry
23012 ; VLX-NEXT: kmovd %edi, %k1
23013 ; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23014 ; VLX-NEXT: kmovd %k0, %eax
23015 ; VLX-NEXT: vzeroupper
23018 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem:
23019 ; NoVLX: # %bb.0: # %entry
23020 ; NoVLX-NEXT: kmovw %edi, %k1
23021 ; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23022 ; NoVLX-NEXT: kmovw %k0, %eax
23023 ; NoVLX-NEXT: vzeroupper
23026 %0 = bitcast <8 x i64> %__a to <8 x double>
23027 %load = load <8 x i64>, ptr %__b
23028 %1 = bitcast <8 x i64> %load to <8 x double>
23029 %2 = fcmp oeq <8 x double> %0, %1
23030 %3 = bitcast i8 %__u to <8 x i1>
23031 %4 = and <8 x i1> %2, %3
23032 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23033 %6 = bitcast <32 x i1> %5 to i32
; Masked broadcast variant: a scalar double loaded from %__b is splatted to
; <8 x double>, compared with fcmp oeq, and the result is ANDed with %__u
; (bitcast from i8 to <8 x i1>) before being zero-padded to 32 lanes and
; bitcast to i32. Both runs expect a (%rsi){1to8} broadcast operand on the
; zmm compare under the {%k1} write-mask taken from %edi.
23037 define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
23038 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
23039 ; VLX: # %bb.0: # %entry
23040 ; VLX-NEXT: kmovd %edi, %k1
23041 ; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23042 ; VLX-NEXT: kmovd %k0, %eax
23043 ; VLX-NEXT: vzeroupper
23046 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
23047 ; NoVLX: # %bb.0: # %entry
23048 ; NoVLX-NEXT: kmovw %edi, %k1
23049 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23050 ; NoVLX-NEXT: kmovw %k0, %eax
23051 ; NoVLX-NEXT: vzeroupper
23054 %0 = bitcast <8 x i64> %__a to <8 x double>
23055 %load = load double, ptr %__b
23056 %vec = insertelement <8 x double> undef, double %load, i32 0
23057 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
23058 %2 = fcmp oeq <8 x double> %0, %1
23059 %3 = bitcast i8 %__u to <8 x i1>
23060 %4 = and <8 x i1> %2, %3
23061 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23062 %6 = bitcast <32 x i1> %5 to i32
; SAE variant returning i32: calls @llvm.x86.avx512.mask.cmp.pd.512 with
; predicate operand i32 2, an all-true <8 x i1> mask, and a trailing i32 8
; operand; the <8 x i1> result is bitcast to i8 and zero-extended to i32.
; The two runs differ in how the zero-extension is expected: the VLX run
; expects a single kmovb into %eax, while the NoVLX run expects kmovw
; followed by movzbl.
; NOTE(review): the function name says "oeq", but predicate 2 is matched by
; the checks as vcmplepd - confirm the name/predicate mismatch is intended.
23068 define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23069 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask:
23070 ; VLX: # %bb.0: # %entry
23071 ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23072 ; VLX-NEXT: kmovb %k0, %eax
23073 ; VLX-NEXT: vzeroupper
23076 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask:
23077 ; NoVLX: # %bb.0: # %entry
23078 ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23079 ; NoVLX-NEXT: kmovw %k0, %eax
23080 ; NoVLX-NEXT: movzbl %al, %eax
23081 ; NoVLX-NEXT: vzeroupper
23084 %0 = bitcast <8 x i64> %__a to <8 x double>
23085 %1 = bitcast <8 x i64> %__b to <8 x double>
23086 %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
23087 %3 = bitcast <8 x i1> %2 to i8
23088 %4 = zext i8 %3 to i32
; Masked SAE variant returning i32: calls @llvm.x86.avx512.mask.cmp.pd.512
; with predicate operand i32 2, an all-true intrinsic mask, and a trailing
; i32 8 operand, then ANDs the <8 x i1> result with %__u (bitcast from i8)
; before the bitcast to i8 and zext to i32. Both runs expect the user mask to
; be applied in a general-purpose register (andb %dil, %al) after an unmasked
; {sae} compare, followed by movzbl.
; NOTE(review): the function name says "oeq", but predicate 2 is matched by
; the checks as vcmplepd - confirm the name/predicate mismatch is intended.
23092 define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23093 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask:
23094 ; VLX: # %bb.0: # %entry
23095 ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23096 ; VLX-NEXT: kmovd %k0, %eax
23097 ; VLX-NEXT: andb %dil, %al
23098 ; VLX-NEXT: movzbl %al, %eax
23099 ; VLX-NEXT: vzeroupper
23102 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask:
23103 ; NoVLX: # %bb.0: # %entry
23104 ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23105 ; NoVLX-NEXT: kmovw %k0, %eax
23106 ; NoVLX-NEXT: andb %dil, %al
23107 ; NoVLX-NEXT: movzbl %al, %eax
23108 ; NoVLX-NEXT: vzeroupper
23111 %0 = bitcast <8 x i64> %__a to <8 x double>
23112 %1 = bitcast <8 x i64> %__b to <8 x double>
23113 %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
23114 %3 = bitcast i8 %__u to <8 x i1>
23115 %4 = and <8 x i1> %2, %3
23116 %5 = bitcast <8 x i1> %4 to i8
23117 %6 = zext i8 %5 to i32
; Unmasked register-register fcmp oeq of eight doubles. The <8 x i1> result
; is widened to 64 lanes by a shufflevector whose indices 8-15 select from
; the zeroinitializer operand, then bitcast to i64. Both runs expect a single
; zmm vcmpeqpd into %k0; the VLX run moves the mask with kmovq to %rax, the
; NoVLX run with kmovw to %eax.
23123 define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23124 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
23125 ; VLX: # %bb.0: # %entry
23126 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
23127 ; VLX-NEXT: kmovq %k0, %rax
23128 ; VLX-NEXT: vzeroupper
23131 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
23132 ; NoVLX: # %bb.0: # %entry
23133 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
23134 ; NoVLX-NEXT: kmovw %k0, %eax
23135 ; NoVLX-NEXT: vzeroupper
23138 %0 = bitcast <8 x i64> %__a to <8 x double>
23139 %1 = bitcast <8 x i64> %__b to <8 x double>
23140 %2 = fcmp oeq <8 x double> %0, %1
23141 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23142 %4 = bitcast <64 x i1> %3 to i64
; Unmasked memory variant: the second operand is an <8 x i64> vector loaded
; from %__b. The <8 x i1> fcmp oeq result is zero-padded to 64 lanes and
; bitcast to i64. Both runs expect the load folded into the zmm vcmpeqpd as a
; (%rdi) memory operand; the VLX run then uses kmovq, the NoVLX run kmovw.
23146 define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
23147 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
23148 ; VLX: # %bb.0: # %entry
23149 ; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
23150 ; VLX-NEXT: kmovq %k0, %rax
23151 ; VLX-NEXT: vzeroupper
23154 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
23155 ; NoVLX: # %bb.0: # %entry
23156 ; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
23157 ; NoVLX-NEXT: kmovw %k0, %eax
23158 ; NoVLX-NEXT: vzeroupper
23161 %0 = bitcast <8 x i64> %__a to <8 x double>
23162 %load = load <8 x i64>, ptr %__b
23163 %1 = bitcast <8 x i64> %load to <8 x double>
23164 %2 = fcmp oeq <8 x double> %0, %1
23165 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23166 %4 = bitcast <64 x i1> %3 to i64
; Unmasked broadcast variant: a scalar double loaded from %__b is splatted to
; <8 x double> (insertelement into lane 0, then an all-zero shuffle mask)
; before the fcmp oeq. The result is zero-padded to 64 lanes and bitcast to
; i64. Both runs expect a (%rdi){1to8} broadcast operand on the zmm compare;
; the VLX run then uses kmovq, the NoVLX run kmovw.
23170 define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
23171 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
23172 ; VLX: # %bb.0: # %entry
23173 ; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
23174 ; VLX-NEXT: kmovq %k0, %rax
23175 ; VLX-NEXT: vzeroupper
23178 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
23179 ; NoVLX: # %bb.0: # %entry
23180 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
23181 ; NoVLX-NEXT: kmovw %k0, %eax
23182 ; NoVLX-NEXT: vzeroupper
23185 %0 = bitcast <8 x i64> %__a to <8 x double>
23186 %load = load double, ptr %__b
23187 %vec = insertelement <8 x double> undef, double %load, i32 0
23188 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
23189 %2 = fcmp oeq <8 x double> %0, %1
23190 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23191 %4 = bitcast <64 x i1> %3 to i64
; Masked register-register variant: the i8 argument %__u is bitcast to
; <8 x i1> and ANDed with the fcmp oeq result, which is then zero-padded to
; 64 lanes and bitcast to i64. Both runs expect %edi moved into %k1 and used
; as the {%k1} write-mask of the zmm vcmpeqpd; the VLX run then uses kmovq,
; the NoVLX run kmovw.
23195 define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23196 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask:
23197 ; VLX: # %bb.0: # %entry
23198 ; VLX-NEXT: kmovd %edi, %k1
23199 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23200 ; VLX-NEXT: kmovq %k0, %rax
23201 ; VLX-NEXT: vzeroupper
23204 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask:
23205 ; NoVLX: # %bb.0: # %entry
23206 ; NoVLX-NEXT: kmovw %edi, %k1
23207 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23208 ; NoVLX-NEXT: kmovw %k0, %eax
23209 ; NoVLX-NEXT: vzeroupper
23212 %0 = bitcast <8 x i64> %__a to <8 x double>
23213 %1 = bitcast <8 x i64> %__b to <8 x double>
23214 %2 = fcmp oeq <8 x double> %0, %1
23215 %3 = bitcast i8 %__u to <8 x i1>
23216 %4 = and <8 x i1> %2, %3
23217 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23218 %6 = bitcast <64 x i1> %5 to i64
; Masked memory variant: the second operand is an <8 x i64> vector loaded
; from %__b, and the fcmp oeq result is ANDed with %__u (bitcast from i8 to
; <8 x i1>) before being zero-padded to 64 lanes and bitcast to i64. Both
; runs expect the load folded into the zmm vcmpeqpd as a (%rsi) operand under
; the {%k1} write-mask taken from %edi.
23222 define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
23223 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem:
23224 ; VLX: # %bb.0: # %entry
23225 ; VLX-NEXT: kmovd %edi, %k1
23226 ; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23227 ; VLX-NEXT: kmovq %k0, %rax
23228 ; VLX-NEXT: vzeroupper
23231 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem:
23232 ; NoVLX: # %bb.0: # %entry
23233 ; NoVLX-NEXT: kmovw %edi, %k1
23234 ; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23235 ; NoVLX-NEXT: kmovw %k0, %eax
23236 ; NoVLX-NEXT: vzeroupper
23239 %0 = bitcast <8 x i64> %__a to <8 x double>
23240 %load = load <8 x i64>, ptr %__b
23241 %1 = bitcast <8 x i64> %load to <8 x double>
23242 %2 = fcmp oeq <8 x double> %0, %1
23243 %3 = bitcast i8 %__u to <8 x i1>
23244 %4 = and <8 x i1> %2, %3
23245 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23246 %6 = bitcast <64 x i1> %5 to i64
; Masked broadcast variant: a scalar double loaded from %__b is splatted to
; <8 x double>, compared with fcmp oeq, and the result is ANDed with %__u
; (bitcast from i8 to <8 x i1>) before being zero-padded to 64 lanes and
; bitcast to i64. Both runs expect a (%rsi){1to8} broadcast operand on the
; zmm compare under the {%k1} write-mask taken from %edi.
23250 define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
23251 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
23252 ; VLX: # %bb.0: # %entry
23253 ; VLX-NEXT: kmovd %edi, %k1
23254 ; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23255 ; VLX-NEXT: kmovq %k0, %rax
23256 ; VLX-NEXT: vzeroupper
23259 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
23260 ; NoVLX: # %bb.0: # %entry
23261 ; NoVLX-NEXT: kmovw %edi, %k1
23262 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23263 ; NoVLX-NEXT: kmovw %k0, %eax
23264 ; NoVLX-NEXT: vzeroupper
23267 %0 = bitcast <8 x i64> %__a to <8 x double>
23268 %load = load double, ptr %__b
23269 %vec = insertelement <8 x double> undef, double %load, i32 0
23270 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
23271 %2 = fcmp oeq <8 x double> %0, %1
23272 %3 = bitcast i8 %__u to <8 x i1>
23273 %4 = and <8 x i1> %2, %3
23274 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23275 %6 = bitcast <64 x i1> %5 to i64
; Unmasked 512-bit packed-double compare through the
; llvm.x86.avx512.mask.cmp.pd.512 intrinsic, with the 8-bit result
; zero-extended to i64.
; NOTE(review): despite "oeq" in the function name, the intrinsic is called
; with predicate immediate 2 and the expected assembly is vcmplepd
; (less-or-equal), not an equality compare.
; The trailing i32 8 operand is presumably the SAE control; the expected
; assembly carries {sae} - confirm against the intrinsic definition.
; The VLX run expects a single byte-sized mask move (kmovb); the NoVLX run
; expects a 16-bit mask move followed by an explicit movzbl of the low byte.
23281 define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23282 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
23283 ; VLX: # %bb.0: # %entry
23284 ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23285 ; VLX-NEXT: kmovb %k0, %eax
23286 ; VLX-NEXT: vzeroupper
23289 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
23290 ; NoVLX: # %bb.0: # %entry
23291 ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23292 ; NoVLX-NEXT: kmovw %k0, %eax
23293 ; NoVLX-NEXT: movzbl %al, %eax
23294 ; NoVLX-NEXT: vzeroupper
23297 %0 = bitcast <8 x i64> %__a to <8 x double>
23298 %1 = bitcast <8 x i64> %__b to <8 x double>
; All-true <8 x i1> mask operand: every lane participates in the compare.
23299 %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
; Here the widening to i64 is written as bitcast-to-i8 + zext rather than as
; a shufflevector with a zero vector.
23300 %3 = bitcast <8 x i1> %2 to i8
23301 %4 = zext i8 %3 to i64
; Masked counterpart of the SAE packed-double compare test: the intrinsic is
; still called with an all-true mask operand, and the user mask %__u is
; applied afterwards with a separate IR 'and'.
; In the expected assembly for both run configurations the compare is emitted
; without a write-mask and %__u is applied in a general-purpose register
; (andb %dil, %al), followed by movzbl to form the zero-extended result.
; NOTE(review): despite "oeq" in the function name, the predicate immediate
; is 2 and the expected instruction is vcmplepd (less-or-equal).
23305 define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23306 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask:
23307 ; VLX: # %bb.0: # %entry
23308 ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23309 ; VLX-NEXT: kmovd %k0, %eax
23310 ; VLX-NEXT: andb %dil, %al
23311 ; VLX-NEXT: movzbl %al, %eax
23312 ; VLX-NEXT: vzeroupper
23315 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask:
23316 ; NoVLX: # %bb.0: # %entry
23317 ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23318 ; NoVLX-NEXT: kmovw %k0, %eax
23319 ; NoVLX-NEXT: andb %dil, %al
23320 ; NoVLX-NEXT: movzbl %al, %eax
23321 ; NoVLX-NEXT: vzeroupper
23324 %0 = bitcast <8 x i64> %__a to <8 x double>
23325 %1 = bitcast <8 x i64> %__b to <8 x double>
; The trailing i32 8 operand is presumably the SAE control; the expected
; assembly carries {sae} - confirm against the intrinsic definition.
23326 %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
; Apply the caller-supplied 8-bit mask to the compare result.
23327 %3 = bitcast i8 %__u to <8 x i1>
23328 %4 = and <8 x i1> %2, %3
23329 %5 = bitcast <8 x i1> %4 to i8
23330 %6 = zext i8 %5 to i64
23334 ; Test that we understand that cmpps with rounding zeros the upper bits of the mask register.
; 16-lane packed-float compare through llvm.x86.avx512.mask.cmp.ps.512 whose
; result is widened to 32 lanes by concatenating with zeros and returned as
; an i32. The expected assembly for both run configurations moves the mask
; register straight to a GPR after the compare, with no additional
; instruction to clear the upper 16 bits.
23335 define i32 @test_cmpm_rnd_zero(<16 x float> %a, <16 x float> %b) {
23336 ; VLX-LABEL: test_cmpm_rnd_zero:
23338 ; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
23339 ; VLX-NEXT: kmovd %k0, %eax
23340 ; VLX-NEXT: vzeroupper
23343 ; NoVLX-LABEL: test_cmpm_rnd_zero:
23345 ; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
23346 ; NoVLX-NEXT: kmovw %k0, %eax
23347 ; NoVLX-NEXT: vzeroupper
; Predicate immediate 2 with an all-true mask; the trailing i32 8 is
; presumably the SAE control (the expected assembly carries {sae}) - confirm.
23349 %res = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
; Round-trip through i16 and back to <16 x i1> before widening.
23350 %1 = bitcast <16 x i1> %res to i16
23351 %cast = bitcast i16 %1 to <16 x i1>
; Indices 0-15 select the compare lanes; indices 16-31 select lanes of the
; zeroinitializer operand, so the upper 16 lanes of the result are zero.
23352 %shuffle = shufflevector <16 x i1> %cast, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
23353 %cast2 = bitcast <32 x i1> %shuffle to i32
; Builds an 8-lane mask whose low four lanes are zero and whose high four
; lanes hold the result of a 4-lane "not equal to zero" integer compare.
; The expected assembly forms this with a mask-register left shift by 4
; after the test instruction: kshiftlb in the VLX run, and kshiftlw on a
; compare widened to zmm in the NoVLX run.
23357 define i8 @mask_zero_lower(<4 x i32> %a) {
23358 ; VLX-LABEL: mask_zero_lower:
23360 ; VLX-NEXT: vptestmd %xmm0, %xmm0, %k0
23361 ; VLX-NEXT: kshiftlb $4, %k0, %k0
23362 ; VLX-NEXT: kmovd %k0, %eax
23363 ; VLX-NEXT: # kill: def $al killed $al killed $eax
23366 ; NoVLX-LABEL: mask_zero_lower:
23368 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23369 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
23370 ; NoVLX-NEXT: kshiftlw $4, %k0, %k0
23371 ; NoVLX-NEXT: kmovw %k0, %eax
23372 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
23373 ; NoVLX-NEXT: vzeroupper
23375 %cmp = icmp ne <4 x i32> %a, zeroinitializer
; Indices 4-7 select lanes of the zeroinitializer operand (placed in result
; lanes 0-3); indices 0-3 select the lanes of %cmp (placed in result lanes 4-7).
23376 %concat = shufflevector <4 x i1> %cmp, <4 x i1> zeroinitializer, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
23377 %cast = bitcast <8 x i1> %concat to i8