1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-peephole -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=VLX
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-peephole -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=NoVLX
; Compares %__a and %__b for equality as <16 x i8>, widens the <16 x i1> result
; to <32 x i1> (shuffle indices 16-31 select lanes of the zeroinitializer
; operand, so the upper 16 lanes are zero) and bitcasts it to i32.
5 define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6 ; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
7 ; VLX: # %bb.0: # %entry
8 ; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
9 ; VLX-NEXT: kmovd %k0, %eax
12 ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
13 ; NoVLX: # %bb.0: # %entry
14 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
15 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
16 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
17 ; NoVLX-NEXT: kmovw %k0, %eax
18 ; NoVLX-NEXT: vzeroupper
21 %0 = bitcast <2 x i64> %__a to <16 x i8>
22 %1 = bitcast <2 x i64> %__b to <16 x i8>
23 %2 = icmp eq <16 x i8> %0, %1
24 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
25 %4 = bitcast <32 x i1> %3 to i32
; Compares %__a against a <2 x i64> loaded from %__b for equality as <16 x i8>,
; widens the <16 x i1> result to <32 x i1> (shuffle indices 16-31 select lanes
; of the zeroinitializer operand) and bitcasts it to i32.
29 define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
30 ; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
31 ; VLX: # %bb.0: # %entry
32 ; VLX-NEXT: vpcmpeqb (%rdi), %xmm0, %k0
33 ; VLX-NEXT: kmovd %k0, %eax
36 ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
37 ; NoVLX: # %bb.0: # %entry
38 ; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
39 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
40 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
41 ; NoVLX-NEXT: kmovw %k0, %eax
42 ; NoVLX-NEXT: vzeroupper
45 %0 = bitcast <2 x i64> %__a to <16 x i8>
46 %load = load <2 x i64>, <2 x i64>* %__b
47 %1 = bitcast <2 x i64> %load to <16 x i8>
48 %2 = icmp eq <16 x i8> %0, %1
49 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
50 %4 = bitcast <32 x i1> %3 to i32
; Compares %__a and %__b for equality as <16 x i8>, ANDs the <16 x i1> result
; with %__u reinterpreted as <16 x i1>, widens it to <32 x i1> (shuffle indices
; 16-31 select lanes of the zeroinitializer operand) and bitcasts it to i32.
54 define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
55 ; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
56 ; VLX: # %bb.0: # %entry
57 ; VLX-NEXT: kmovd %edi, %k1
58 ; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
59 ; VLX-NEXT: kmovd %k0, %eax
62 ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
63 ; NoVLX: # %bb.0: # %entry
64 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
65 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
66 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
67 ; NoVLX-NEXT: kmovw %k0, %eax
68 ; NoVLX-NEXT: andl %edi, %eax
69 ; NoVLX-NEXT: vzeroupper
72 %0 = bitcast <2 x i64> %__a to <16 x i8>
73 %1 = bitcast <2 x i64> %__b to <16 x i8>
74 %2 = icmp eq <16 x i8> %0, %1
75 %3 = bitcast i16 %__u to <16 x i1>
76 %4 = and <16 x i1> %2, %3
77 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
78 %6 = bitcast <32 x i1> %5 to i32
; Compares %__a against a <2 x i64> loaded from %__b for equality as <16 x i8>,
; ANDs the <16 x i1> result with %__u reinterpreted as <16 x i1>, widens it to
; <32 x i1> (shuffle indices 16-31 select lanes of the zeroinitializer operand)
; and bitcasts it to i32.
82 define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
83 ; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
84 ; VLX: # %bb.0: # %entry
85 ; VLX-NEXT: kmovd %edi, %k1
86 ; VLX-NEXT: vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
87 ; VLX-NEXT: kmovd %k0, %eax
90 ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
91 ; NoVLX: # %bb.0: # %entry
92 ; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
93 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
94 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
95 ; NoVLX-NEXT: kmovw %k0, %eax
96 ; NoVLX-NEXT: andl %edi, %eax
97 ; NoVLX-NEXT: vzeroupper
100 %0 = bitcast <2 x i64> %__a to <16 x i8>
101 %load = load <2 x i64>, <2 x i64>* %__b
102 %1 = bitcast <2 x i64> %load to <16 x i8>
103 %2 = icmp eq <16 x i8> %0, %1
104 %3 = bitcast i16 %__u to <16 x i1>
105 %4 = and <16 x i1> %2, %3
106 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
107 %6 = bitcast <32 x i1> %5 to i32
; Compares %__a and %__b for equality as <16 x i8>, widens the <16 x i1> result
; to <64 x i1> (every shuffle index past lane 15 is in the range 16-31, i.e. a
; lane of the zeroinitializer operand) and bitcasts it to i64.
112 define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
113 ; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
114 ; VLX: # %bb.0: # %entry
115 ; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
116 ; VLX-NEXT: kmovq %k0, %rax
119 ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
120 ; NoVLX: # %bb.0: # %entry
121 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
122 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
123 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
124 ; NoVLX-NEXT: kmovw %k0, %eax
125 ; NoVLX-NEXT: movzwl %ax, %eax
126 ; NoVLX-NEXT: vzeroupper
129 %0 = bitcast <2 x i64> %__a to <16 x i8>
130 %1 = bitcast <2 x i64> %__b to <16 x i8>
131 %2 = icmp eq <16 x i8> %0, %1
132 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
133 %4 = bitcast <64 x i1> %3 to i64
; Compares %__a against a <2 x i64> loaded from %__b for equality as <16 x i8>,
; widens the <16 x i1> result to <64 x i1> (every shuffle index past lane 15 is
; in the range 16-31, i.e. a lane of the zeroinitializer operand) and bitcasts
; it to i64.
137 define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
138 ; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
139 ; VLX: # %bb.0: # %entry
140 ; VLX-NEXT: vpcmpeqb (%rdi), %xmm0, %k0
141 ; VLX-NEXT: kmovq %k0, %rax
144 ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
145 ; NoVLX: # %bb.0: # %entry
146 ; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
147 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
148 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
149 ; NoVLX-NEXT: kmovw %k0, %eax
150 ; NoVLX-NEXT: movzwl %ax, %eax
151 ; NoVLX-NEXT: vzeroupper
154 %0 = bitcast <2 x i64> %__a to <16 x i8>
155 %load = load <2 x i64>, <2 x i64>* %__b
156 %1 = bitcast <2 x i64> %load to <16 x i8>
157 %2 = icmp eq <16 x i8> %0, %1
158 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
159 %4 = bitcast <64 x i1> %3 to i64
; Compares %__a and %__b for equality as <16 x i8>, ANDs the <16 x i1> result
; with %__u reinterpreted as <16 x i1>, widens it to <64 x i1> (every shuffle
; index past lane 15 is in the range 16-31, i.e. a lane of the zeroinitializer
; operand) and bitcasts it to i64.
163 define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
164 ; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
165 ; VLX: # %bb.0: # %entry
166 ; VLX-NEXT: kmovd %edi, %k1
167 ; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
168 ; VLX-NEXT: kmovq %k0, %rax
171 ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
172 ; NoVLX: # %bb.0: # %entry
173 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
174 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
175 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
176 ; NoVLX-NEXT: kmovw %k0, %eax
177 ; NoVLX-NEXT: andl %edi, %eax
178 ; NoVLX-NEXT: vzeroupper
181 %0 = bitcast <2 x i64> %__a to <16 x i8>
182 %1 = bitcast <2 x i64> %__b to <16 x i8>
183 %2 = icmp eq <16 x i8> %0, %1
184 %3 = bitcast i16 %__u to <16 x i1>
185 %4 = and <16 x i1> %2, %3
186 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
187 %6 = bitcast <64 x i1> %5 to i64
; Compares %__a against a <2 x i64> loaded from %__b for equality as <16 x i8>,
; ANDs the <16 x i1> result with %__u reinterpreted as <16 x i1>, widens it to
; <64 x i1> (every shuffle index past lane 15 is in the range 16-31, i.e. a lane
; of the zeroinitializer operand) and bitcasts it to i64.
191 define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
192 ; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
193 ; VLX: # %bb.0: # %entry
194 ; VLX-NEXT: kmovd %edi, %k1
195 ; VLX-NEXT: vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
196 ; VLX-NEXT: kmovq %k0, %rax
199 ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
200 ; NoVLX: # %bb.0: # %entry
201 ; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
202 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
203 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
204 ; NoVLX-NEXT: kmovw %k0, %eax
205 ; NoVLX-NEXT: andl %edi, %eax
206 ; NoVLX-NEXT: vzeroupper
209 %0 = bitcast <2 x i64> %__a to <16 x i8>
210 %load = load <2 x i64>, <2 x i64>* %__b
211 %1 = bitcast <2 x i64> %load to <16 x i8>
212 %2 = icmp eq <16 x i8> %0, %1
213 %3 = bitcast i16 %__u to <16 x i1>
214 %4 = and <16 x i1> %2, %3
215 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
216 %6 = bitcast <64 x i1> %5 to i64
; Compares %__a and %__b for equality as <32 x i8>, widens the <32 x i1> result
; to <64 x i1> (shuffle indices 32-63 select lanes of the zeroinitializer
; operand, so the upper 32 lanes are zero) and bitcasts it to i64.
221 define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
222 ; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
223 ; VLX: # %bb.0: # %entry
224 ; VLX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0
225 ; VLX-NEXT: kmovq %k0, %rax
226 ; VLX-NEXT: vzeroupper
229 ; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
230 ; NoVLX: # %bb.0: # %entry
231 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
232 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
233 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
234 ; NoVLX-NEXT: kmovw %k0, %ecx
235 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
236 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
237 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
238 ; NoVLX-NEXT: kmovw %k0, %eax
239 ; NoVLX-NEXT: shll $16, %eax
240 ; NoVLX-NEXT: orl %ecx, %eax
241 ; NoVLX-NEXT: vzeroupper
244 %0 = bitcast <4 x i64> %__a to <32 x i8>
245 %1 = bitcast <4 x i64> %__b to <32 x i8>
246 %2 = icmp eq <32 x i8> %0, %1
247 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
248 %4 = bitcast <64 x i1> %3 to i64
; Compares %__a against a <4 x i64> loaded from %__b for equality as <32 x i8>,
; widens the <32 x i1> result to <64 x i1> (shuffle indices 32-63 select lanes
; of the zeroinitializer operand) and bitcasts it to i64.
252 define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
253 ; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
254 ; VLX: # %bb.0: # %entry
255 ; VLX-NEXT: vpcmpeqb (%rdi), %ymm0, %k0
256 ; VLX-NEXT: kmovq %k0, %rax
257 ; VLX-NEXT: vzeroupper
260 ; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
261 ; NoVLX: # %bb.0: # %entry
262 ; NoVLX-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0
263 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
264 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
265 ; NoVLX-NEXT: kmovw %k0, %ecx
266 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
267 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
268 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
269 ; NoVLX-NEXT: kmovw %k0, %eax
270 ; NoVLX-NEXT: shll $16, %eax
271 ; NoVLX-NEXT: orl %ecx, %eax
272 ; NoVLX-NEXT: vzeroupper
275 %0 = bitcast <4 x i64> %__a to <32 x i8>
276 %load = load <4 x i64>, <4 x i64>* %__b
277 %1 = bitcast <4 x i64> %load to <32 x i8>
278 %2 = icmp eq <32 x i8> %0, %1
279 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
280 %4 = bitcast <64 x i1> %3 to i64
; Compares %__a and %__b for equality as <32 x i8>, ANDs the <32 x i1> result
; with %__u reinterpreted as <32 x i1>, widens it to <64 x i1> (shuffle indices
; 32-63 select lanes of the zeroinitializer operand) and bitcasts it to i64.
284 define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
285 ; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
286 ; VLX: # %bb.0: # %entry
287 ; VLX-NEXT: kmovd %edi, %k1
288 ; VLX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1}
289 ; VLX-NEXT: kmovq %k0, %rax
290 ; VLX-NEXT: vzeroupper
293 ; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
294 ; NoVLX: # %bb.0: # %entry
295 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
296 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
297 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
298 ; NoVLX-NEXT: kmovw %k0, %eax
299 ; NoVLX-NEXT: andl %edi, %eax
300 ; NoVLX-NEXT: shrl $16, %edi
301 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
302 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
303 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
304 ; NoVLX-NEXT: kmovw %k0, %ecx
305 ; NoVLX-NEXT: andl %edi, %ecx
306 ; NoVLX-NEXT: shll $16, %ecx
307 ; NoVLX-NEXT: movzwl %ax, %eax
308 ; NoVLX-NEXT: orl %ecx, %eax
309 ; NoVLX-NEXT: vzeroupper
312 %0 = bitcast <4 x i64> %__a to <32 x i8>
313 %1 = bitcast <4 x i64> %__b to <32 x i8>
314 %2 = icmp eq <32 x i8> %0, %1
315 %3 = bitcast i32 %__u to <32 x i1>
316 %4 = and <32 x i1> %2, %3
317 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
318 %6 = bitcast <64 x i1> %5 to i64
; Compares %__a against a <4 x i64> loaded from %__b for equality as <32 x i8>,
; ANDs the <32 x i1> result with %__u reinterpreted as <32 x i1>, widens it to
; <64 x i1> (shuffle indices 32-63 select lanes of the zeroinitializer operand)
; and bitcasts it to i64.
322 define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
323 ; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
324 ; VLX: # %bb.0: # %entry
325 ; VLX-NEXT: kmovd %edi, %k1
326 ; VLX-NEXT: vpcmpeqb (%rsi), %ymm0, %k0 {%k1}
327 ; VLX-NEXT: kmovq %k0, %rax
328 ; VLX-NEXT: vzeroupper
331 ; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
332 ; NoVLX: # %bb.0: # %entry
333 ; NoVLX-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
334 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
335 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
336 ; NoVLX-NEXT: kmovw %k0, %eax
337 ; NoVLX-NEXT: andl %edi, %eax
338 ; NoVLX-NEXT: shrl $16, %edi
339 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
340 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
341 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
342 ; NoVLX-NEXT: kmovw %k0, %ecx
343 ; NoVLX-NEXT: andl %edi, %ecx
344 ; NoVLX-NEXT: shll $16, %ecx
345 ; NoVLX-NEXT: movzwl %ax, %eax
346 ; NoVLX-NEXT: orl %ecx, %eax
347 ; NoVLX-NEXT: vzeroupper
350 %0 = bitcast <4 x i64> %__a to <32 x i8>
351 %load = load <4 x i64>, <4 x i64>* %__b
352 %1 = bitcast <4 x i64> %load to <32 x i8>
353 %2 = icmp eq <32 x i8> %0, %1
354 %3 = bitcast i32 %__u to <32 x i1>
355 %4 = and <32 x i1> %2, %3
356 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
357 %6 = bitcast <64 x i1> %5 to i64
; Compares %__a and %__b for equality as <8 x i16>, widens the <8 x i1> result
; to <16 x i1> (shuffle indices 8-15 select lanes of the zeroinitializer
; operand, so the upper 8 lanes are zero) and bitcasts it to i16.
362 define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
363 ; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
364 ; VLX: # %bb.0: # %entry
365 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
366 ; VLX-NEXT: kmovd %k0, %eax
367 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
370 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
371 ; NoVLX: # %bb.0: # %entry
372 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
373 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
374 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
375 ; NoVLX-NEXT: kmovw %k0, %eax
376 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
377 ; NoVLX-NEXT: vzeroupper
380 %0 = bitcast <2 x i64> %__a to <8 x i16>
381 %1 = bitcast <2 x i64> %__b to <8 x i16>
382 %2 = icmp eq <8 x i16> %0, %1
383 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
384 %4 = bitcast <16 x i1> %3 to i16
; Compares %__a against a <2 x i64> loaded from %__b for equality as <8 x i16>,
; widens the <8 x i1> result to <16 x i1> (shuffle indices 8-15 select lanes of
; the zeroinitializer operand) and bitcasts it to i16.
388 define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
389 ; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
390 ; VLX: # %bb.0: # %entry
391 ; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
392 ; VLX-NEXT: kmovd %k0, %eax
393 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
396 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
397 ; NoVLX: # %bb.0: # %entry
398 ; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
399 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
400 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
401 ; NoVLX-NEXT: kmovw %k0, %eax
402 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
403 ; NoVLX-NEXT: vzeroupper
406 %0 = bitcast <2 x i64> %__a to <8 x i16>
407 %load = load <2 x i64>, <2 x i64>* %__b
408 %1 = bitcast <2 x i64> %load to <8 x i16>
409 %2 = icmp eq <8 x i16> %0, %1
410 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
411 %4 = bitcast <16 x i1> %3 to i16
; Compares %__a and %__b for equality as <8 x i16>, ANDs the <8 x i1> result
; with %__u reinterpreted as <8 x i1>, widens it to <16 x i1> (shuffle indices
; 8-15 select lanes of the zeroinitializer operand) and bitcasts it to i16.
415 define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
416 ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
417 ; VLX: # %bb.0: # %entry
418 ; VLX-NEXT: kmovd %edi, %k1
419 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
420 ; VLX-NEXT: kmovd %k0, %eax
421 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
424 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
425 ; NoVLX: # %bb.0: # %entry
426 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
427 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
428 ; NoVLX-NEXT: kmovw %edi, %k1
429 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
430 ; NoVLX-NEXT: kmovw %k0, %eax
431 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
432 ; NoVLX-NEXT: vzeroupper
435 %0 = bitcast <2 x i64> %__a to <8 x i16>
436 %1 = bitcast <2 x i64> %__b to <8 x i16>
437 %2 = icmp eq <8 x i16> %0, %1
438 %3 = bitcast i8 %__u to <8 x i1>
439 %4 = and <8 x i1> %2, %3
440 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
441 %6 = bitcast <16 x i1> %5 to i16
; Compares %__a against a <2 x i64> loaded from %__b for equality as <8 x i16>,
; ANDs the <8 x i1> result with %__u reinterpreted as <8 x i1>, widens it to
; <16 x i1> (shuffle indices 8-15 select lanes of the zeroinitializer operand)
; and bitcasts it to i16.
445 define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
446 ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
447 ; VLX: # %bb.0: # %entry
448 ; VLX-NEXT: kmovd %edi, %k1
449 ; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
450 ; VLX-NEXT: kmovd %k0, %eax
451 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
454 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
455 ; NoVLX: # %bb.0: # %entry
456 ; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
457 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
458 ; NoVLX-NEXT: kmovw %edi, %k1
459 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
460 ; NoVLX-NEXT: kmovw %k0, %eax
461 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
462 ; NoVLX-NEXT: vzeroupper
465 %0 = bitcast <2 x i64> %__a to <8 x i16>
466 %load = load <2 x i64>, <2 x i64>* %__b
467 %1 = bitcast <2 x i64> %load to <8 x i16>
468 %2 = icmp eq <8 x i16> %0, %1
469 %3 = bitcast i8 %__u to <8 x i1>
470 %4 = and <8 x i1> %2, %3
471 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
472 %6 = bitcast <16 x i1> %5 to i16
; Compares %__a and %__b for equality as <8 x i16>, widens the <8 x i1> result
; to <32 x i1> (every shuffle index past lane 7 is in the range 8-15, i.e. a
; lane of the zeroinitializer operand) and bitcasts it to i32.
477 define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
478 ; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
479 ; VLX: # %bb.0: # %entry
480 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
481 ; VLX-NEXT: kmovd %k0, %eax
484 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
485 ; NoVLX: # %bb.0: # %entry
486 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
487 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
488 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
489 ; NoVLX-NEXT: kmovw %k0, %eax
490 ; NoVLX-NEXT: vzeroupper
493 %0 = bitcast <2 x i64> %__a to <8 x i16>
494 %1 = bitcast <2 x i64> %__b to <8 x i16>
495 %2 = icmp eq <8 x i16> %0, %1
496 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
497 %4 = bitcast <32 x i1> %3 to i32
; Compares %__a against a <2 x i64> loaded from %__b for equality as <8 x i16>,
; widens the <8 x i1> result to <32 x i1> (every shuffle index past lane 7 is in
; the range 8-15, i.e. a lane of the zeroinitializer operand) and bitcasts it
; to i32.
501 define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
502 ; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
503 ; VLX: # %bb.0: # %entry
504 ; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
505 ; VLX-NEXT: kmovd %k0, %eax
508 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
509 ; NoVLX: # %bb.0: # %entry
510 ; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
511 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
512 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
513 ; NoVLX-NEXT: kmovw %k0, %eax
514 ; NoVLX-NEXT: vzeroupper
517 %0 = bitcast <2 x i64> %__a to <8 x i16>
518 %load = load <2 x i64>, <2 x i64>* %__b
519 %1 = bitcast <2 x i64> %load to <8 x i16>
520 %2 = icmp eq <8 x i16> %0, %1
521 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
522 %4 = bitcast <32 x i1> %3 to i32
; Compares %__a and %__b for equality as <8 x i16>, ANDs the <8 x i1> result
; with %__u reinterpreted as <8 x i1>, widens it to <32 x i1> (every shuffle
; index past lane 7 is in the range 8-15, i.e. a lane of the zeroinitializer
; operand) and bitcasts it to i32.
526 define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
527 ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
528 ; VLX: # %bb.0: # %entry
529 ; VLX-NEXT: kmovd %edi, %k1
530 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
531 ; VLX-NEXT: kmovd %k0, %eax
534 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
535 ; NoVLX: # %bb.0: # %entry
536 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
537 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
538 ; NoVLX-NEXT: kmovw %edi, %k1
539 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
540 ; NoVLX-NEXT: kmovw %k0, %eax
541 ; NoVLX-NEXT: vzeroupper
544 %0 = bitcast <2 x i64> %__a to <8 x i16>
545 %1 = bitcast <2 x i64> %__b to <8 x i16>
546 %2 = icmp eq <8 x i16> %0, %1
547 %3 = bitcast i8 %__u to <8 x i1>
548 %4 = and <8 x i1> %2, %3
549 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
550 %6 = bitcast <32 x i1> %5 to i32
; Compares %__a against a <2 x i64> loaded from %__b for equality as <8 x i16>,
; ANDs the <8 x i1> result with %__u reinterpreted as <8 x i1>, widens it to
; <32 x i1> (every shuffle index past lane 7 is in the range 8-15, i.e. a lane
; of the zeroinitializer operand) and bitcasts it to i32.
554 define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
555 ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
556 ; VLX: # %bb.0: # %entry
557 ; VLX-NEXT: kmovd %edi, %k1
558 ; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
559 ; VLX-NEXT: kmovd %k0, %eax
562 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
563 ; NoVLX: # %bb.0: # %entry
564 ; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
565 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
566 ; NoVLX-NEXT: kmovw %edi, %k1
567 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
568 ; NoVLX-NEXT: kmovw %k0, %eax
569 ; NoVLX-NEXT: vzeroupper
572 %0 = bitcast <2 x i64> %__a to <8 x i16>
573 %load = load <2 x i64>, <2 x i64>* %__b
574 %1 = bitcast <2 x i64> %load to <8 x i16>
575 %2 = icmp eq <8 x i16> %0, %1
576 %3 = bitcast i8 %__u to <8 x i1>
577 %4 = and <8 x i1> %2, %3
578 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
579 %6 = bitcast <32 x i1> %5 to i32
; Compares %__a and %__b for equality as <8 x i16>, widens the <8 x i1> result
; to <64 x i1> (every shuffle index past lane 7 is in the range 8-15, i.e. a
; lane of the zeroinitializer operand) and bitcasts it to i64.
584 define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
585 ; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
586 ; VLX: # %bb.0: # %entry
587 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
588 ; VLX-NEXT: kmovq %k0, %rax
591 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
592 ; NoVLX: # %bb.0: # %entry
593 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
594 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
595 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
596 ; NoVLX-NEXT: kmovw %k0, %eax
597 ; NoVLX-NEXT: movzwl %ax, %eax
598 ; NoVLX-NEXT: vzeroupper
601 %0 = bitcast <2 x i64> %__a to <8 x i16>
602 %1 = bitcast <2 x i64> %__b to <8 x i16>
603 %2 = icmp eq <8 x i16> %0, %1
604 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
605 %4 = bitcast <64 x i1> %3 to i64
; Compares %__a against a <2 x i64> loaded from %__b for equality as <8 x i16>,
; widens the <8 x i1> result to <64 x i1> (every shuffle index past lane 7 is in
; the range 8-15, i.e. a lane of the zeroinitializer operand) and bitcasts it
; to i64.
609 define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
610 ; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
611 ; VLX: # %bb.0: # %entry
612 ; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
613 ; VLX-NEXT: kmovq %k0, %rax
616 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
617 ; NoVLX: # %bb.0: # %entry
618 ; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
619 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
620 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
621 ; NoVLX-NEXT: kmovw %k0, %eax
622 ; NoVLX-NEXT: movzwl %ax, %eax
623 ; NoVLX-NEXT: vzeroupper
626 %0 = bitcast <2 x i64> %__a to <8 x i16>
627 %load = load <2 x i64>, <2 x i64>* %__b
628 %1 = bitcast <2 x i64> %load to <8 x i16>
629 %2 = icmp eq <8 x i16> %0, %1
630 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
631 %4 = bitcast <64 x i1> %3 to i64
; Compares %__a and %__b for equality as <8 x i16>, ANDs the <8 x i1> result
; with %__u reinterpreted as <8 x i1>, widens it to <64 x i1> (every shuffle
; index past lane 7 is in the range 8-15, i.e. a lane of the zeroinitializer
; operand) and bitcasts it to i64.
635 define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
636 ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
637 ; VLX: # %bb.0: # %entry
638 ; VLX-NEXT: kmovd %edi, %k1
639 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
640 ; VLX-NEXT: kmovq %k0, %rax
643 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
644 ; NoVLX: # %bb.0: # %entry
645 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
646 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
647 ; NoVLX-NEXT: kmovw %edi, %k1
648 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
649 ; NoVLX-NEXT: kmovw %k0, %eax
650 ; NoVLX-NEXT: movzwl %ax, %eax
651 ; NoVLX-NEXT: vzeroupper
654 %0 = bitcast <2 x i64> %__a to <8 x i16>
655 %1 = bitcast <2 x i64> %__b to <8 x i16>
656 %2 = icmp eq <8 x i16> %0, %1
657 %3 = bitcast i8 %__u to <8 x i1>
658 %4 = and <8 x i1> %2, %3
659 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
660 %6 = bitcast <64 x i1> %5 to i64
; Compares %__a against a <2 x i64> loaded from %__b for equality as <8 x i16>,
; ANDs the <8 x i1> result with %__u reinterpreted as <8 x i1>, widens it to
; <64 x i1> (every shuffle index past lane 7 is in the range 8-15, i.e. a lane
; of the zeroinitializer operand) and bitcasts it to i64.
664 define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
665 ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
666 ; VLX: # %bb.0: # %entry
667 ; VLX-NEXT: kmovd %edi, %k1
668 ; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
669 ; VLX-NEXT: kmovq %k0, %rax
672 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
673 ; NoVLX: # %bb.0: # %entry
674 ; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
675 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
676 ; NoVLX-NEXT: kmovw %edi, %k1
677 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
678 ; NoVLX-NEXT: kmovw %k0, %eax
679 ; NoVLX-NEXT: movzwl %ax, %eax
680 ; NoVLX-NEXT: vzeroupper
683 %0 = bitcast <2 x i64> %__a to <8 x i16>
684 %load = load <2 x i64>, <2 x i64>* %__b
685 %1 = bitcast <2 x i64> %load to <8 x i16>
686 %2 = icmp eq <8 x i16> %0, %1
687 %3 = bitcast i8 %__u to <8 x i1>
688 %4 = and <8 x i1> %2, %3
689 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
690 %6 = bitcast <64 x i1> %5 to i64
; Unmasked 16 x i16 equality compare of two register operands, widened to a
; 32-bit mask value.
;   %__a, %__b  <4 x i64>, each reinterpreted as <16 x i16>
; The VLX run expects a single 256-bit vpcmpeqw into a k-register plus kmovd.
; The NoVLX run expects a vector vpcmpeqw, sign extension to dwords
; (vpmovsxwd), vptestmd and kmovw.
695 define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
696 ; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
697 ; VLX: # %bb.0: # %entry
698 ; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
699 ; VLX-NEXT: kmovd %k0, %eax
700 ; VLX-NEXT: vzeroupper
703 ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
704 ; NoVLX: # %bb.0: # %entry
705 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
706 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
707 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
708 ; NoVLX-NEXT: kmovw %k0, %eax
709 ; NoVLX-NEXT: vzeroupper
712 %0 = bitcast <4 x i64> %__a to <16 x i16>
713 %1 = bitcast <4 x i64> %__b to <16 x i16>
714 %2 = icmp eq <16 x i16> %0, %1
; Widening shuffle. Indices 0-15 pick lanes of %2 and indices 16-31 pick lanes
; of the zeroinitializer operand, so result lanes 16-31 are all zero.
715 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
716 %4 = bitcast <32 x i1> %3 to i32
; Unmasked 16 x i16 equality compare with the second operand loaded from
; memory, widened to a 32-bit mask value.
;   %__a  <4 x i64>, reinterpreted as <16 x i16>
;   %__b  pointer to <4 x i64>, loaded and reinterpreted as <16 x i16>
; Both runs expect the load to be folded into vpcmpeqw as a (%rdi) operand.
720 define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
721 ; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
722 ; VLX: # %bb.0: # %entry
723 ; VLX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0
724 ; VLX-NEXT: kmovd %k0, %eax
725 ; VLX-NEXT: vzeroupper
728 ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
729 ; NoVLX: # %bb.0: # %entry
730 ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
731 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
732 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
733 ; NoVLX-NEXT: kmovw %k0, %eax
734 ; NoVLX-NEXT: vzeroupper
737 %0 = bitcast <4 x i64> %__a to <16 x i16>
738 %load = load <4 x i64>, <4 x i64>* %__b
739 %1 = bitcast <4 x i64> %load to <16 x i16>
740 %2 = icmp eq <16 x i16> %0, %1
; Widening shuffle. Indices 0-15 pick lanes of %2 and indices 16-31 pick lanes
; of the zeroinitializer operand, so result lanes 16-31 are all zero.
741 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
742 %4 = bitcast <32 x i1> %3 to i32
; Masked 16 x i16 equality compare of two register operands, widened to a
; 32-bit mask value.
;   %__u        i16 mask, bitcast to <16 x i1> and ANDed with the compare result
;   %__a, %__b  <4 x i64>, each reinterpreted as <16 x i16>
; The VLX run expects the mask to be applied as a {%k1} write-mask on vpcmpeqw.
; The NoVLX run expects the mask to be applied with a scalar andl on the
; extracted mask bits.
746 define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
747 ; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
748 ; VLX: # %bb.0: # %entry
749 ; VLX-NEXT: kmovd %edi, %k1
750 ; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
751 ; VLX-NEXT: kmovd %k0, %eax
752 ; VLX-NEXT: vzeroupper
755 ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
756 ; NoVLX: # %bb.0: # %entry
757 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
758 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
759 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
760 ; NoVLX-NEXT: kmovw %k0, %eax
761 ; NoVLX-NEXT: andl %edi, %eax
762 ; NoVLX-NEXT: vzeroupper
765 %0 = bitcast <4 x i64> %__a to <16 x i16>
766 %1 = bitcast <4 x i64> %__b to <16 x i16>
767 %2 = icmp eq <16 x i16> %0, %1
768 %3 = bitcast i16 %__u to <16 x i1>
769 %4 = and <16 x i1> %2, %3
; Widening shuffle. Indices 0-15 pick lanes of %4 and indices 16-31 pick lanes
; of the zeroinitializer operand, so result lanes 16-31 are all zero.
770 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
771 %6 = bitcast <32 x i1> %5 to i32
; Masked 16 x i16 equality compare with the second operand loaded from memory,
; widened to a 32-bit mask value.
;   %__u  i16 mask, bitcast to <16 x i1> and ANDed with the compare result
;   %__a  <4 x i64>, reinterpreted as <16 x i16>
;   %__b  pointer to <4 x i64>, loaded and reinterpreted as <16 x i16>
; Both runs expect the load to be folded into vpcmpeqw as a (%rsi) operand.
; The VLX run applies the mask as {%k1}, the NoVLX run with a scalar andl.
775 define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
776 ; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
777 ; VLX: # %bb.0: # %entry
778 ; VLX-NEXT: kmovd %edi, %k1
779 ; VLX-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
780 ; VLX-NEXT: kmovd %k0, %eax
781 ; VLX-NEXT: vzeroupper
784 ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
785 ; NoVLX: # %bb.0: # %entry
786 ; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
787 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
788 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
789 ; NoVLX-NEXT: kmovw %k0, %eax
790 ; NoVLX-NEXT: andl %edi, %eax
791 ; NoVLX-NEXT: vzeroupper
794 %0 = bitcast <4 x i64> %__a to <16 x i16>
795 %load = load <4 x i64>, <4 x i64>* %__b
796 %1 = bitcast <4 x i64> %load to <16 x i16>
797 %2 = icmp eq <16 x i16> %0, %1
798 %3 = bitcast i16 %__u to <16 x i1>
799 %4 = and <16 x i1> %2, %3
; Widening shuffle. Indices 0-15 pick lanes of %4 and indices 16-31 pick lanes
; of the zeroinitializer operand, so result lanes 16-31 are all zero.
800 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
801 %6 = bitcast <32 x i1> %5 to i32
; Unmasked 16 x i16 equality compare of two register operands, widened to a
; 64-bit mask value.
;   %__a, %__b  <4 x i64>, each reinterpreted as <16 x i16>
; The VLX run expects vpcmpeqw into a k-register followed by kmovq. The NoVLX
; run expects vpcmpeqw, vpmovsxwd, vptestmd, kmovw and a movzwl of the result.
806 define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
807 ; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
808 ; VLX: # %bb.0: # %entry
809 ; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
810 ; VLX-NEXT: kmovq %k0, %rax
811 ; VLX-NEXT: vzeroupper
814 ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
815 ; NoVLX: # %bb.0: # %entry
816 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
817 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
818 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
819 ; NoVLX-NEXT: kmovw %k0, %eax
820 ; NoVLX-NEXT: movzwl %ax, %eax
821 ; NoVLX-NEXT: vzeroupper
824 %0 = bitcast <4 x i64> %__a to <16 x i16>
825 %1 = bitcast <4 x i64> %__b to <16 x i16>
826 %2 = icmp eq <16 x i16> %0, %1
; Widening shuffle. Indices 0-15 pick lanes of %2 and indices 16-31 pick lanes
; of the zeroinitializer operand, so result lanes 16-63 are all zero.
827 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
828 %4 = bitcast <64 x i1> %3 to i64
; Unmasked 16 x i16 equality compare with the second operand loaded from
; memory, widened to a 64-bit mask value.
;   %__a  <4 x i64>, reinterpreted as <16 x i16>
;   %__b  pointer to <4 x i64>, loaded and reinterpreted as <16 x i16>
; Both runs expect the load to be folded into vpcmpeqw as a (%rdi) operand.
832 define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
833 ; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
834 ; VLX: # %bb.0: # %entry
835 ; VLX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0
836 ; VLX-NEXT: kmovq %k0, %rax
837 ; VLX-NEXT: vzeroupper
840 ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
841 ; NoVLX: # %bb.0: # %entry
842 ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
843 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
844 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
845 ; NoVLX-NEXT: kmovw %k0, %eax
846 ; NoVLX-NEXT: movzwl %ax, %eax
847 ; NoVLX-NEXT: vzeroupper
850 %0 = bitcast <4 x i64> %__a to <16 x i16>
851 %load = load <4 x i64>, <4 x i64>* %__b
852 %1 = bitcast <4 x i64> %load to <16 x i16>
853 %2 = icmp eq <16 x i16> %0, %1
; Widening shuffle. Indices 0-15 pick lanes of %2 and indices 16-31 pick lanes
; of the zeroinitializer operand, so result lanes 16-63 are all zero.
854 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
855 %4 = bitcast <64 x i1> %3 to i64
; Masked 16 x i16 equality compare of two register operands, widened to a
; 64-bit mask value.
;   %__u        i16 mask, bitcast to <16 x i1> and ANDed with the compare result
;   %__a, %__b  <4 x i64>, each reinterpreted as <16 x i16>
; The VLX run applies the mask as a {%k1} write-mask and reads the result with
; kmovq. The NoVLX run applies the mask with a scalar andl.
859 define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
860 ; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
861 ; VLX: # %bb.0: # %entry
862 ; VLX-NEXT: kmovd %edi, %k1
863 ; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
864 ; VLX-NEXT: kmovq %k0, %rax
865 ; VLX-NEXT: vzeroupper
868 ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
869 ; NoVLX: # %bb.0: # %entry
870 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
871 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
872 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
873 ; NoVLX-NEXT: kmovw %k0, %eax
874 ; NoVLX-NEXT: andl %edi, %eax
875 ; NoVLX-NEXT: vzeroupper
878 %0 = bitcast <4 x i64> %__a to <16 x i16>
879 %1 = bitcast <4 x i64> %__b to <16 x i16>
880 %2 = icmp eq <16 x i16> %0, %1
881 %3 = bitcast i16 %__u to <16 x i1>
882 %4 = and <16 x i1> %2, %3
; Widening shuffle. Indices 0-15 pick lanes of %4 and indices 16-31 pick lanes
; of the zeroinitializer operand, so result lanes 16-63 are all zero.
883 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
884 %6 = bitcast <64 x i1> %5 to i64
; Masked 16 x i16 equality compare with the second operand loaded from memory,
; widened to a 64-bit mask value.
;   %__u  i16 mask, bitcast to <16 x i1> and ANDed with the compare result
;   %__a  <4 x i64>, reinterpreted as <16 x i16>
;   %__b  pointer to <4 x i64>, loaded and reinterpreted as <16 x i16>
; Both runs expect the load to be folded into vpcmpeqw as a (%rsi) operand.
; The VLX run applies the mask as {%k1}, the NoVLX run with a scalar andl.
888 define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
889 ; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
890 ; VLX: # %bb.0: # %entry
891 ; VLX-NEXT: kmovd %edi, %k1
892 ; VLX-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
893 ; VLX-NEXT: kmovq %k0, %rax
894 ; VLX-NEXT: vzeroupper
897 ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
898 ; NoVLX: # %bb.0: # %entry
899 ; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
900 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
901 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
902 ; NoVLX-NEXT: kmovw %k0, %eax
903 ; NoVLX-NEXT: andl %edi, %eax
904 ; NoVLX-NEXT: vzeroupper
907 %0 = bitcast <4 x i64> %__a to <16 x i16>
908 %load = load <4 x i64>, <4 x i64>* %__b
909 %1 = bitcast <4 x i64> %load to <16 x i16>
910 %2 = icmp eq <16 x i16> %0, %1
911 %3 = bitcast i16 %__u to <16 x i1>
912 %4 = and <16 x i1> %2, %3
; Widening shuffle. Indices 0-15 pick lanes of %4 and indices 16-31 pick lanes
; of the zeroinitializer operand, so result lanes 16-63 are all zero.
913 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
914 %6 = bitcast <64 x i1> %5 to i64
; Unmasked 32 x i16 equality compare of two register operands, widened to a
; 64-bit mask value.
;   %__a, %__b  <8 x i64>, each reinterpreted as <32 x i16>
; The VLX run expects one 512-bit vpcmpeqw into a k-register plus kmovq.
; The NoVLX run expects the compare to be split into two 256-bit halves
; (upper halves obtained with vextracti64x4 $1). Each half is converted to a
; 16-bit mask via vpmovsxwd, vptestmd and kmovw, and the two masks are joined
; with shll $16 and orl.
919 define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
920 ; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
921 ; VLX: # %bb.0: # %entry
922 ; VLX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
923 ; VLX-NEXT: kmovq %k0, %rax
924 ; VLX-NEXT: vzeroupper
927 ; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
928 ; NoVLX: # %bb.0: # %entry
929 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
930 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3
931 ; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2
932 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
933 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
934 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
935 ; NoVLX-NEXT: kmovw %k0, %ecx
936 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm0
937 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
938 ; NoVLX-NEXT: kmovw %k0, %eax
939 ; NoVLX-NEXT: shll $16, %eax
940 ; NoVLX-NEXT: orl %ecx, %eax
941 ; NoVLX-NEXT: vzeroupper
944 %0 = bitcast <8 x i64> %__a to <32 x i16>
945 %1 = bitcast <8 x i64> %__b to <32 x i16>
946 %2 = icmp eq <32 x i16> %0, %1
; Widening shuffle. Indices 0-31 pick lanes of %2 and indices 32-63 pick lanes
; of the zeroinitializer operand, so result lanes 32-63 are all zero.
947 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
948 %4 = bitcast <64 x i1> %3 to i64
; Unmasked 32 x i16 equality compare with the second operand loaded from
; memory, widened to a 64-bit mask value.
;   %__a  <8 x i64>, reinterpreted as <32 x i16>
;   %__b  pointer to <8 x i64>, loaded and reinterpreted as <32 x i16>
; The VLX run expects one 512-bit vpcmpeqw with a (%rdi) memory operand.
; The NoVLX run expects two 256-bit compares against (%rdi) and 32(%rdi),
; whose 16-bit masks are joined with shll $16 and orl.
952 define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
953 ; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
954 ; VLX: # %bb.0: # %entry
955 ; VLX-NEXT: vpcmpeqw (%rdi), %zmm0, %k0
956 ; VLX-NEXT: kmovq %k0, %rax
957 ; VLX-NEXT: vzeroupper
960 ; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
961 ; NoVLX: # %bb.0: # %entry
962 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1
963 ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
964 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
965 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
966 ; NoVLX-NEXT: kmovw %k0, %ecx
967 ; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm1, %ymm0
968 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
969 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
970 ; NoVLX-NEXT: kmovw %k0, %eax
971 ; NoVLX-NEXT: shll $16, %eax
972 ; NoVLX-NEXT: orl %ecx, %eax
973 ; NoVLX-NEXT: vzeroupper
976 %0 = bitcast <8 x i64> %__a to <32 x i16>
977 %load = load <8 x i64>, <8 x i64>* %__b
978 %1 = bitcast <8 x i64> %load to <32 x i16>
979 %2 = icmp eq <32 x i16> %0, %1
; Widening shuffle. Indices 0-31 pick lanes of %2 and indices 32-63 pick lanes
; of the zeroinitializer operand, so result lanes 32-63 are all zero.
980 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
981 %4 = bitcast <64 x i1> %3 to i64
; Masked 32 x i16 equality compare of two register operands, widened to a
; 64-bit mask value.
;   %__u        i32 mask, bitcast to <32 x i1> and ANDed with the compare result
;   %__a, %__b  <8 x i64>, each reinterpreted as <32 x i16>
; The VLX run expects one 512-bit vpcmpeqw with a {%k1} write-mask.
; The NoVLX run expects two 256-bit compares. The low mask is ANDed with %edi,
; the high mask with %edi shifted right by 16, and the halves are joined with
; shll $16, movzwl and orl.
985 define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
986 ; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
987 ; VLX: # %bb.0: # %entry
988 ; VLX-NEXT: kmovd %edi, %k1
989 ; VLX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
990 ; VLX-NEXT: kmovq %k0, %rax
991 ; VLX-NEXT: vzeroupper
994 ; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
995 ; NoVLX: # %bb.0: # %entry
996 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm2
997 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
998 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
999 ; NoVLX-NEXT: kmovw %k0, %eax
1000 ; NoVLX-NEXT: andl %edi, %eax
1001 ; NoVLX-NEXT: shrl $16, %edi
1002 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
1003 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
1004 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
1005 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
1006 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1007 ; NoVLX-NEXT: kmovw %k0, %ecx
1008 ; NoVLX-NEXT: andl %edi, %ecx
1009 ; NoVLX-NEXT: shll $16, %ecx
1010 ; NoVLX-NEXT: movzwl %ax, %eax
1011 ; NoVLX-NEXT: orl %ecx, %eax
1012 ; NoVLX-NEXT: vzeroupper
1015 %0 = bitcast <8 x i64> %__a to <32 x i16>
1016 %1 = bitcast <8 x i64> %__b to <32 x i16>
1017 %2 = icmp eq <32 x i16> %0, %1
1018 %3 = bitcast i32 %__u to <32 x i1>
1019 %4 = and <32 x i1> %2, %3
; Widening shuffle. Indices 0-31 pick lanes of %4 and indices 32-63 pick lanes
; of the zeroinitializer operand, so result lanes 32-63 are all zero.
1020 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
1021 %6 = bitcast <64 x i1> %5 to i64
; Masked 32 x i16 equality compare with the second operand loaded from memory,
; widened to a 64-bit mask value.
;   %__u  i32 mask, bitcast to <32 x i1> and ANDed with the compare result
;   %__a  <8 x i64>, reinterpreted as <32 x i16>
;   %__b  pointer to <8 x i64>, loaded and reinterpreted as <32 x i16>
; The VLX run expects one 512-bit vpcmpeqw with a (%rsi) operand and {%k1}.
; The NoVLX run expects two 256-bit compares against (%rsi) and 32(%rsi), each
; mask ANDed with the matching 16 bits of %edi and then joined with shll $16,
; movzwl and orl.
1025 define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
1026 ; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
1027 ; VLX: # %bb.0: # %entry
1028 ; VLX-NEXT: kmovd %edi, %k1
1029 ; VLX-NEXT: vpcmpeqw (%rsi), %zmm0, %k0 {%k1}
1030 ; VLX-NEXT: kmovq %k0, %rax
1031 ; VLX-NEXT: vzeroupper
1034 ; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
1035 ; NoVLX: # %bb.0: # %entry
1036 ; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm1
1037 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
1038 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
1039 ; NoVLX-NEXT: kmovw %k0, %eax
1040 ; NoVLX-NEXT: andl %edi, %eax
1041 ; NoVLX-NEXT: shrl $16, %edi
1042 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
1043 ; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm0, %ymm0
1044 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
1045 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1046 ; NoVLX-NEXT: kmovw %k0, %ecx
1047 ; NoVLX-NEXT: andl %edi, %ecx
1048 ; NoVLX-NEXT: shll $16, %ecx
1049 ; NoVLX-NEXT: movzwl %ax, %eax
1050 ; NoVLX-NEXT: orl %ecx, %eax
1051 ; NoVLX-NEXT: vzeroupper
1054 %0 = bitcast <8 x i64> %__a to <32 x i16>
1055 %load = load <8 x i64>, <8 x i64>* %__b
1056 %1 = bitcast <8 x i64> %load to <32 x i16>
1057 %2 = icmp eq <32 x i16> %0, %1
1058 %3 = bitcast i32 %__u to <32 x i1>
1059 %4 = and <32 x i1> %2, %3
; Widening shuffle. Indices 0-31 pick lanes of %4 and indices 32-63 pick lanes
; of the zeroinitializer operand, so result lanes 32-63 are all zero.
1060 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
1061 %6 = bitcast <64 x i1> %5 to i64
; Unmasked 4 x i32 equality compare of two register operands, widened to an
; 8-bit mask value.
;   %__a, %__b  <2 x i64>, each reinterpreted as <4 x i32>
; The VLX run expects a 128-bit vpcmpeqd into a k-register plus kmovd.
; The NoVLX run expects the operands to be treated as zmm registers for a
; 512-bit vpcmpeqd, with the mask bits above the low four cleared by the
; kshiftlw $12 / kshiftrw $12 pair.
1066 define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1067 ; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
1068 ; VLX: # %bb.0: # %entry
1069 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
1070 ; VLX-NEXT: kmovd %k0, %eax
1071 ; VLX-NEXT: # kill: def $al killed $al killed $eax
1074 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
1075 ; NoVLX: # %bb.0: # %entry
1076 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1077 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1078 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1079 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1080 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1081 ; NoVLX-NEXT: kmovw %k0, %eax
1082 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
1083 ; NoVLX-NEXT: vzeroupper
1086 %0 = bitcast <2 x i64> %__a to <4 x i32>
1087 %1 = bitcast <2 x i64> %__b to <4 x i32>
1088 %2 = icmp eq <4 x i32> %0, %1
; Widening shuffle. Indices 0-3 pick lanes of %2 and indices 4-7 pick lanes of
; the zeroinitializer operand, so result lanes 4-7 are all zero.
1089 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1090 %4 = bitcast <8 x i1> %3 to i8
; Unmasked 4 x i32 equality compare with the second operand loaded from
; memory, widened to an 8-bit mask value.
;   %__a  <2 x i64>, reinterpreted as <4 x i32>
;   %__b  pointer to <2 x i64>, loaded and reinterpreted as <4 x i32>
; The VLX run expects the load folded into vpcmpeqd as a (%rdi) operand.
; The NoVLX run expects a separate vmovdqa load, a 512-bit vpcmpeqd and the
; kshiftlw $12 / kshiftrw $12 pair that keeps only the low four mask bits.
1094 define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1095 ; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
1096 ; VLX: # %bb.0: # %entry
1097 ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
1098 ; VLX-NEXT: kmovd %k0, %eax
1099 ; VLX-NEXT: # kill: def $al killed $al killed $eax
1102 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
1103 ; NoVLX: # %bb.0: # %entry
1104 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1105 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
1106 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1107 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1108 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1109 ; NoVLX-NEXT: kmovw %k0, %eax
1110 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
1111 ; NoVLX-NEXT: vzeroupper
1114 %0 = bitcast <2 x i64> %__a to <4 x i32>
1115 %load = load <2 x i64>, <2 x i64>* %__b
1116 %1 = bitcast <2 x i64> %load to <4 x i32>
1117 %2 = icmp eq <4 x i32> %0, %1
; Widening shuffle. Indices 0-3 pick lanes of %2 and indices 4-7 pick lanes of
; the zeroinitializer operand, so result lanes 4-7 are all zero.
1118 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1119 %4 = bitcast <8 x i1> %3 to i8
; Masked 4 x i32 equality compare of two register operands, widened to an
; 8-bit mask value.
;   %__u        i8 mask; only its low four lanes (%extract.i) are ANDed with
;               the compare result
;   %__a, %__b  <2 x i64>, each reinterpreted as <4 x i32>
; The VLX run expects a 128-bit vpcmpeqd with a {%k1} write-mask.
; The NoVLX run expects a masked 512-bit vpcmpeqd followed by the
; kshiftlw $12 / kshiftrw $12 pair that keeps only the low four mask bits.
1123 define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1124 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
1125 ; VLX: # %bb.0: # %entry
1126 ; VLX-NEXT: kmovd %edi, %k1
1127 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1128 ; VLX-NEXT: kmovd %k0, %eax
1129 ; VLX-NEXT: # kill: def $al killed $al killed $eax
1132 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
1133 ; NoVLX: # %bb.0: # %entry
1134 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1135 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1136 ; NoVLX-NEXT: kmovw %edi, %k1
1137 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1138 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1139 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1140 ; NoVLX-NEXT: kmovw %k0, %eax
1141 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
1142 ; NoVLX-NEXT: vzeroupper
1145 %0 = bitcast <2 x i64> %__a to <4 x i32>
1146 %1 = bitcast <2 x i64> %__b to <4 x i32>
1147 %2 = icmp eq <4 x i32> %0, %1
1148 %3 = bitcast i8 %__u to <8 x i1>
; Keep lanes 0-3 of the 8-lane mask so it matches the 4-lane compare result.
1149 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1150 %4 = and <4 x i1> %2, %extract.i
; Widening shuffle. Indices 0-3 pick lanes of %4 and indices 4-7 pick lanes of
; the zeroinitializer operand, so result lanes 4-7 are all zero.
1151 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1152 %6 = bitcast <8 x i1> %5 to i8
; Masked 4 x i32 equality compare with the second operand loaded from memory,
; widened to an 8-bit mask value.
;   %__u  i8 mask; only its low four lanes (%extract.i) are ANDed with the
;         compare result
;   %__a  <2 x i64>, reinterpreted as <4 x i32>
;   %__b  pointer to <2 x i64>, loaded and reinterpreted as <4 x i32>
; The VLX run expects the load folded into a {%k1}-masked vpcmpeqd.
; The NoVLX run expects a separate vmovdqa load, a masked 512-bit vpcmpeqd and
; the kshiftlw $12 / kshiftrw $12 pair.
1156 define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1157 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem:
1158 ; VLX: # %bb.0: # %entry
1159 ; VLX-NEXT: kmovd %edi, %k1
1160 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
1161 ; VLX-NEXT: kmovd %k0, %eax
1162 ; VLX-NEXT: # kill: def $al killed $al killed $eax
1165 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem:
1166 ; NoVLX: # %bb.0: # %entry
1167 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1168 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
1169 ; NoVLX-NEXT: kmovw %edi, %k1
1170 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1171 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1172 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1173 ; NoVLX-NEXT: kmovw %k0, %eax
1174 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
1175 ; NoVLX-NEXT: vzeroupper
1178 %0 = bitcast <2 x i64> %__a to <4 x i32>
1179 %load = load <2 x i64>, <2 x i64>* %__b
1180 %1 = bitcast <2 x i64> %load to <4 x i32>
1181 %2 = icmp eq <4 x i32> %0, %1
1182 %3 = bitcast i8 %__u to <8 x i1>
; Keep lanes 0-3 of the 8-lane mask so it matches the 4-lane compare result.
1183 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1184 %4 = and <4 x i1> %2, %extract.i
; Widening shuffle. Indices 0-3 pick lanes of %4 and indices 4-7 pick lanes of
; the zeroinitializer operand, so result lanes 4-7 are all zero.
1185 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1186 %6 = bitcast <8 x i1> %5 to i8
; Unmasked 4 x i32 equality compare against a scalar broadcast from memory,
; widened to an 8-bit mask value.
;   %__a  <2 x i64>, reinterpreted as <4 x i32>
;   %__b  pointer to a single i32 that is loaded and splatted to all 4 lanes
; The VLX run expects the broadcast folded into vpcmpeqd as a {1to4} operand.
; The NoVLX run expects an explicit vpbroadcastd, a 512-bit vpcmpeqd and the
; kshiftlw $12 / kshiftrw $12 pair that keeps only the low four mask bits.
1191 define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
1192 ; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b:
1193 ; VLX: # %bb.0: # %entry
1194 ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
1195 ; VLX-NEXT: kmovd %k0, %eax
1196 ; VLX-NEXT: # kill: def $al killed $al killed $eax
1199 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b:
1200 ; NoVLX: # %bb.0: # %entry
1201 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1202 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
1203 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1204 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1205 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1206 ; NoVLX-NEXT: kmovw %k0, %eax
1207 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
1208 ; NoVLX-NEXT: vzeroupper
1211 %0 = bitcast <2 x i64> %__a to <4 x i32>
1212 %load = load i32, i32* %__b
; Splat idiom. Insert the scalar into lane 0, then shuffle with an all-zero
; index mask so every lane holds the loaded value.
1213 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1214 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1215 %2 = icmp eq <4 x i32> %0, %1
; Widening shuffle. Indices 0-3 pick lanes of %2 and indices 4-7 pick lanes of
; the zeroinitializer operand, so result lanes 4-7 are all zero.
1216 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1217 %4 = bitcast <8 x i1> %3 to i8
; Masked 4 x i32 equality compare against a scalar broadcast from memory,
; widened to an 8-bit mask value.
;   %__u  i8 mask; only its low four lanes (%extract.i) are ANDed with the
;         compare result
;   %__a  <2 x i64>, reinterpreted as <4 x i32>
;   %__b  pointer to a single i32 that is loaded and splatted to all 4 lanes
; The VLX run expects a {1to4} broadcast operand and a {%k1} write-mask on one
; vpcmpeqd. The NoVLX run expects an explicit vpbroadcastd, a masked 512-bit
; vpcmpeqd and the kshiftlw $12 / kshiftrw $12 pair.
1221 define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
1222 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
1223 ; VLX: # %bb.0: # %entry
1224 ; VLX-NEXT: kmovd %edi, %k1
1225 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
1226 ; VLX-NEXT: kmovd %k0, %eax
1227 ; VLX-NEXT: # kill: def $al killed $al killed $eax
1230 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
1231 ; NoVLX: # %bb.0: # %entry
1232 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1233 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
1234 ; NoVLX-NEXT: kmovw %edi, %k1
1235 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1236 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1237 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1238 ; NoVLX-NEXT: kmovw %k0, %eax
1239 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
1240 ; NoVLX-NEXT: vzeroupper
1243 %0 = bitcast <2 x i64> %__a to <4 x i32>
1244 %load = load i32, i32* %__b
; Splat idiom. Insert the scalar into lane 0, then shuffle with an all-zero
; index mask so every lane holds the loaded value.
1245 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1246 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1247 %2 = icmp eq <4 x i32> %0, %1
1248 %3 = bitcast i8 %__u to <8 x i1>
; Keep lanes 0-3 of the 8-lane mask so it matches the 4-lane compare result.
1249 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1250 %4 = and <4 x i1> %extract.i, %2
; Widening shuffle. Indices 0-3 pick lanes of %4 and indices 4-7 pick lanes of
; the zeroinitializer operand, so result lanes 4-7 are all zero.
1251 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1252 %6 = bitcast <8 x i1> %5 to i8
; Unmasked 4 x i32 equality compare of two register operands, widened to a
; 16-bit mask value.
;   %__a, %__b  <2 x i64>, each reinterpreted as <4 x i32>
; The VLX run expects a 128-bit vpcmpeqd into a k-register plus kmovd.
; The NoVLX run expects a 512-bit vpcmpeqd with the mask bits above the low
; four cleared by the kshiftlw $12 / kshiftrw $12 pair.
1257 define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1258 ; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask:
1259 ; VLX: # %bb.0: # %entry
1260 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
1261 ; VLX-NEXT: kmovd %k0, %eax
1262 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1265 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask:
1266 ; NoVLX: # %bb.0: # %entry
1267 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1268 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1269 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1270 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1271 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1272 ; NoVLX-NEXT: kmovw %k0, %eax
1273 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1274 ; NoVLX-NEXT: vzeroupper
1277 %0 = bitcast <2 x i64> %__a to <4 x i32>
1278 %1 = bitcast <2 x i64> %__b to <4 x i32>
1279 %2 = icmp eq <4 x i32> %0, %1
; Widening shuffle. Indices 0-3 pick lanes of %2 and indices 4-7 pick lanes of
; the zeroinitializer operand, so result lanes 4-15 are all zero.
1280 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1281 %4 = bitcast <16 x i1> %3 to i16
; Unmasked 4 x i32 equality compare with the second operand loaded from
; memory, widened to a 16-bit mask value.
;   %__a  <2 x i64>, reinterpreted as <4 x i32>
;   %__b  pointer to <2 x i64>, loaded and reinterpreted as <4 x i32>
; The VLX run expects the load folded into vpcmpeqd as a (%rdi) operand.
; The NoVLX run expects a separate vmovdqa load, a 512-bit vpcmpeqd and the
; kshiftlw $12 / kshiftrw $12 pair that keeps only the low four mask bits.
1285 define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1286 ; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem:
1287 ; VLX: # %bb.0: # %entry
1288 ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
1289 ; VLX-NEXT: kmovd %k0, %eax
1290 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1293 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem:
1294 ; NoVLX: # %bb.0: # %entry
1295 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1296 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
1297 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1298 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1299 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1300 ; NoVLX-NEXT: kmovw %k0, %eax
1301 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1302 ; NoVLX-NEXT: vzeroupper
1305 %0 = bitcast <2 x i64> %__a to <4 x i32>
1306 %load = load <2 x i64>, <2 x i64>* %__b
1307 %1 = bitcast <2 x i64> %load to <4 x i32>
1308 %2 = icmp eq <4 x i32> %0, %1
; Widening shuffle. Indices 0-3 pick lanes of %2 and indices 4-7 pick lanes of
; the zeroinitializer operand, so result lanes 4-15 are all zero.
1309 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1310 %4 = bitcast <16 x i1> %3 to i16
; Masked 4 x i32 equality compare of two register operands, widened to a
; 16-bit mask value.
;   %__u        i8 mask; only its low four lanes (%extract.i) are ANDed with
;               the compare result
;   %__a, %__b  <2 x i64>, each reinterpreted as <4 x i32>
; The VLX run expects a 128-bit vpcmpeqd with a {%k1} write-mask.
; The NoVLX run expects a masked 512-bit vpcmpeqd followed by the
; kshiftlw $12 / kshiftrw $12 pair that keeps only the low four mask bits.
1314 define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1315 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
1316 ; VLX: # %bb.0: # %entry
1317 ; VLX-NEXT: kmovd %edi, %k1
1318 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1319 ; VLX-NEXT: kmovd %k0, %eax
1320 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1323 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
1324 ; NoVLX: # %bb.0: # %entry
1325 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1326 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1327 ; NoVLX-NEXT: kmovw %edi, %k1
1328 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1329 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1330 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1331 ; NoVLX-NEXT: kmovw %k0, %eax
1332 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1333 ; NoVLX-NEXT: vzeroupper
1336 %0 = bitcast <2 x i64> %__a to <4 x i32>
1337 %1 = bitcast <2 x i64> %__b to <4 x i32>
1338 %2 = icmp eq <4 x i32> %0, %1
1339 %3 = bitcast i8 %__u to <8 x i1>
; Keep lanes 0-3 of the 8-lane mask so it matches the 4-lane compare result.
1340 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1341 %4 = and <4 x i1> %2, %extract.i
; Widening shuffle. Indices 0-3 pick lanes of %4 and indices 4-7 pick lanes of
; the zeroinitializer operand, so result lanes 4-15 are all zero.
1342 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1343 %6 = bitcast <16 x i1> %5 to i16
; Masked 4 x i32 equality compare with the second operand loaded from memory,
; widened to a 16-bit mask value.
;   %__u  i8 mask; only its low four lanes (%extract.i) are ANDed with the
;         compare result
;   %__a  <2 x i64>, reinterpreted as <4 x i32>
;   %__b  pointer to <2 x i64>, loaded and reinterpreted as <4 x i32>
; The VLX run expects the load folded into a {%k1}-masked vpcmpeqd.
; The NoVLX run expects a separate vmovdqa load, a masked 512-bit vpcmpeqd and
; the kshiftlw $12 / kshiftrw $12 pair.
1347 define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1348 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem:
1349 ; VLX: # %bb.0: # %entry
1350 ; VLX-NEXT: kmovd %edi, %k1
1351 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
1352 ; VLX-NEXT: kmovd %k0, %eax
1353 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1356 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem:
1357 ; NoVLX: # %bb.0: # %entry
1358 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1359 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
1360 ; NoVLX-NEXT: kmovw %edi, %k1
1361 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1362 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1363 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1364 ; NoVLX-NEXT: kmovw %k0, %eax
1365 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1366 ; NoVLX-NEXT: vzeroupper
1369 %0 = bitcast <2 x i64> %__a to <4 x i32>
1370 %load = load <2 x i64>, <2 x i64>* %__b
1371 %1 = bitcast <2 x i64> %load to <4 x i32>
1372 %2 = icmp eq <4 x i32> %0, %1
1373 %3 = bitcast i8 %__u to <8 x i1>
; Keep lanes 0-3 of the 8-lane mask so it matches the 4-lane compare result.
1374 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1375 %4 = and <4 x i1> %2, %extract.i
; Widening shuffle. Indices 0-3 pick lanes of %4 and indices 4-7 pick lanes of
; the zeroinitializer operand, so result lanes 4-15 are all zero.
1376 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1377 %6 = bitcast <16 x i1> %5 to i16
; Unmasked 4 x i32 equality compare against a scalar broadcast from memory,
; widened to a 16-bit mask value.
;   %__a  <2 x i64>, reinterpreted as <4 x i32>
;   %__b  pointer to a single i32 that is loaded and splatted to all 4 lanes
; The VLX run expects the broadcast folded into vpcmpeqd as a {1to4} operand.
; The NoVLX run expects an explicit vpbroadcastd, a 512-bit vpcmpeqd and the
; kshiftlw $12 / kshiftrw $12 pair that keeps only the low four mask bits.
1382 define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
1383 ; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b:
1384 ; VLX: # %bb.0: # %entry
1385 ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
1386 ; VLX-NEXT: kmovd %k0, %eax
1387 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1390 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b:
1391 ; NoVLX: # %bb.0: # %entry
1392 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1393 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
1394 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1395 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1396 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1397 ; NoVLX-NEXT: kmovw %k0, %eax
1398 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1399 ; NoVLX-NEXT: vzeroupper
1402 %0 = bitcast <2 x i64> %__a to <4 x i32>
1403 %load = load i32, i32* %__b
; Splat idiom. Insert the scalar into lane 0, then shuffle with an all-zero
; index mask so every lane holds the loaded value.
1404 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1405 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1406 %2 = icmp eq <4 x i32> %0, %1
; Widening shuffle. Indices 0-3 pick lanes of %2 and indices 4-7 pick lanes of
; the zeroinitializer operand, so result lanes 4-15 are all zero.
1407 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1408 %4 = bitcast <16 x i1> %3 to i16
; Masked 4 x i32 equality compare against a scalar broadcast from memory,
; widened to a 16-bit mask value.
;   %__u  i8 mask; only its low four lanes (%extract.i) are ANDed with the
;         compare result
;   %__a  <2 x i64>, reinterpreted as <4 x i32>
;   %__b  pointer to a single i32 that is loaded and splatted to all 4 lanes
; The VLX run expects a {1to4} broadcast operand and a {%k1} write-mask on one
; vpcmpeqd. The NoVLX run expects an explicit vpbroadcastd, a masked 512-bit
; vpcmpeqd and the kshiftlw $12 / kshiftrw $12 pair.
1412 define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
1413 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
1414 ; VLX: # %bb.0: # %entry
1415 ; VLX-NEXT: kmovd %edi, %k1
1416 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
1417 ; VLX-NEXT: kmovd %k0, %eax
1418 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1421 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
1422 ; NoVLX: # %bb.0: # %entry
1423 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1424 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
1425 ; NoVLX-NEXT: kmovw %edi, %k1
1426 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1427 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1428 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1429 ; NoVLX-NEXT: kmovw %k0, %eax
1430 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1431 ; NoVLX-NEXT: vzeroupper
1434 %0 = bitcast <2 x i64> %__a to <4 x i32>
1435 %load = load i32, i32* %__b
; Splat idiom. Insert the scalar into lane 0, then shuffle with an all-zero
; index mask so every lane holds the loaded value.
1436 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1437 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1438 %2 = icmp eq <4 x i32> %0, %1
1439 %3 = bitcast i8 %__u to <8 x i1>
; Keep lanes 0-3 of the 8-lane mask so it matches the 4-lane compare result.
1440 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1441 %4 = and <4 x i1> %extract.i, %2
; Widening shuffle. Indices 0-3 pick lanes of %4 and indices 4-7 pick lanes of
; the zeroinitializer operand, so result lanes 4-15 are all zero.
1442 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1443 %6 = bitcast <16 x i1> %5 to i16
; Unmasked register-register equality compare of the <4 x i32> views of %__a
; and %__b. The <4 x i1> result is widened to <32 x i1> by a shufflevector whose
; indices 4-7 select lanes of the zeroinitializer operand, then bitcast to i32.
; Without AVX512VL the checks expect a 512-bit vpcmpeqd followed by a
; kshiftlw/kshiftrw $12 pair, which keeps only the low 4 mask bits.
1448 define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1449 ; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
1450 ; VLX: # %bb.0: # %entry
1451 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
1452 ; VLX-NEXT: kmovd %k0, %eax
1455 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
1456 ; NoVLX: # %bb.0: # %entry
1457 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1458 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1459 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1460 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1461 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1462 ; NoVLX-NEXT: kmovw %k0, %eax
1463 ; NoVLX-NEXT: vzeroupper
1466 %0 = bitcast <2 x i64> %__a to <4 x i32>
1467 %1 = bitcast <2 x i64> %__b to <4 x i32>
1468 %2 = icmp eq <4 x i32> %0, %1
1469 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1470 %4 = bitcast <32 x i1> %3 to i32
; Unmasked 4 x i32 equality compare where the second operand is a full
; <2 x i64> vector loaded from %__b. The <4 x i1> result is widened to
; <32 x i1> with lanes taken from zeroinitializer (shuffle indices 4-7) and
; bitcast to i32.
; With AVX512VL the checks expect the load folded into vpcmpeqd; without it
; they expect a separate vmovdqa, a 512-bit compare and kshiftlw/kshiftrw $12.
1474 define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1475 ; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
1476 ; VLX: # %bb.0: # %entry
1477 ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
1478 ; VLX-NEXT: kmovd %k0, %eax
1481 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
1482 ; NoVLX: # %bb.0: # %entry
1483 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1484 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
1485 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1486 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1487 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1488 ; NoVLX-NEXT: kmovw %k0, %eax
1489 ; NoVLX-NEXT: vzeroupper
1492 %0 = bitcast <2 x i64> %__a to <4 x i32>
1493 %load = load <2 x i64>, <2 x i64>* %__b
1494 %1 = bitcast <2 x i64> %load to <4 x i32>
1495 %2 = icmp eq <4 x i32> %0, %1
1496 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1497 %4 = bitcast <32 x i1> %3 to i32
; Masked register-register equality compare of the <4 x i32> views of %__a and
; %__b. The compare result is ANDed with lanes 0-3 of %__u (bitcast to
; <8 x i1>), widened to <32 x i1> with lanes taken from zeroinitializer
; (shuffle indices 4-7), and bitcast to i32.
; With AVX512VL the checks expect the mask to fold into vpcmpeqd as {%k1};
; without it they expect a 512-bit masked compare plus kshiftlw/kshiftrw $12.
1501 define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1502 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
1503 ; VLX: # %bb.0: # %entry
1504 ; VLX-NEXT: kmovd %edi, %k1
1505 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1506 ; VLX-NEXT: kmovd %k0, %eax
1509 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
1510 ; NoVLX: # %bb.0: # %entry
1511 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1512 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1513 ; NoVLX-NEXT: kmovw %edi, %k1
1514 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1515 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1516 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1517 ; NoVLX-NEXT: kmovw %k0, %eax
1518 ; NoVLX-NEXT: vzeroupper
1521 %0 = bitcast <2 x i64> %__a to <4 x i32>
1522 %1 = bitcast <2 x i64> %__b to <4 x i32>
1523 %2 = icmp eq <4 x i32> %0, %1
1524 %3 = bitcast i8 %__u to <8 x i1>
1525 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1526 %4 = and <4 x i1> %2, %extract.i
1527 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1528 %6 = bitcast <32 x i1> %5 to i32
; Masked 4 x i32 equality compare where the second operand is a full <2 x i64>
; vector loaded from %__b. The compare result is ANDed with lanes 0-3 of %__u
; (bitcast to <8 x i1>), widened to <32 x i1> with lanes taken from
; zeroinitializer (shuffle indices 4-7), and bitcast to i32.
; With AVX512VL the checks expect both the load and the mask folded into
; vpcmpeqd; without it they expect vmovdqa, a 512-bit masked compare and
; kshiftlw/kshiftrw $12.
1532 define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1533 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
1534 ; VLX: # %bb.0: # %entry
1535 ; VLX-NEXT: kmovd %edi, %k1
1536 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
1537 ; VLX-NEXT: kmovd %k0, %eax
1540 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
1541 ; NoVLX: # %bb.0: # %entry
1542 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1543 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
1544 ; NoVLX-NEXT: kmovw %edi, %k1
1545 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1546 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1547 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1548 ; NoVLX-NEXT: kmovw %k0, %eax
1549 ; NoVLX-NEXT: vzeroupper
1552 %0 = bitcast <2 x i64> %__a to <4 x i32>
1553 %load = load <2 x i64>, <2 x i64>* %__b
1554 %1 = bitcast <2 x i64> %load to <4 x i32>
1555 %2 = icmp eq <4 x i32> %0, %1
1556 %3 = bitcast i8 %__u to <8 x i1>
1557 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1558 %4 = and <4 x i1> %2, %extract.i
1559 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1560 %6 = bitcast <32 x i1> %5 to i32
; Unmasked 4 x i32 equality compare against a broadcast scalar: one i32 is
; loaded from %__b and splatted to all four lanes (insertelement + shufflevector).
; The <4 x i1> result is widened to <32 x i1> with lanes taken from
; zeroinitializer (shuffle indices 4-7) and bitcast to i32.
; With AVX512VL the checks expect an embedded {1to4} broadcast; without it they
; expect vpbroadcastd, a 512-bit compare and kshiftlw/kshiftrw $12.
1565 define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
1566 ; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
1567 ; VLX: # %bb.0: # %entry
1568 ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
1569 ; VLX-NEXT: kmovd %k0, %eax
1572 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
1573 ; NoVLX: # %bb.0: # %entry
1574 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1575 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
1576 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1577 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1578 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1579 ; NoVLX-NEXT: kmovw %k0, %eax
1580 ; NoVLX-NEXT: vzeroupper
1583 %0 = bitcast <2 x i64> %__a to <4 x i32>
1584 %load = load i32, i32* %__b
1585 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1586 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1587 %2 = icmp eq <4 x i32> %0, %1
1588 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1589 %4 = bitcast <32 x i1> %3 to i32
; Masked 4 x i32 equality compare against a broadcast scalar: one i32 is loaded
; from %__b and splatted to all four lanes. The compare result is ANDed with
; lanes 0-3 of %__u (bitcast to <8 x i1>), widened to <32 x i1> with lanes taken
; from zeroinitializer (shuffle indices 4-7), and bitcast to i32.
; With AVX512VL the checks expect a {1to4} broadcast compare under {%k1};
; without it they expect vpbroadcastd, a 512-bit masked compare and
; kshiftlw/kshiftrw $12.
1593 define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
1594 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
1595 ; VLX: # %bb.0: # %entry
1596 ; VLX-NEXT: kmovd %edi, %k1
1597 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
1598 ; VLX-NEXT: kmovd %k0, %eax
1601 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
1602 ; NoVLX: # %bb.0: # %entry
1603 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1604 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
1605 ; NoVLX-NEXT: kmovw %edi, %k1
1606 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1607 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1608 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1609 ; NoVLX-NEXT: kmovw %k0, %eax
1610 ; NoVLX-NEXT: vzeroupper
1613 %0 = bitcast <2 x i64> %__a to <4 x i32>
1614 %load = load i32, i32* %__b
1615 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1616 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1617 %2 = icmp eq <4 x i32> %0, %1
1618 %3 = bitcast i8 %__u to <8 x i1>
1619 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1620 %4 = and <4 x i1> %extract.i, %2
1621 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1622 %6 = bitcast <32 x i1> %5 to i32
; Unmasked register-register equality compare of the <4 x i32> views of %__a
; and %__b. The <4 x i1> result is widened to <64 x i1> by a shufflevector whose
; indices 4-7 select lanes of the zeroinitializer operand, then bitcast to i64.
; With AVX512VL the checks expect the mask read with kmovq; without it they
; expect a 512-bit compare, kshiftlw/kshiftrw $12, kmovw and a movzwl.
1627 define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1628 ; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
1629 ; VLX: # %bb.0: # %entry
1630 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
1631 ; VLX-NEXT: kmovq %k0, %rax
1634 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
1635 ; NoVLX: # %bb.0: # %entry
1636 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1637 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1638 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1639 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1640 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1641 ; NoVLX-NEXT: kmovw %k0, %eax
1642 ; NoVLX-NEXT: movzwl %ax, %eax
1643 ; NoVLX-NEXT: vzeroupper
1646 %0 = bitcast <2 x i64> %__a to <4 x i32>
1647 %1 = bitcast <2 x i64> %__b to <4 x i32>
1648 %2 = icmp eq <4 x i32> %0, %1
1649 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1650 %4 = bitcast <64 x i1> %3 to i64
; Unmasked 4 x i32 equality compare where the second operand is a full
; <2 x i64> vector loaded from %__b. The <4 x i1> result is widened to
; <64 x i1> with lanes taken from zeroinitializer (shuffle indices 4-7) and
; bitcast to i64.
; With AVX512VL the checks expect the load folded into vpcmpeqd and the mask
; read with kmovq; without it they expect vmovdqa, a 512-bit compare,
; kshiftlw/kshiftrw $12, kmovw and a movzwl.
1654 define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1655 ; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
1656 ; VLX: # %bb.0: # %entry
1657 ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
1658 ; VLX-NEXT: kmovq %k0, %rax
1661 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
1662 ; NoVLX: # %bb.0: # %entry
1663 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1664 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
1665 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1666 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1667 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1668 ; NoVLX-NEXT: kmovw %k0, %eax
1669 ; NoVLX-NEXT: movzwl %ax, %eax
1670 ; NoVLX-NEXT: vzeroupper
1673 %0 = bitcast <2 x i64> %__a to <4 x i32>
1674 %load = load <2 x i64>, <2 x i64>* %__b
1675 %1 = bitcast <2 x i64> %load to <4 x i32>
1676 %2 = icmp eq <4 x i32> %0, %1
1677 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1678 %4 = bitcast <64 x i1> %3 to i64
; Masked register-register equality compare of the <4 x i32> views of %__a and
; %__b. The compare result is ANDed with lanes 0-3 of %__u (bitcast to
; <8 x i1>), widened to <64 x i1> with lanes taken from zeroinitializer
; (shuffle indices 4-7), and bitcast to i64.
; With AVX512VL the checks expect the mask folded into vpcmpeqd as {%k1} and
; read back with kmovq; without it they expect a 512-bit masked compare,
; kshiftlw/kshiftrw $12, kmovw and a movzwl.
1682 define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
1683 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
1684 ; VLX: # %bb.0: # %entry
1685 ; VLX-NEXT: kmovd %edi, %k1
1686 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
1687 ; VLX-NEXT: kmovq %k0, %rax
1690 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
1691 ; NoVLX: # %bb.0: # %entry
1692 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1693 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1694 ; NoVLX-NEXT: kmovw %edi, %k1
1695 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1696 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1697 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1698 ; NoVLX-NEXT: kmovw %k0, %eax
1699 ; NoVLX-NEXT: movzwl %ax, %eax
1700 ; NoVLX-NEXT: vzeroupper
1703 %0 = bitcast <2 x i64> %__a to <4 x i32>
1704 %1 = bitcast <2 x i64> %__b to <4 x i32>
1705 %2 = icmp eq <4 x i32> %0, %1
1706 %3 = bitcast i8 %__u to <8 x i1>
1707 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1708 %4 = and <4 x i1> %2, %extract.i
1709 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1710 %6 = bitcast <64 x i1> %5 to i64
; Masked 4 x i32 equality compare where the second operand is a full <2 x i64>
; vector loaded from %__b. The compare result is ANDed with lanes 0-3 of %__u
; (bitcast to <8 x i1>), widened to <64 x i1> with lanes taken from
; zeroinitializer (shuffle indices 4-7), and bitcast to i64.
; With AVX512VL the checks expect the load and mask folded into vpcmpeqd and
; the result read with kmovq; without it they expect vmovdqa, a 512-bit masked
; compare, kshiftlw/kshiftrw $12, kmovw and a movzwl.
1714 define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
1715 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem:
1716 ; VLX: # %bb.0: # %entry
1717 ; VLX-NEXT: kmovd %edi, %k1
1718 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
1719 ; VLX-NEXT: kmovq %k0, %rax
1722 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem:
1723 ; NoVLX: # %bb.0: # %entry
1724 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1725 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
1726 ; NoVLX-NEXT: kmovw %edi, %k1
1727 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1728 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1729 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1730 ; NoVLX-NEXT: kmovw %k0, %eax
1731 ; NoVLX-NEXT: movzwl %ax, %eax
1732 ; NoVLX-NEXT: vzeroupper
1735 %0 = bitcast <2 x i64> %__a to <4 x i32>
1736 %load = load <2 x i64>, <2 x i64>* %__b
1737 %1 = bitcast <2 x i64> %load to <4 x i32>
1738 %2 = icmp eq <4 x i32> %0, %1
1739 %3 = bitcast i8 %__u to <8 x i1>
1740 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1741 %4 = and <4 x i1> %2, %extract.i
1742 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1743 %6 = bitcast <64 x i1> %5 to i64
; Unmasked 4 x i32 equality compare against a broadcast scalar: one i32 is
; loaded from %__b and splatted to all four lanes (insertelement + shufflevector).
; The <4 x i1> result is widened to <64 x i1> with lanes taken from
; zeroinitializer (shuffle indices 4-7) and bitcast to i64.
; With AVX512VL the checks expect an embedded {1to4} broadcast and kmovq;
; without it they expect vpbroadcastd, a 512-bit compare, kshiftlw/kshiftrw
; $12, kmovw and a movzwl.
1748 define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
1749 ; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b:
1750 ; VLX: # %bb.0: # %entry
1751 ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
1752 ; VLX-NEXT: kmovq %k0, %rax
1755 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b:
1756 ; NoVLX: # %bb.0: # %entry
1757 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1758 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
1759 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1760 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1761 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1762 ; NoVLX-NEXT: kmovw %k0, %eax
1763 ; NoVLX-NEXT: movzwl %ax, %eax
1764 ; NoVLX-NEXT: vzeroupper
1767 %0 = bitcast <2 x i64> %__a to <4 x i32>
1768 %load = load i32, i32* %__b
1769 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1770 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1771 %2 = icmp eq <4 x i32> %0, %1
1772 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1773 %4 = bitcast <64 x i1> %3 to i64
; Masked 4 x i32 equality compare against a broadcast scalar: one i32 is loaded
; from %__b and splatted to all four lanes. The compare result is ANDed with
; lanes 0-3 of %__u (bitcast to <8 x i1>), widened to <64 x i1> with lanes taken
; from zeroinitializer (shuffle indices 4-7), and bitcast to i64.
; With AVX512VL the checks expect a {1to4} broadcast compare under {%k1} and
; kmovq; without it they expect vpbroadcastd, a 512-bit masked compare,
; kshiftlw/kshiftrw $12, kmovw and a movzwl.
1777 define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
1778 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b:
1779 ; VLX: # %bb.0: # %entry
1780 ; VLX-NEXT: kmovd %edi, %k1
1781 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
1782 ; VLX-NEXT: kmovq %k0, %rax
1785 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b:
1786 ; NoVLX: # %bb.0: # %entry
1787 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1788 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
1789 ; NoVLX-NEXT: kmovw %edi, %k1
1790 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1791 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
1792 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
1793 ; NoVLX-NEXT: kmovw %k0, %eax
1794 ; NoVLX-NEXT: movzwl %ax, %eax
1795 ; NoVLX-NEXT: vzeroupper
1798 %0 = bitcast <2 x i64> %__a to <4 x i32>
1799 %load = load i32, i32* %__b
1800 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
1801 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1802 %2 = icmp eq <4 x i32> %0, %1
1803 %3 = bitcast i8 %__u to <8 x i1>
1804 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1805 %4 = and <4 x i1> %extract.i, %2
1806 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1807 %6 = bitcast <64 x i1> %5 to i64
; Unmasked register-register equality compare of the <8 x i32> views of %__a
; and %__b. The <8 x i1> result is widened to <16 x i1> by a shufflevector whose
; indices 8-15 select lanes of the zeroinitializer operand, then bitcast to i16.
; Without AVX512VL the checks expect a 512-bit vpcmpeqd followed by a
; kshiftlw/kshiftrw $8 pair, which keeps only the low 8 mask bits.
1812 define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
1813 ; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask:
1814 ; VLX: # %bb.0: # %entry
1815 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
1816 ; VLX-NEXT: kmovd %k0, %eax
1817 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1818 ; VLX-NEXT: vzeroupper
1821 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask:
1822 ; NoVLX: # %bb.0: # %entry
1823 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
1824 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1825 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1826 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
1827 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
1828 ; NoVLX-NEXT: kmovw %k0, %eax
1829 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1830 ; NoVLX-NEXT: vzeroupper
1833 %0 = bitcast <4 x i64> %__a to <8 x i32>
1834 %1 = bitcast <4 x i64> %__b to <8 x i32>
1835 %2 = icmp eq <8 x i32> %0, %1
1836 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1837 %4 = bitcast <16 x i1> %3 to i16
; Unmasked 8 x i32 equality compare where the second operand is a full
; <4 x i64> vector loaded from %__b. The <8 x i1> result is widened to
; <16 x i1> with lanes taken from zeroinitializer (shuffle indices 8-15) and
; bitcast to i16.
; With AVX512VL the checks expect the load folded into vpcmpeqd; without it
; they expect a separate vmovdqa, a 512-bit compare and kshiftlw/kshiftrw $8.
1841 define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
1842 ; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem:
1843 ; VLX: # %bb.0: # %entry
1844 ; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
1845 ; VLX-NEXT: kmovd %k0, %eax
1846 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1847 ; VLX-NEXT: vzeroupper
1850 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem:
1851 ; NoVLX: # %bb.0: # %entry
1852 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1853 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
1854 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1855 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
1856 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
1857 ; NoVLX-NEXT: kmovw %k0, %eax
1858 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1859 ; NoVLX-NEXT: vzeroupper
1862 %0 = bitcast <4 x i64> %__a to <8 x i32>
1863 %load = load <4 x i64>, <4 x i64>* %__b
1864 %1 = bitcast <4 x i64> %load to <8 x i32>
1865 %2 = icmp eq <8 x i32> %0, %1
1866 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1867 %4 = bitcast <16 x i1> %3 to i16
; Masked register-register equality compare of the <8 x i32> views of %__a and
; %__b. The compare result is ANDed with %__u bitcast to <8 x i1>, widened to
; <16 x i1> with lanes taken from zeroinitializer (shuffle indices 8-15), and
; bitcast to i16.
; With AVX512VL the checks expect the mask to fold into vpcmpeqd as {%k1};
; without it they expect a 512-bit masked compare plus kshiftlw/kshiftrw $8.
1871 define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
1872 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask:
1873 ; VLX: # %bb.0: # %entry
1874 ; VLX-NEXT: kmovd %edi, %k1
1875 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
1876 ; VLX-NEXT: kmovd %k0, %eax
1877 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1878 ; VLX-NEXT: vzeroupper
1881 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask:
1882 ; NoVLX: # %bb.0: # %entry
1883 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
1884 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1885 ; NoVLX-NEXT: kmovw %edi, %k1
1886 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1887 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
1888 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
1889 ; NoVLX-NEXT: kmovw %k0, %eax
1890 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1891 ; NoVLX-NEXT: vzeroupper
1894 %0 = bitcast <4 x i64> %__a to <8 x i32>
1895 %1 = bitcast <4 x i64> %__b to <8 x i32>
1896 %2 = icmp eq <8 x i32> %0, %1
1897 %3 = bitcast i8 %__u to <8 x i1>
1898 %4 = and <8 x i1> %2, %3
1899 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1900 %6 = bitcast <16 x i1> %5 to i16
; Masked 8 x i32 equality compare where the second operand is a full <4 x i64>
; vector loaded from %__b. The compare result is ANDed with %__u bitcast to
; <8 x i1>, widened to <16 x i1> with lanes taken from zeroinitializer
; (shuffle indices 8-15), and bitcast to i16.
; With AVX512VL the checks expect both the load and the mask folded into
; vpcmpeqd; without it they expect vmovdqa, a 512-bit masked compare and
; kshiftlw/kshiftrw $8.
1904 define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
1905 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem:
1906 ; VLX: # %bb.0: # %entry
1907 ; VLX-NEXT: kmovd %edi, %k1
1908 ; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
1909 ; VLX-NEXT: kmovd %k0, %eax
1910 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1911 ; VLX-NEXT: vzeroupper
1914 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem:
1915 ; NoVLX: # %bb.0: # %entry
1916 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1917 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
1918 ; NoVLX-NEXT: kmovw %edi, %k1
1919 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1920 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
1921 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
1922 ; NoVLX-NEXT: kmovw %k0, %eax
1923 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1924 ; NoVLX-NEXT: vzeroupper
1927 %0 = bitcast <4 x i64> %__a to <8 x i32>
1928 %load = load <4 x i64>, <4 x i64>* %__b
1929 %1 = bitcast <4 x i64> %load to <8 x i32>
1930 %2 = icmp eq <8 x i32> %0, %1
1931 %3 = bitcast i8 %__u to <8 x i1>
1932 %4 = and <8 x i1> %2, %3
1933 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1934 %6 = bitcast <16 x i1> %5 to i16
; Unmasked 8 x i32 equality compare against a broadcast scalar: one i32 is
; loaded from %__b and splatted to all eight lanes (insertelement +
; shufflevector). The <8 x i1> result is widened to <16 x i1> with lanes taken
; from zeroinitializer (shuffle indices 8-15) and bitcast to i16.
; With AVX512VL the checks expect an embedded {1to8} broadcast; without it they
; expect vpbroadcastd, a 512-bit compare and kshiftlw/kshiftrw $8.
1939 define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
1940 ; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b:
1941 ; VLX: # %bb.0: # %entry
1942 ; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
1943 ; VLX-NEXT: kmovd %k0, %eax
1944 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1945 ; VLX-NEXT: vzeroupper
1948 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b:
1949 ; NoVLX: # %bb.0: # %entry
1950 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1951 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
1952 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1953 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
1954 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
1955 ; NoVLX-NEXT: kmovw %k0, %eax
1956 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1957 ; NoVLX-NEXT: vzeroupper
1960 %0 = bitcast <4 x i64> %__a to <8 x i32>
1961 %load = load i32, i32* %__b
1962 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
1963 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1964 %2 = icmp eq <8 x i32> %0, %1
1965 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1966 %4 = bitcast <16 x i1> %3 to i16
; Masked 8 x i32 equality compare against a broadcast scalar: one i32 is loaded
; from %__b and splatted to all eight lanes. The compare result is ANDed with
; %__u bitcast to <8 x i1>, widened to <16 x i1> with lanes taken from
; zeroinitializer (shuffle indices 8-15), and bitcast to i16.
; With AVX512VL the checks expect a {1to8} broadcast compare under {%k1};
; without it they expect vpbroadcastd, a 512-bit masked compare and
; kshiftlw/kshiftrw $8.
1970 define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
1971 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b:
1972 ; VLX: # %bb.0: # %entry
1973 ; VLX-NEXT: kmovd %edi, %k1
1974 ; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
1975 ; VLX-NEXT: kmovd %k0, %eax
1976 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
1977 ; VLX-NEXT: vzeroupper
1980 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b:
1981 ; NoVLX: # %bb.0: # %entry
1982 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1983 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
1984 ; NoVLX-NEXT: kmovw %edi, %k1
1985 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1986 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
1987 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
1988 ; NoVLX-NEXT: kmovw %k0, %eax
1989 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
1990 ; NoVLX-NEXT: vzeroupper
1993 %0 = bitcast <4 x i64> %__a to <8 x i32>
1994 %load = load i32, i32* %__b
1995 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
1996 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1997 %2 = icmp eq <8 x i32> %0, %1
1998 %3 = bitcast i8 %__u to <8 x i1>
1999 %4 = and <8 x i1> %3, %2
2000 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2001 %6 = bitcast <16 x i1> %5 to i16
; Unmasked register-register equality compare of the <8 x i32> views of %__a
; and %__b. The <8 x i1> result is widened to <32 x i1> by a shufflevector whose
; indices 8-15 select lanes of the zeroinitializer operand, then bitcast to i32.
; Without AVX512VL the checks expect a 512-bit vpcmpeqd followed by a
; kshiftlw/kshiftrw $8 pair, which keeps only the low 8 mask bits.
2006 define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
2007 ; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask:
2008 ; VLX: # %bb.0: # %entry
2009 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
2010 ; VLX-NEXT: kmovd %k0, %eax
2011 ; VLX-NEXT: vzeroupper
2014 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask:
2015 ; NoVLX: # %bb.0: # %entry
2016 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
2017 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2018 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2019 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2020 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2021 ; NoVLX-NEXT: kmovw %k0, %eax
2022 ; NoVLX-NEXT: vzeroupper
2025 %0 = bitcast <4 x i64> %__a to <8 x i32>
2026 %1 = bitcast <4 x i64> %__b to <8 x i32>
2027 %2 = icmp eq <8 x i32> %0, %1
2028 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2029 %4 = bitcast <32 x i1> %3 to i32
; Unmasked 8 x i32 equality compare where the second operand is a full
; <4 x i64> vector loaded from %__b. The <8 x i1> result is widened to
; <32 x i1> with lanes taken from zeroinitializer (shuffle indices 8-15) and
; bitcast to i32.
; With AVX512VL the checks expect the load folded into vpcmpeqd; without it
; they expect a separate vmovdqa, a 512-bit compare and kshiftlw/kshiftrw $8.
2033 define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
2034 ; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem:
2035 ; VLX: # %bb.0: # %entry
2036 ; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
2037 ; VLX-NEXT: kmovd %k0, %eax
2038 ; VLX-NEXT: vzeroupper
2041 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem:
2042 ; NoVLX: # %bb.0: # %entry
2043 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2044 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
2045 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2046 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2047 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2048 ; NoVLX-NEXT: kmovw %k0, %eax
2049 ; NoVLX-NEXT: vzeroupper
2052 %0 = bitcast <4 x i64> %__a to <8 x i32>
2053 %load = load <4 x i64>, <4 x i64>* %__b
2054 %1 = bitcast <4 x i64> %load to <8 x i32>
2055 %2 = icmp eq <8 x i32> %0, %1
2056 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2057 %4 = bitcast <32 x i1> %3 to i32
; Masked register-register equality compare of the <8 x i32> views of %__a and
; %__b. The compare result is ANDed with %__u bitcast to <8 x i1>, widened to
; <32 x i1> with lanes taken from zeroinitializer (shuffle indices 8-15), and
; bitcast to i32.
; With AVX512VL the checks expect the mask to fold into vpcmpeqd as {%k1};
; without it they expect a 512-bit masked compare plus kshiftlw/kshiftrw $8.
2061 define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
2062 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask:
2063 ; VLX: # %bb.0: # %entry
2064 ; VLX-NEXT: kmovd %edi, %k1
2065 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
2066 ; VLX-NEXT: kmovd %k0, %eax
2067 ; VLX-NEXT: vzeroupper
2070 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask:
2071 ; NoVLX: # %bb.0: # %entry
2072 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
2073 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2074 ; NoVLX-NEXT: kmovw %edi, %k1
2075 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2076 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2077 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2078 ; NoVLX-NEXT: kmovw %k0, %eax
2079 ; NoVLX-NEXT: vzeroupper
2082 %0 = bitcast <4 x i64> %__a to <8 x i32>
2083 %1 = bitcast <4 x i64> %__b to <8 x i32>
2084 %2 = icmp eq <8 x i32> %0, %1
2085 %3 = bitcast i8 %__u to <8 x i1>
2086 %4 = and <8 x i1> %2, %3
2087 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2088 %6 = bitcast <32 x i1> %5 to i32
; Masked 8 x i32 equality compare where the second operand is a full <4 x i64>
; vector loaded from %__b. The compare result is ANDed with %__u bitcast to
; <8 x i1>, widened to <32 x i1> with lanes taken from zeroinitializer
; (shuffle indices 8-15), and bitcast to i32.
; With AVX512VL the checks expect both the load and the mask folded into
; vpcmpeqd; without it they expect vmovdqa, a 512-bit masked compare and
; kshiftlw/kshiftrw $8.
2092 define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
2093 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem:
2094 ; VLX: # %bb.0: # %entry
2095 ; VLX-NEXT: kmovd %edi, %k1
2096 ; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
2097 ; VLX-NEXT: kmovd %k0, %eax
2098 ; VLX-NEXT: vzeroupper
2101 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem:
2102 ; NoVLX: # %bb.0: # %entry
2103 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2104 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
2105 ; NoVLX-NEXT: kmovw %edi, %k1
2106 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2107 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2108 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2109 ; NoVLX-NEXT: kmovw %k0, %eax
2110 ; NoVLX-NEXT: vzeroupper
2113 %0 = bitcast <4 x i64> %__a to <8 x i32>
2114 %load = load <4 x i64>, <4 x i64>* %__b
2115 %1 = bitcast <4 x i64> %load to <8 x i32>
2116 %2 = icmp eq <8 x i32> %0, %1
2117 %3 = bitcast i8 %__u to <8 x i1>
2118 %4 = and <8 x i1> %2, %3
2119 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2120 %6 = bitcast <32 x i1> %5 to i32
; Unmasked equality compare of eight i32 lanes against a broadcast scalar.
; The i32 loaded from %__b is inserted into lane 0 and splatted by an all-zero
; shuffle mask before the icmp. The <8 x i1> result is widened to <32 x i1>
; (shuffle indices 8-15 select lanes of the zeroinitializer operand) and
; bitcast to i32.
; The VLX checks expect the broadcast folded into the compare as {1to8}; the
; NoVLX checks expect a separate vpbroadcastd.
2125 define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
2126 ; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b:
2127 ; VLX: # %bb.0: # %entry
2128 ; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
2129 ; VLX-NEXT: kmovd %k0, %eax
2130 ; VLX-NEXT: vzeroupper
2133 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b:
2134 ; NoVLX: # %bb.0: # %entry
2135 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2136 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
2137 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2138 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2139 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2140 ; NoVLX-NEXT: kmovw %k0, %eax
2141 ; NoVLX-NEXT: vzeroupper
2144 %0 = bitcast <4 x i64> %__a to <8 x i32>
2145 %load = load i32, i32* %__b
2146 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2147 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2148 %2 = icmp eq <8 x i32> %0, %1
2149 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2150 %4 = bitcast <32 x i1> %3 to i32
; Masked equality compare of eight i32 lanes against a broadcast scalar.
; The i32 loaded from %__b is splatted to all eight lanes, compared with
; %__a, and the <8 x i1> result is ANDed with %__u (bitcast to <8 x i1>).
; Note the operand order here is `and %3, %2` (mask first). The result is
; widened to <32 x i1> with zero lanes and bitcast to i32.
2154 define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
2155 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b:
2156 ; VLX: # %bb.0: # %entry
2157 ; VLX-NEXT: kmovd %edi, %k1
2158 ; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
2159 ; VLX-NEXT: kmovd %k0, %eax
2160 ; VLX-NEXT: vzeroupper
2163 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b:
2164 ; NoVLX: # %bb.0: # %entry
2165 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2166 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
2167 ; NoVLX-NEXT: kmovw %edi, %k1
2168 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2169 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2170 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2171 ; NoVLX-NEXT: kmovw %k0, %eax
2172 ; NoVLX-NEXT: vzeroupper
2175 %0 = bitcast <4 x i64> %__a to <8 x i32>
2176 %load = load i32, i32* %__b
2177 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2178 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2179 %2 = icmp eq <8 x i32> %0, %1
2180 %3 = bitcast i8 %__u to <8 x i1>
2181 %4 = and <8 x i1> %3, %2
2182 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2183 %6 = bitcast <32 x i1> %5 to i32
; Unmasked register-register equality compare of eight i32 lanes. The
; <8 x i1> result is widened to <64 x i1> (shuffle indices 8-15 select lanes
; of the zeroinitializer operand) and bitcast to i64.
; The VLX checks read the mask with kmovq; the NoVLX checks read it with kmovw
; and then zero-extend the low 16 bits with movzwl.
2188 define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
2189 ; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask:
2190 ; VLX: # %bb.0: # %entry
2191 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
2192 ; VLX-NEXT: kmovq %k0, %rax
2193 ; VLX-NEXT: vzeroupper
2196 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask:
2197 ; NoVLX: # %bb.0: # %entry
2198 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
2199 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2200 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2201 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2202 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2203 ; NoVLX-NEXT: kmovw %k0, %eax
2204 ; NoVLX-NEXT: movzwl %ax, %eax
2205 ; NoVLX-NEXT: vzeroupper
2208 %0 = bitcast <4 x i64> %__a to <8 x i32>
2209 %1 = bitcast <4 x i64> %__b to <8 x i32>
2210 %2 = icmp eq <8 x i32> %0, %1
2211 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2212 %4 = bitcast <64 x i1> %3 to i64
; Unmasked equality compare of eight i32 lanes, second operand loaded from
; memory as <4 x i64> and bitcast to <8 x i32>. The <8 x i1> result is widened
; to <64 x i1> with zero lanes and bitcast to i64.
; The VLX checks expect the load folded into vpcmpeqd; the NoVLX checks expect
; a separate vmovdqa before the zmm compare.
2216 define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
2217 ; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem:
2218 ; VLX: # %bb.0: # %entry
2219 ; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
2220 ; VLX-NEXT: kmovq %k0, %rax
2221 ; VLX-NEXT: vzeroupper
2224 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem:
2225 ; NoVLX: # %bb.0: # %entry
2226 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2227 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
2228 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2229 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2230 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2231 ; NoVLX-NEXT: kmovw %k0, %eax
2232 ; NoVLX-NEXT: movzwl %ax, %eax
2233 ; NoVLX-NEXT: vzeroupper
2236 %0 = bitcast <4 x i64> %__a to <8 x i32>
2237 %load = load <4 x i64>, <4 x i64>* %__b
2238 %1 = bitcast <4 x i64> %load to <8 x i32>
2239 %2 = icmp eq <8 x i32> %0, %1
2240 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2241 %4 = bitcast <64 x i1> %3 to i64
; Masked register-register equality compare of eight i32 lanes. The <8 x i1>
; icmp result is ANDed with %__u (bitcast to <8 x i1>), widened to <64 x i1>
; with zero lanes (shuffle indices 8-15 come from the zeroinitializer operand)
; and bitcast to i64.
; Both runs are expected to apply %__u as a {%k1} write-mask on the compare.
2245 define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
2246 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask:
2247 ; VLX: # %bb.0: # %entry
2248 ; VLX-NEXT: kmovd %edi, %k1
2249 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
2250 ; VLX-NEXT: kmovq %k0, %rax
2251 ; VLX-NEXT: vzeroupper
2254 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask:
2255 ; NoVLX: # %bb.0: # %entry
2256 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
2257 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2258 ; NoVLX-NEXT: kmovw %edi, %k1
2259 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2260 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2261 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2262 ; NoVLX-NEXT: kmovw %k0, %eax
2263 ; NoVLX-NEXT: movzwl %ax, %eax
2264 ; NoVLX-NEXT: vzeroupper
2267 %0 = bitcast <4 x i64> %__a to <8 x i32>
2268 %1 = bitcast <4 x i64> %__b to <8 x i32>
2269 %2 = icmp eq <8 x i32> %0, %1
2270 %3 = bitcast i8 %__u to <8 x i1>
2271 %4 = and <8 x i1> %2, %3
2272 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2273 %6 = bitcast <64 x i1> %5 to i64
; Masked equality compare of eight i32 lanes, second operand loaded from
; memory. The <8 x i1> icmp result is ANDed with %__u (bitcast to <8 x i1>),
; widened to <64 x i1> with zero lanes, and bitcast to i64.
; The NoVLX checks expect the load as a separate vmovdqa, the compare on zmm
; registers, and mask bits 8-15 cleared by the kshiftlw/kshiftrw $8 pair.
2277 define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
2278 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem:
2279 ; VLX: # %bb.0: # %entry
2280 ; VLX-NEXT: kmovd %edi, %k1
2281 ; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
2282 ; VLX-NEXT: kmovq %k0, %rax
2283 ; VLX-NEXT: vzeroupper
2286 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem:
2287 ; NoVLX: # %bb.0: # %entry
2288 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2289 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
2290 ; NoVLX-NEXT: kmovw %edi, %k1
2291 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2292 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2293 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2294 ; NoVLX-NEXT: kmovw %k0, %eax
2295 ; NoVLX-NEXT: movzwl %ax, %eax
2296 ; NoVLX-NEXT: vzeroupper
2299 %0 = bitcast <4 x i64> %__a to <8 x i32>
2300 %load = load <4 x i64>, <4 x i64>* %__b
2301 %1 = bitcast <4 x i64> %load to <8 x i32>
2302 %2 = icmp eq <8 x i32> %0, %1
2303 %3 = bitcast i8 %__u to <8 x i1>
2304 %4 = and <8 x i1> %2, %3
2305 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2306 %6 = bitcast <64 x i1> %5 to i64
; Unmasked equality compare of eight i32 lanes against a broadcast scalar.
; The i32 loaded from %__b is inserted into lane 0 and splatted by an all-zero
; shuffle mask. The <8 x i1> icmp result is widened to <64 x i1> with zero
; lanes and bitcast to i64.
; The VLX checks expect the {1to8} embedded-broadcast form of vpcmpeqd.
2311 define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
2312 ; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b:
2313 ; VLX: # %bb.0: # %entry
2314 ; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
2315 ; VLX-NEXT: kmovq %k0, %rax
2316 ; VLX-NEXT: vzeroupper
2319 ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b:
2320 ; NoVLX: # %bb.0: # %entry
2321 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2322 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
2323 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2324 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2325 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2326 ; NoVLX-NEXT: kmovw %k0, %eax
2327 ; NoVLX-NEXT: movzwl %ax, %eax
2328 ; NoVLX-NEXT: vzeroupper
2331 %0 = bitcast <4 x i64> %__a to <8 x i32>
2332 %load = load i32, i32* %__b
2333 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2334 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2335 %2 = icmp eq <8 x i32> %0, %1
2336 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2337 %4 = bitcast <64 x i1> %3 to i64
; Masked equality compare of eight i32 lanes against a broadcast scalar.
; The i32 loaded from %__b is splatted to all eight lanes and compared with
; %__a. The <8 x i1> result is ANDed with %__u (bitcast to <8 x i1>, mask as
; the first `and` operand), widened to <64 x i1> with zero lanes, and bitcast
; to i64.
2341 define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
2342 ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b:
2343 ; VLX: # %bb.0: # %entry
2344 ; VLX-NEXT: kmovd %edi, %k1
2345 ; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
2346 ; VLX-NEXT: kmovq %k0, %rax
2347 ; VLX-NEXT: vzeroupper
2350 ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b:
2351 ; NoVLX: # %bb.0: # %entry
2352 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2353 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
2354 ; NoVLX-NEXT: kmovw %edi, %k1
2355 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2356 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
2357 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
2358 ; NoVLX-NEXT: kmovw %k0, %eax
2359 ; NoVLX-NEXT: movzwl %ax, %eax
2360 ; NoVLX-NEXT: vzeroupper
2363 %0 = bitcast <4 x i64> %__a to <8 x i32>
2364 %load = load i32, i32* %__b
2365 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
2366 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2367 %2 = icmp eq <8 x i32> %0, %1
2368 %3 = bitcast i8 %__u to <8 x i1>
2369 %4 = and <8 x i1> %3, %2
2370 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2371 %6 = bitcast <64 x i1> %5 to i64
; Unmasked register-register equality compare of sixteen i32 lanes. The
; <16 x i1> result is widened to <32 x i1> (shuffle indices 16-31 select lanes
; of the zeroinitializer operand) and bitcast to i32.
; Both runs are expected to emit a single zmm vpcmpeqd; they differ only in
; the mask move (kmovd with VLX features, kmovw without).
2376 define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
2377 ; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask:
2378 ; VLX: # %bb.0: # %entry
2379 ; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2380 ; VLX-NEXT: kmovd %k0, %eax
2381 ; VLX-NEXT: vzeroupper
2384 ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask:
2385 ; NoVLX: # %bb.0: # %entry
2386 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2387 ; NoVLX-NEXT: kmovw %k0, %eax
2388 ; NoVLX-NEXT: vzeroupper
2391 %0 = bitcast <8 x i64> %__a to <16 x i32>
2392 %1 = bitcast <8 x i64> %__b to <16 x i32>
2393 %2 = icmp eq <16 x i32> %0, %1
2394 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2395 %4 = bitcast <32 x i1> %3 to i32
; Unmasked equality compare of sixteen i32 lanes, second operand loaded from
; memory as <8 x i64> and bitcast to <16 x i32>. The <16 x i1> result is
; widened to <32 x i1> with zero lanes and bitcast to i32.
; Both runs are expected to fold the load into the zmm vpcmpeqd.
2399 define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
2400 ; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem:
2401 ; VLX: # %bb.0: # %entry
2402 ; VLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
2403 ; VLX-NEXT: kmovd %k0, %eax
2404 ; VLX-NEXT: vzeroupper
2407 ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem:
2408 ; NoVLX: # %bb.0: # %entry
2409 ; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
2410 ; NoVLX-NEXT: kmovw %k0, %eax
2411 ; NoVLX-NEXT: vzeroupper
2414 %0 = bitcast <8 x i64> %__a to <16 x i32>
2415 %load = load <8 x i64>, <8 x i64>* %__b
2416 %1 = bitcast <8 x i64> %load to <16 x i32>
2417 %2 = icmp eq <16 x i32> %0, %1
2418 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2419 %4 = bitcast <32 x i1> %3 to i32
; Masked register-register equality compare of sixteen i32 lanes. The
; <16 x i1> icmp result is ANDed with %__u (bitcast to <16 x i1>), widened to
; <32 x i1> with zero lanes, and bitcast to i32.
; The VLX checks apply %__u as a {%k1} write-mask on the compare; the NoVLX
; checks instead compare unmasked and AND the mask in a GPR (andl %edi, %eax).
2423 define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
2424 ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
2425 ; VLX: # %bb.0: # %entry
2426 ; VLX-NEXT: kmovd %edi, %k1
2427 ; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2428 ; VLX-NEXT: kmovd %k0, %eax
2429 ; VLX-NEXT: vzeroupper
2432 ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
2433 ; NoVLX: # %bb.0: # %entry
2434 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2435 ; NoVLX-NEXT: kmovw %k0, %eax
2436 ; NoVLX-NEXT: andl %edi, %eax
2437 ; NoVLX-NEXT: vzeroupper
2440 %0 = bitcast <8 x i64> %__a to <16 x i32>
2441 %1 = bitcast <8 x i64> %__b to <16 x i32>
2442 %2 = icmp eq <16 x i32> %0, %1
2443 %3 = bitcast i16 %__u to <16 x i1>
2444 %4 = and <16 x i1> %2, %3
2445 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2446 %6 = bitcast <32 x i1> %5 to i32
; Masked equality compare of sixteen i32 lanes, second operand loaded from
; memory. The <16 x i1> icmp result is ANDed with %__u (bitcast to
; <16 x i1>), widened to <32 x i1> with zero lanes, and bitcast to i32.
; The NoVLX checks expect an unmasked compare with the load folded in,
; followed by a scalar andl with %edi.
2450 define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
2451 ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
2452 ; VLX: # %bb.0: # %entry
2453 ; VLX-NEXT: kmovd %edi, %k1
2454 ; VLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
2455 ; VLX-NEXT: kmovd %k0, %eax
2456 ; VLX-NEXT: vzeroupper
2459 ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
2460 ; NoVLX: # %bb.0: # %entry
2461 ; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0
2462 ; NoVLX-NEXT: kmovw %k0, %eax
2463 ; NoVLX-NEXT: andl %edi, %eax
2464 ; NoVLX-NEXT: vzeroupper
2467 %0 = bitcast <8 x i64> %__a to <16 x i32>
2468 %load = load <8 x i64>, <8 x i64>* %__b
2469 %1 = bitcast <8 x i64> %load to <16 x i32>
2470 %2 = icmp eq <16 x i32> %0, %1
2471 %3 = bitcast i16 %__u to <16 x i1>
2472 %4 = and <16 x i1> %2, %3
2473 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2474 %6 = bitcast <32 x i1> %5 to i32
; Unmasked equality compare of sixteen i32 lanes against a broadcast scalar.
; The i32 loaded from %__b is inserted into lane 0 and splatted by an all-zero
; shuffle mask. The <16 x i1> icmp result is widened to <32 x i1> with zero
; lanes and bitcast to i32.
; Both runs are expected to use the {1to16} embedded-broadcast compare.
2479 define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
2480 ; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b:
2481 ; VLX: # %bb.0: # %entry
2482 ; VLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2483 ; VLX-NEXT: kmovd %k0, %eax
2484 ; VLX-NEXT: vzeroupper
2487 ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b:
2488 ; NoVLX: # %bb.0: # %entry
2489 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2490 ; NoVLX-NEXT: kmovw %k0, %eax
2491 ; NoVLX-NEXT: vzeroupper
2494 %0 = bitcast <8 x i64> %__a to <16 x i32>
2495 %load = load i32, i32* %__b
2496 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
2497 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2498 %2 = icmp eq <16 x i32> %0, %1
2499 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2500 %4 = bitcast <32 x i1> %3 to i32
; Masked equality compare of sixteen i32 lanes against a broadcast scalar.
; The i32 loaded from %__b is splatted to all sixteen lanes and compared with
; %__a. The <16 x i1> result is ANDed with %__u (bitcast to <16 x i1>, mask as
; the first `and` operand), widened to <32 x i1> with zero lanes, and bitcast
; to i32.
; The NoVLX checks expect the masking done in a GPR via andl %edi, %eax.
2504 define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
2505 ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
2506 ; VLX: # %bb.0: # %entry
2507 ; VLX-NEXT: kmovd %edi, %k1
2508 ; VLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
2509 ; VLX-NEXT: kmovd %k0, %eax
2510 ; VLX-NEXT: vzeroupper
2513 ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
2514 ; NoVLX: # %bb.0: # %entry
2515 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0
2516 ; NoVLX-NEXT: kmovw %k0, %eax
2517 ; NoVLX-NEXT: andl %edi, %eax
2518 ; NoVLX-NEXT: vzeroupper
2521 %0 = bitcast <8 x i64> %__a to <16 x i32>
2522 %load = load i32, i32* %__b
2523 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
2524 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2525 %2 = icmp eq <16 x i32> %0, %1
2526 %3 = bitcast i16 %__u to <16 x i1>
2527 %4 = and <16 x i1> %3, %2
2528 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2529 %6 = bitcast <32 x i1> %5 to i32
; Unmasked register-register equality compare of sixteen i32 lanes. The
; <16 x i1> result is widened to <64 x i1> (every shuffle index from 16 to 31
; selects a lane of the zeroinitializer operand) and bitcast to i64.
; The NoVLX checks read the mask with kmovw and zero-extend the low 16 bits
; with movzwl; the VLX checks use a single kmovq.
2534 define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
2535 ; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask:
2536 ; VLX: # %bb.0: # %entry
2537 ; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2538 ; VLX-NEXT: kmovq %k0, %rax
2539 ; VLX-NEXT: vzeroupper
2542 ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask:
2543 ; NoVLX: # %bb.0: # %entry
2544 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2545 ; NoVLX-NEXT: kmovw %k0, %eax
2546 ; NoVLX-NEXT: movzwl %ax, %eax
2547 ; NoVLX-NEXT: vzeroupper
2550 %0 = bitcast <8 x i64> %__a to <16 x i32>
2551 %1 = bitcast <8 x i64> %__b to <16 x i32>
2552 %2 = icmp eq <16 x i32> %0, %1
2553 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2554 %4 = bitcast <64 x i1> %3 to i64
; Unmasked equality compare of sixteen i32 lanes, second operand loaded from
; memory as <8 x i64> and bitcast to <16 x i32>. The <16 x i1> result is
; widened to <64 x i1> with zero lanes and bitcast to i64.
; Both runs are expected to fold the load into the zmm vpcmpeqd.
2558 define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
2559 ; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem:
2560 ; VLX: # %bb.0: # %entry
2561 ; VLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
2562 ; VLX-NEXT: kmovq %k0, %rax
2563 ; VLX-NEXT: vzeroupper
2566 ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem:
2567 ; NoVLX: # %bb.0: # %entry
2568 ; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
2569 ; NoVLX-NEXT: kmovw %k0, %eax
2570 ; NoVLX-NEXT: movzwl %ax, %eax
2571 ; NoVLX-NEXT: vzeroupper
2574 %0 = bitcast <8 x i64> %__a to <16 x i32>
2575 %load = load <8 x i64>, <8 x i64>* %__b
2576 %1 = bitcast <8 x i64> %load to <16 x i32>
2577 %2 = icmp eq <16 x i32> %0, %1
2578 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2579 %4 = bitcast <64 x i1> %3 to i64
; Masked register-register equality compare of sixteen i32 lanes. The
; <16 x i1> icmp result is ANDed with %__u (bitcast to <16 x i1>), widened to
; <64 x i1> with zero lanes, and bitcast to i64.
; The NoVLX checks expect an unmasked compare followed by andl %edi, %eax,
; with no separate zero-extension. %__u is declared zeroext i16.
2583 define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
2584 ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
2585 ; VLX: # %bb.0: # %entry
2586 ; VLX-NEXT: kmovd %edi, %k1
2587 ; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2588 ; VLX-NEXT: kmovq %k0, %rax
2589 ; VLX-NEXT: vzeroupper
2592 ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
2593 ; NoVLX: # %bb.0: # %entry
2594 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2595 ; NoVLX-NEXT: kmovw %k0, %eax
2596 ; NoVLX-NEXT: andl %edi, %eax
2597 ; NoVLX-NEXT: vzeroupper
2600 %0 = bitcast <8 x i64> %__a to <16 x i32>
2601 %1 = bitcast <8 x i64> %__b to <16 x i32>
2602 %2 = icmp eq <16 x i32> %0, %1
2603 %3 = bitcast i16 %__u to <16 x i1>
2604 %4 = and <16 x i1> %2, %3
2605 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2606 %6 = bitcast <64 x i1> %5 to i64
; Masked equality compare of sixteen i32 lanes, second operand loaded from
; memory. The <16 x i1> icmp result is ANDed with %__u (bitcast to
; <16 x i1>), widened to <64 x i1> with zero lanes, and bitcast to i64.
; The VLX checks apply %__u as a {%k1} write-mask; the NoVLX checks AND it in
; a GPR after an unmasked compare.
2610 define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
2611 ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
2612 ; VLX: # %bb.0: # %entry
2613 ; VLX-NEXT: kmovd %edi, %k1
2614 ; VLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
2615 ; VLX-NEXT: kmovq %k0, %rax
2616 ; VLX-NEXT: vzeroupper
2619 ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
2620 ; NoVLX: # %bb.0: # %entry
2621 ; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0
2622 ; NoVLX-NEXT: kmovw %k0, %eax
2623 ; NoVLX-NEXT: andl %edi, %eax
2624 ; NoVLX-NEXT: vzeroupper
2627 %0 = bitcast <8 x i64> %__a to <16 x i32>
2628 %load = load <8 x i64>, <8 x i64>* %__b
2629 %1 = bitcast <8 x i64> %load to <16 x i32>
2630 %2 = icmp eq <16 x i32> %0, %1
2631 %3 = bitcast i16 %__u to <16 x i1>
2632 %4 = and <16 x i1> %2, %3
2633 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2634 %6 = bitcast <64 x i1> %5 to i64
; Unmasked equality compare of sixteen i32 lanes against a broadcast scalar.
; The i32 loaded from %__b is inserted into lane 0 and splatted by an all-zero
; shuffle mask. The <16 x i1> icmp result is widened to <64 x i1> with zero
; lanes and bitcast to i64.
; Both runs are expected to use the {1to16} embedded-broadcast compare.
2639 define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
2640 ; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b:
2641 ; VLX: # %bb.0: # %entry
2642 ; VLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2643 ; VLX-NEXT: kmovq %k0, %rax
2644 ; VLX-NEXT: vzeroupper
2647 ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b:
2648 ; NoVLX: # %bb.0: # %entry
2649 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
2650 ; NoVLX-NEXT: kmovw %k0, %eax
2651 ; NoVLX-NEXT: movzwl %ax, %eax
2652 ; NoVLX-NEXT: vzeroupper
2655 %0 = bitcast <8 x i64> %__a to <16 x i32>
2656 %load = load i32, i32* %__b
2657 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
2658 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2659 %2 = icmp eq <16 x i32> %0, %1
2660 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2661 %4 = bitcast <64 x i1> %3 to i64
; Masked equality compare of sixteen i32 lanes against a broadcast scalar.
; The i32 loaded from %__b is splatted to all sixteen lanes and compared with
; %__a. The <16 x i1> result is ANDed with %__u (bitcast to <16 x i1>, mask as
; the first `and` operand), widened to <64 x i1> with zero lanes, and bitcast
; to i64.
; The NoVLX checks expect the masking done in a GPR via andl %edi, %eax.
2665 define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
2666 ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
2667 ; VLX: # %bb.0: # %entry
2668 ; VLX-NEXT: kmovd %edi, %k1
2669 ; VLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
2670 ; VLX-NEXT: kmovq %k0, %rax
2671 ; VLX-NEXT: vzeroupper
2674 ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
2675 ; NoVLX: # %bb.0: # %entry
2676 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0
2677 ; NoVLX-NEXT: kmovw %k0, %eax
2678 ; NoVLX-NEXT: andl %edi, %eax
2679 ; NoVLX-NEXT: vzeroupper
2682 %0 = bitcast <8 x i64> %__a to <16 x i32>
2683 %load = load i32, i32* %__b
2684 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
2685 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2686 %2 = icmp eq <16 x i32> %0, %1
2687 %3 = bitcast i16 %__u to <16 x i1>
2688 %4 = and <16 x i1> %3, %2
2689 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
2690 %6 = bitcast <64 x i1> %5 to i64
; Unmasked register-register equality compare of two i64 lanes. The operand
; bitcasts are <2 x i64> to <2 x i64>, i.e. same-type casts. The <2 x i1>
; result is widened to <4 x i1> (shuffle indices 2-3 select lanes of the
; zeroinitializer operand) and bitcast to i4.
; The NoVLX checks expect a zmm compare, a kshiftlw/kshiftrw $14 pair that
; keeps only mask bits 0-1, and a final andl $3.
2695 define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2696 ; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask:
2697 ; VLX: # %bb.0: # %entry
2698 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
2699 ; VLX-NEXT: kmovb %k0, %eax
2702 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask:
2703 ; NoVLX: # %bb.0: # %entry
2704 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
2705 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2706 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2707 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2708 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2709 ; NoVLX-NEXT: kmovw %k0, %eax
2710 ; NoVLX-NEXT: andl $3, %eax
2711 ; NoVLX-NEXT: vzeroupper
2714 %0 = bitcast <2 x i64> %__a to <2 x i64>
2715 %1 = bitcast <2 x i64> %__b to <2 x i64>
2716 %2 = icmp eq <2 x i64> %0, %1
2717 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2718 %4 = bitcast <4 x i1> %3 to i4
; Unmasked equality compare of two i64 lanes, second operand loaded from
; memory as <2 x i64>. The <2 x i1> result is widened to <4 x i1> with zero
; lanes (shuffle indices 2-3) and bitcast to i4.
; The VLX checks expect the load folded into vpcmpeqq; the NoVLX checks expect
; a separate vmovdqa before the zmm compare.
2722 define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
2723 ; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem:
2724 ; VLX: # %bb.0: # %entry
2725 ; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
2726 ; VLX-NEXT: kmovb %k0, %eax
2729 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem:
2730 ; NoVLX: # %bb.0: # %entry
2731 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2732 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
2733 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2734 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2735 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2736 ; NoVLX-NEXT: kmovw %k0, %eax
2737 ; NoVLX-NEXT: andl $3, %eax
2738 ; NoVLX-NEXT: vzeroupper
2741 %0 = bitcast <2 x i64> %__a to <2 x i64>
2742 %load = load <2 x i64>, <2 x i64>* %__b
2743 %1 = bitcast <2 x i64> %load to <2 x i64>
2744 %2 = icmp eq <2 x i64> %0, %1
2745 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2746 %4 = bitcast <4 x i1> %3 to i4
; Masked register-register equality compare of two i64 lanes. %__u is bitcast
; to <8 x i1> and only its lanes 0-1 are kept (%extract.i) before being ANDed
; with the <2 x i1> icmp result. The result is widened to <4 x i1> with zero
; lanes (shuffle indices 2-3) and bitcast to i4.
; Both runs are expected to apply %__u as a {%k1} write-mask on the compare.
2750 define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2751 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask:
2752 ; VLX: # %bb.0: # %entry
2753 ; VLX-NEXT: kmovd %edi, %k1
2754 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
2755 ; VLX-NEXT: kmovb %k0, %eax
2758 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask:
2759 ; NoVLX: # %bb.0: # %entry
2760 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
2761 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2762 ; NoVLX-NEXT: kmovw %edi, %k1
2763 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2764 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2765 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2766 ; NoVLX-NEXT: kmovw %k0, %eax
2767 ; NoVLX-NEXT: andl $3, %eax
2768 ; NoVLX-NEXT: vzeroupper
2771 %0 = bitcast <2 x i64> %__a to <2 x i64>
2772 %1 = bitcast <2 x i64> %__b to <2 x i64>
2773 %2 = icmp eq <2 x i64> %0, %1
2774 %3 = bitcast i8 %__u to <8 x i1>
2775 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2776 %4 = and <2 x i1> %2, %extract.i
2777 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2778 %6 = bitcast <4 x i1> %5 to i4
; Masked equality compare of two i64 lanes, second operand loaded from memory.
; %__u is bitcast to <8 x i1> and only its lanes 0-1 are kept (%extract.i)
; before being ANDed with the <2 x i1> icmp result. The result is widened to
; <4 x i1> with zero lanes (shuffle indices 2-3) and bitcast to i4.
; The NoVLX checks expect the load as a separate vmovdqa ahead of the masked
; zmm compare.
2782 define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
2783 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem:
2784 ; VLX: # %bb.0: # %entry
2785 ; VLX-NEXT: kmovd %edi, %k1
2786 ; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
2787 ; VLX-NEXT: kmovb %k0, %eax
2790 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem:
2791 ; NoVLX: # %bb.0: # %entry
2792 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2793 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
2794 ; NoVLX-NEXT: kmovw %edi, %k1
2795 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2796 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2797 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2798 ; NoVLX-NEXT: kmovw %k0, %eax
2799 ; NoVLX-NEXT: andl $3, %eax
2800 ; NoVLX-NEXT: vzeroupper
2803 %0 = bitcast <2 x i64> %__a to <2 x i64>
2804 %load = load <2 x i64>, <2 x i64>* %__b
2805 %1 = bitcast <2 x i64> %load to <2 x i64>
2806 %2 = icmp eq <2 x i64> %0, %1
2807 %3 = bitcast i8 %__u to <8 x i1>
2808 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2809 %4 = and <2 x i1> %2, %extract.i
2810 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2811 %6 = bitcast <4 x i1> %5 to i4
; Unmasked 128-bit qword equality compare against a broadcast scalar: the i64
; loaded from %__b is splatted to both lanes (insertelement + shuffle <0, 0>)
; before the icmp. The <2 x i1> result is zero-extended to <4 x i1> via a
; shuffle with zeroinitializer and bitcast to i4.
; The VL-enabled run expects an embedded {1to2} broadcast operand; the
; AVX512F-only run expects an explicit vpbroadcastq and a zmm compare.
2816 define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
2817 ; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b:
2818 ; VLX: # %bb.0: # %entry
2819 ; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
2820 ; VLX-NEXT: kmovb %k0, %eax
2823 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b:
2824 ; NoVLX: # %bb.0: # %entry
2825 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2826 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
2827 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2828 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2829 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2830 ; NoVLX-NEXT: kmovw %k0, %eax
2831 ; NoVLX-NEXT: andl $3, %eax
2832 ; NoVLX-NEXT: vzeroupper
2835 %0 = bitcast <2 x i64> %__a to <2 x i64>
2836 %load = load i64, i64* %__b
2837 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
2838 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
2839 %2 = icmp eq <2 x i64> %0, %1
2840 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2841 %4 = bitcast <4 x i1> %3 to i4
; Masked 128-bit qword equality compare against a broadcast scalar: the i64
; loaded from %__b is splatted to both lanes, compared with %__a, and the
; result is ANDed with the low two bits of %__u (mask is the first operand of
; the 'and' here). The value is then zero-extended to <4 x i1> and bitcast to
; i4.
; The VL-enabled run expects a masked compare with a {1to2} broadcast operand;
; the AVX512F-only run expects vpbroadcastq plus a masked zmm compare.
2845 define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
2846 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b:
2847 ; VLX: # %bb.0: # %entry
2848 ; VLX-NEXT: kmovd %edi, %k1
2849 ; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
2850 ; VLX-NEXT: kmovb %k0, %eax
2853 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b:
2854 ; NoVLX: # %bb.0: # %entry
2855 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2856 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
2857 ; NoVLX-NEXT: kmovw %edi, %k1
2858 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2859 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2860 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2861 ; NoVLX-NEXT: kmovw %k0, %eax
2862 ; NoVLX-NEXT: andl $3, %eax
2863 ; NoVLX-NEXT: vzeroupper
2866 %0 = bitcast <2 x i64> %__a to <2 x i64>
2867 %load = load i64, i64* %__b
2868 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
2869 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
2870 %2 = icmp eq <2 x i64> %0, %1
2871 %3 = bitcast i8 %__u to <8 x i1>
2872 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2873 %4 = and <2 x i1> %extract.i, %2
2874 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2875 %6 = bitcast <4 x i1> %5 to i4
; Unmasked 128-bit qword equality compare on register operands, with the
; <2 x i1> result widened to <8 x i1>: shuffle indices 0 and 1 keep the compare
; bits, every remaining index (2 or 3) selects a zeroinitializer lane. The
; widened vector is bitcast to i8.
; The AVX512F-only run expects a zmm compare whose mask is reduced to its low
; two bits by the kshiftlw/kshiftrw $14 pair.
2880 define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2881 ; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask:
2882 ; VLX: # %bb.0: # %entry
2883 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
2884 ; VLX-NEXT: kmovd %k0, %eax
2885 ; VLX-NEXT: # kill: def $al killed $al killed $eax
2888 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask:
2889 ; NoVLX: # %bb.0: # %entry
2890 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
2891 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2892 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2893 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2894 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2895 ; NoVLX-NEXT: kmovw %k0, %eax
2896 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
2897 ; NoVLX-NEXT: vzeroupper
2900 %0 = bitcast <2 x i64> %__a to <2 x i64>
2901 %1 = bitcast <2 x i64> %__b to <2 x i64>
2902 %2 = icmp eq <2 x i64> %0, %1
2903 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
2904 %4 = bitcast <8 x i1> %3 to i8
; Unmasked 128-bit qword equality compare whose second operand is loaded from
; %__b. The <2 x i1> result is zero-extended to <8 x i1> (indices 2 and 3 of
; the shuffle select zeroinitializer lanes) and bitcast to i8.
; The VL-enabled run expects the load folded into vpcmpeqq; the AVX512F-only
; run expects a separate vmovdqa followed by a zmm compare.
2908 define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
2909 ; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem:
2910 ; VLX: # %bb.0: # %entry
2911 ; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
2912 ; VLX-NEXT: kmovd %k0, %eax
2913 ; VLX-NEXT: # kill: def $al killed $al killed $eax
2916 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem:
2917 ; NoVLX: # %bb.0: # %entry
2918 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2919 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
2920 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
2921 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2922 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2923 ; NoVLX-NEXT: kmovw %k0, %eax
2924 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
2925 ; NoVLX-NEXT: vzeroupper
2928 %0 = bitcast <2 x i64> %__a to <2 x i64>
2929 %load = load <2 x i64>, <2 x i64>* %__b
2930 %1 = bitcast <2 x i64> %load to <2 x i64>
2931 %2 = icmp eq <2 x i64> %0, %1
2932 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
2933 %4 = bitcast <8 x i1> %3 to i8
; Masked 128-bit qword equality compare on register operands. The icmp result
; is ANDed with the low two bits of %__u, zero-extended to <8 x i1> (all shuffle
; indices other than 0 and 1 select zeroinitializer lanes) and bitcast to i8.
; The AVX512F-only run expects a masked zmm compare followed by the
; kshiftlw/kshiftrw $14 pair that keeps only the low two mask bits.
2937 define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2938 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask:
2939 ; VLX: # %bb.0: # %entry
2940 ; VLX-NEXT: kmovd %edi, %k1
2941 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
2942 ; VLX-NEXT: kmovd %k0, %eax
2943 ; VLX-NEXT: # kill: def $al killed $al killed $eax
2946 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask:
2947 ; NoVLX: # %bb.0: # %entry
2948 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
2949 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2950 ; NoVLX-NEXT: kmovw %edi, %k1
2951 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2952 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2953 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2954 ; NoVLX-NEXT: kmovw %k0, %eax
2955 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
2956 ; NoVLX-NEXT: vzeroupper
2959 %0 = bitcast <2 x i64> %__a to <2 x i64>
2960 %1 = bitcast <2 x i64> %__b to <2 x i64>
2961 %2 = icmp eq <2 x i64> %0, %1
2962 %3 = bitcast i8 %__u to <8 x i1>
2963 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2964 %4 = and <2 x i1> %2, %extract.i
2965 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
2966 %6 = bitcast <8 x i1> %5 to i8
; Masked 128-bit qword equality compare whose second operand is loaded from
; %__b. The icmp result is ANDed with the low two bits of %__u, zero-extended
; to <8 x i1> via a shuffle with zeroinitializer, and bitcast to i8.
; The VL-enabled run expects the load folded into the masked vpcmpeqq; the
; AVX512F-only run expects vmovdqa plus a masked zmm compare.
2970 define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
2971 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem:
2972 ; VLX: # %bb.0: # %entry
2973 ; VLX-NEXT: kmovd %edi, %k1
2974 ; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
2975 ; VLX-NEXT: kmovd %k0, %eax
2976 ; VLX-NEXT: # kill: def $al killed $al killed $eax
2979 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem:
2980 ; NoVLX: # %bb.0: # %entry
2981 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2982 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
2983 ; NoVLX-NEXT: kmovw %edi, %k1
2984 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
2985 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
2986 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
2987 ; NoVLX-NEXT: kmovw %k0, %eax
2988 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
2989 ; NoVLX-NEXT: vzeroupper
2992 %0 = bitcast <2 x i64> %__a to <2 x i64>
2993 %load = load <2 x i64>, <2 x i64>* %__b
2994 %1 = bitcast <2 x i64> %load to <2 x i64>
2995 %2 = icmp eq <2 x i64> %0, %1
2996 %3 = bitcast i8 %__u to <8 x i1>
2997 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
2998 %4 = and <2 x i1> %2, %extract.i
2999 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3000 %6 = bitcast <8 x i1> %5 to i8
; Unmasked 128-bit qword equality compare against a broadcast scalar: the i64
; loaded from %__b is splatted to both lanes (insertelement + shuffle <0, 0>)
; and compared with %__a. The <2 x i1> result is zero-extended to <8 x i1> and
; bitcast to i8.
; The VL-enabled run expects an embedded {1to2} broadcast operand; the
; AVX512F-only run expects an explicit vpbroadcastq and a zmm compare.
3005 define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
3006 ; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b:
3007 ; VLX: # %bb.0: # %entry
3008 ; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
3009 ; VLX-NEXT: kmovd %k0, %eax
3010 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3013 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b:
3014 ; NoVLX: # %bb.0: # %entry
3015 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3016 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
3017 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3018 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3019 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3020 ; NoVLX-NEXT: kmovw %k0, %eax
3021 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3022 ; NoVLX-NEXT: vzeroupper
3025 %0 = bitcast <2 x i64> %__a to <2 x i64>
3026 %load = load i64, i64* %__b
3027 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3028 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3029 %2 = icmp eq <2 x i64> %0, %1
3030 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3031 %4 = bitcast <8 x i1> %3 to i8
; Masked 128-bit qword equality compare against a broadcast scalar: the i64
; loaded from %__b is splatted to both lanes, compared with %__a, and the
; result is ANDed with the low two bits of %__u (mask is the first operand of
; the 'and' here). The value is zero-extended to <8 x i1> and bitcast to i8.
; The VL-enabled run expects a masked compare with a {1to2} broadcast operand;
; the AVX512F-only run expects vpbroadcastq plus a masked zmm compare.
3035 define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
3036 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b:
3037 ; VLX: # %bb.0: # %entry
3038 ; VLX-NEXT: kmovd %edi, %k1
3039 ; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
3040 ; VLX-NEXT: kmovd %k0, %eax
3041 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3044 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b:
3045 ; NoVLX: # %bb.0: # %entry
3046 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3047 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
3048 ; NoVLX-NEXT: kmovw %edi, %k1
3049 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3050 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3051 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3052 ; NoVLX-NEXT: kmovw %k0, %eax
3053 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3054 ; NoVLX-NEXT: vzeroupper
3057 %0 = bitcast <2 x i64> %__a to <2 x i64>
3058 %load = load i64, i64* %__b
3059 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3060 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3061 %2 = icmp eq <2 x i64> %0, %1
3062 %3 = bitcast i8 %__u to <8 x i1>
3063 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3064 %4 = and <2 x i1> %extract.i, %2
3065 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3066 %6 = bitcast <8 x i1> %5 to i8
; Unmasked 128-bit qword equality compare on register operands, with the
; <2 x i1> result widened to <16 x i1>: shuffle indices 0 and 1 keep the
; compare bits, every remaining index (2 or 3) selects a zeroinitializer lane.
; The widened vector is bitcast to i16.
; The AVX512F-only run expects a zmm compare whose mask is reduced to its low
; two bits by the kshiftlw/kshiftrw $14 pair.
3071 define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3072 ; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask:
3073 ; VLX: # %bb.0: # %entry
3074 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
3075 ; VLX-NEXT: kmovd %k0, %eax
3076 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3079 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask:
3080 ; NoVLX: # %bb.0: # %entry
3081 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3082 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3083 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3084 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3085 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3086 ; NoVLX-NEXT: kmovw %k0, %eax
3087 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3088 ; NoVLX-NEXT: vzeroupper
3091 %0 = bitcast <2 x i64> %__a to <2 x i64>
3092 %1 = bitcast <2 x i64> %__b to <2 x i64>
3093 %2 = icmp eq <2 x i64> %0, %1
3094 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3095 %4 = bitcast <16 x i1> %3 to i16
; Unmasked 128-bit qword equality compare whose second operand is loaded from
; %__b. The <2 x i1> result is zero-extended to <16 x i1> (indices 2 and 3 of
; the shuffle select zeroinitializer lanes) and bitcast to i16.
; The VL-enabled run expects the load folded into vpcmpeqq; the AVX512F-only
; run expects a separate vmovdqa followed by a zmm compare.
3099 define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
3100 ; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem:
3101 ; VLX: # %bb.0: # %entry
3102 ; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
3103 ; VLX-NEXT: kmovd %k0, %eax
3104 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3107 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem:
3108 ; NoVLX: # %bb.0: # %entry
3109 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3110 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
3111 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3112 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3113 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3114 ; NoVLX-NEXT: kmovw %k0, %eax
3115 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3116 ; NoVLX-NEXT: vzeroupper
3119 %0 = bitcast <2 x i64> %__a to <2 x i64>
3120 %load = load <2 x i64>, <2 x i64>* %__b
3121 %1 = bitcast <2 x i64> %load to <2 x i64>
3122 %2 = icmp eq <2 x i64> %0, %1
3123 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3124 %4 = bitcast <16 x i1> %3 to i16
; Masked 128-bit qword equality compare on register operands. The icmp result
; is ANDed with the low two bits of %__u, zero-extended to <16 x i1> (all
; shuffle indices other than 0 and 1 select zeroinitializer lanes) and bitcast
; to i16.
; The AVX512F-only run expects a masked zmm compare followed by the
; kshiftlw/kshiftrw $14 pair that keeps only the low two mask bits.
3128 define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3129 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask:
3130 ; VLX: # %bb.0: # %entry
3131 ; VLX-NEXT: kmovd %edi, %k1
3132 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
3133 ; VLX-NEXT: kmovd %k0, %eax
3134 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3137 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask:
3138 ; NoVLX: # %bb.0: # %entry
3139 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3140 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3141 ; NoVLX-NEXT: kmovw %edi, %k1
3142 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3143 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3144 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3145 ; NoVLX-NEXT: kmovw %k0, %eax
3146 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3147 ; NoVLX-NEXT: vzeroupper
3150 %0 = bitcast <2 x i64> %__a to <2 x i64>
3151 %1 = bitcast <2 x i64> %__b to <2 x i64>
3152 %2 = icmp eq <2 x i64> %0, %1
3153 %3 = bitcast i8 %__u to <8 x i1>
3154 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3155 %4 = and <2 x i1> %2, %extract.i
3156 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3157 %6 = bitcast <16 x i1> %5 to i16
; Masked 128-bit qword equality compare whose second operand is loaded from
; %__b. The icmp result is ANDed with the low two bits of %__u, zero-extended
; to <16 x i1> via a shuffle with zeroinitializer, and bitcast to i16.
; The VL-enabled run expects the load folded into the masked vpcmpeqq; the
; AVX512F-only run expects vmovdqa plus a masked zmm compare.
3161 define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
3162 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem:
3163 ; VLX: # %bb.0: # %entry
3164 ; VLX-NEXT: kmovd %edi, %k1
3165 ; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
3166 ; VLX-NEXT: kmovd %k0, %eax
3167 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3170 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem:
3171 ; NoVLX: # %bb.0: # %entry
3172 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3173 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
3174 ; NoVLX-NEXT: kmovw %edi, %k1
3175 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3176 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3177 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3178 ; NoVLX-NEXT: kmovw %k0, %eax
3179 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3180 ; NoVLX-NEXT: vzeroupper
3183 %0 = bitcast <2 x i64> %__a to <2 x i64>
3184 %load = load <2 x i64>, <2 x i64>* %__b
3185 %1 = bitcast <2 x i64> %load to <2 x i64>
3186 %2 = icmp eq <2 x i64> %0, %1
3187 %3 = bitcast i8 %__u to <8 x i1>
3188 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3189 %4 = and <2 x i1> %2, %extract.i
3190 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3191 %6 = bitcast <16 x i1> %5 to i16
; Unmasked 128-bit qword equality compare against a broadcast scalar: the i64
; loaded from %__b is splatted to both lanes (insertelement + shuffle <0, 0>)
; and compared with %__a. The <2 x i1> result is zero-extended to <16 x i1>
; and bitcast to i16.
; The VL-enabled run expects an embedded {1to2} broadcast operand; the
; AVX512F-only run expects an explicit vpbroadcastq and a zmm compare.
3196 define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
3197 ; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b:
3198 ; VLX: # %bb.0: # %entry
3199 ; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
3200 ; VLX-NEXT: kmovd %k0, %eax
3201 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3204 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b:
3205 ; NoVLX: # %bb.0: # %entry
3206 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3207 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
3208 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3209 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3210 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3211 ; NoVLX-NEXT: kmovw %k0, %eax
3212 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3213 ; NoVLX-NEXT: vzeroupper
3216 %0 = bitcast <2 x i64> %__a to <2 x i64>
3217 %load = load i64, i64* %__b
3218 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3219 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3220 %2 = icmp eq <2 x i64> %0, %1
3221 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3222 %4 = bitcast <16 x i1> %3 to i16
; Masked 128-bit qword equality compare against a broadcast scalar: the i64
; loaded from %__b is splatted to both lanes, compared with %__a, and the
; result is ANDed with the low two bits of %__u (mask is the first operand of
; the 'and' here). The value is zero-extended to <16 x i1> and bitcast to i16.
; The VL-enabled run expects a masked compare with a {1to2} broadcast operand;
; the AVX512F-only run expects vpbroadcastq plus a masked zmm compare.
3226 define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
3227 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b:
3228 ; VLX: # %bb.0: # %entry
3229 ; VLX-NEXT: kmovd %edi, %k1
3230 ; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
3231 ; VLX-NEXT: kmovd %k0, %eax
3232 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3235 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b:
3236 ; NoVLX: # %bb.0: # %entry
3237 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3238 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
3239 ; NoVLX-NEXT: kmovw %edi, %k1
3240 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3241 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3242 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3243 ; NoVLX-NEXT: kmovw %k0, %eax
3244 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3245 ; NoVLX-NEXT: vzeroupper
3248 %0 = bitcast <2 x i64> %__a to <2 x i64>
3249 %load = load i64, i64* %__b
3250 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3251 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3252 %2 = icmp eq <2 x i64> %0, %1
3253 %3 = bitcast i8 %__u to <8 x i1>
3254 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3255 %4 = and <2 x i1> %extract.i, %2
3256 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3257 %6 = bitcast <16 x i1> %5 to i16
; Unmasked 128-bit qword equality compare on register operands, with the
; <2 x i1> result widened to <32 x i1>: shuffle indices 0 and 1 keep the
; compare bits, every remaining index (2 or 3) selects a zeroinitializer lane.
; The widened vector is bitcast to i32.
; The AVX512F-only run expects a zmm compare whose mask is reduced to its low
; two bits by the kshiftlw/kshiftrw $14 pair before the kmovw into %eax.
3262 define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3263 ; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
3264 ; VLX: # %bb.0: # %entry
3265 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
3266 ; VLX-NEXT: kmovd %k0, %eax
3269 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
3270 ; NoVLX: # %bb.0: # %entry
3271 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3272 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3273 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3274 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3275 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3276 ; NoVLX-NEXT: kmovw %k0, %eax
3277 ; NoVLX-NEXT: vzeroupper
3280 %0 = bitcast <2 x i64> %__a to <2 x i64>
3281 %1 = bitcast <2 x i64> %__b to <2 x i64>
3282 %2 = icmp eq <2 x i64> %0, %1
3283 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3284 %4 = bitcast <32 x i1> %3 to i32
; Unmasked 128-bit qword equality compare whose second operand is loaded from
; %__b. The <2 x i1> result is zero-extended to <32 x i1> (indices 2 and 3 of
; the shuffle select zeroinitializer lanes) and bitcast to i32.
; The VL-enabled run expects the load folded into vpcmpeqq; the AVX512F-only
; run expects a separate vmovdqa followed by a zmm compare.
3288 define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
3289 ; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
3290 ; VLX: # %bb.0: # %entry
3291 ; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
3292 ; VLX-NEXT: kmovd %k0, %eax
3295 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
3296 ; NoVLX: # %bb.0: # %entry
3297 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3298 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
3299 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3300 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3301 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3302 ; NoVLX-NEXT: kmovw %k0, %eax
3303 ; NoVLX-NEXT: vzeroupper
3306 %0 = bitcast <2 x i64> %__a to <2 x i64>
3307 %load = load <2 x i64>, <2 x i64>* %__b
3308 %1 = bitcast <2 x i64> %load to <2 x i64>
3309 %2 = icmp eq <2 x i64> %0, %1
3310 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3311 %4 = bitcast <32 x i1> %3 to i32
; Masked 128-bit qword equality compare on register operands. The icmp result
; is ANDed with the low two bits of %__u, zero-extended to <32 x i1> (all
; shuffle indices other than 0 and 1 select zeroinitializer lanes) and bitcast
; to i32.
; The AVX512F-only run expects a masked zmm compare followed by the
; kshiftlw/kshiftrw $14 pair that keeps only the low two mask bits.
3315 define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3316 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
3317 ; VLX: # %bb.0: # %entry
3318 ; VLX-NEXT: kmovd %edi, %k1
3319 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
3320 ; VLX-NEXT: kmovd %k0, %eax
3323 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
3324 ; NoVLX: # %bb.0: # %entry
3325 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3326 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3327 ; NoVLX-NEXT: kmovw %edi, %k1
3328 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3329 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3330 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3331 ; NoVLX-NEXT: kmovw %k0, %eax
3332 ; NoVLX-NEXT: vzeroupper
3335 %0 = bitcast <2 x i64> %__a to <2 x i64>
3336 %1 = bitcast <2 x i64> %__b to <2 x i64>
3337 %2 = icmp eq <2 x i64> %0, %1
3338 %3 = bitcast i8 %__u to <8 x i1>
3339 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3340 %4 = and <2 x i1> %2, %extract.i
3341 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3342 %6 = bitcast <32 x i1> %5 to i32
; Masked 128-bit qword equality compare whose second operand is loaded from
; %__b. The icmp result is ANDed with the low two bits of %__u, zero-extended
; to <32 x i1> via a shuffle with zeroinitializer, and bitcast to i32.
; The VL-enabled run expects the load folded into the masked vpcmpeqq; the
; AVX512F-only run expects vmovdqa plus a masked zmm compare.
3346 define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
3347 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
3348 ; VLX: # %bb.0: # %entry
3349 ; VLX-NEXT: kmovd %edi, %k1
3350 ; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
3351 ; VLX-NEXT: kmovd %k0, %eax
3354 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
3355 ; NoVLX: # %bb.0: # %entry
3356 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3357 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
3358 ; NoVLX-NEXT: kmovw %edi, %k1
3359 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3360 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3361 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3362 ; NoVLX-NEXT: kmovw %k0, %eax
3363 ; NoVLX-NEXT: vzeroupper
3366 %0 = bitcast <2 x i64> %__a to <2 x i64>
3367 %load = load <2 x i64>, <2 x i64>* %__b
3368 %1 = bitcast <2 x i64> %load to <2 x i64>
3369 %2 = icmp eq <2 x i64> %0, %1
3370 %3 = bitcast i8 %__u to <8 x i1>
3371 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3372 %4 = and <2 x i1> %2, %extract.i
3373 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3374 %6 = bitcast <32 x i1> %5 to i32
; Unmasked 128-bit qword equality compare against a broadcast scalar: the i64
; loaded from %__b is splatted to both lanes (insertelement + shuffle <0, 0>)
; and compared with %__a. The <2 x i1> result is zero-extended to <32 x i1>
; and bitcast to i32.
; The VL-enabled run expects an embedded {1to2} broadcast operand; the
; AVX512F-only run expects an explicit vpbroadcastq and a zmm compare.
3379 define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
3380 ; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
3381 ; VLX: # %bb.0: # %entry
3382 ; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
3383 ; VLX-NEXT: kmovd %k0, %eax
3386 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
3387 ; NoVLX: # %bb.0: # %entry
3388 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3389 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
3390 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3391 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3392 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3393 ; NoVLX-NEXT: kmovw %k0, %eax
3394 ; NoVLX-NEXT: vzeroupper
3397 %0 = bitcast <2 x i64> %__a to <2 x i64>
3398 %load = load i64, i64* %__b
3399 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3400 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3401 %2 = icmp eq <2 x i64> %0, %1
3402 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3403 %4 = bitcast <32 x i1> %3 to i32
; Masked 128-bit qword equality compare against a broadcast scalar: the i64
; loaded from %__b is splatted to both lanes, compared with %__a, and the
; result is ANDed with the low two bits of %__u (mask is the first operand of
; the 'and' here). The value is zero-extended to <32 x i1> and bitcast to i32.
; The VL-enabled run expects a masked compare with a {1to2} broadcast operand;
; the AVX512F-only run expects vpbroadcastq plus a masked zmm compare.
3407 define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
3408 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
3409 ; VLX: # %bb.0: # %entry
3410 ; VLX-NEXT: kmovd %edi, %k1
3411 ; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
3412 ; VLX-NEXT: kmovd %k0, %eax
3415 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
3416 ; NoVLX: # %bb.0: # %entry
3417 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3418 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
3419 ; NoVLX-NEXT: kmovw %edi, %k1
3420 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3421 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3422 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3423 ; NoVLX-NEXT: kmovw %k0, %eax
3424 ; NoVLX-NEXT: vzeroupper
3427 %0 = bitcast <2 x i64> %__a to <2 x i64>
3428 %load = load i64, i64* %__b
3429 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3430 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3431 %2 = icmp eq <2 x i64> %0, %1
3432 %3 = bitcast i8 %__u to <8 x i1>
3433 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3434 %4 = and <2 x i1> %extract.i, %2
3435 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3436 %6 = bitcast <32 x i1> %5 to i32
3441 define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3442 ; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
3443 ; VLX: # %bb.0: # %entry
3444 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
3445 ; VLX-NEXT: kmovq %k0, %rax
3448 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
3449 ; NoVLX: # %bb.0: # %entry
3450 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3451 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3452 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3453 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3454 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3455 ; NoVLX-NEXT: kmovw %k0, %eax
3456 ; NoVLX-NEXT: movzwl %ax, %eax
3457 ; NoVLX-NEXT: vzeroupper
3460 %0 = bitcast <2 x i64> %__a to <2 x i64>
3461 %1 = bitcast <2 x i64> %__b to <2 x i64>
3462 %2 = icmp eq <2 x i64> %0, %1
3463 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3464 %4 = bitcast <64 x i1> %3 to i64
3468 define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
3469 ; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
3470 ; VLX: # %bb.0: # %entry
3471 ; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
3472 ; VLX-NEXT: kmovq %k0, %rax
3475 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
3476 ; NoVLX: # %bb.0: # %entry
3477 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3478 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
3479 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3480 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3481 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3482 ; NoVLX-NEXT: kmovw %k0, %eax
3483 ; NoVLX-NEXT: movzwl %ax, %eax
3484 ; NoVLX-NEXT: vzeroupper
3487 %0 = bitcast <2 x i64> %__a to <2 x i64>
3488 %load = load <2 x i64>, <2 x i64>* %__b
3489 %1 = bitcast <2 x i64> %load to <2 x i64>
3490 %2 = icmp eq <2 x i64> %0, %1
3491 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3492 %4 = bitcast <64 x i1> %3 to i64
3496 define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
3497 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
3498 ; VLX: # %bb.0: # %entry
3499 ; VLX-NEXT: kmovd %edi, %k1
3500 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
3501 ; VLX-NEXT: kmovq %k0, %rax
3504 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
3505 ; NoVLX: # %bb.0: # %entry
3506 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
3507 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3508 ; NoVLX-NEXT: kmovw %edi, %k1
3509 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3510 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3511 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3512 ; NoVLX-NEXT: kmovw %k0, %eax
3513 ; NoVLX-NEXT: movzwl %ax, %eax
3514 ; NoVLX-NEXT: vzeroupper
3517 %0 = bitcast <2 x i64> %__a to <2 x i64>
3518 %1 = bitcast <2 x i64> %__b to <2 x i64>
3519 %2 = icmp eq <2 x i64> %0, %1
3520 %3 = bitcast i8 %__u to <8 x i1>
3521 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3522 %4 = and <2 x i1> %2, %extract.i
3523 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3524 %6 = bitcast <64 x i1> %5 to i64
3528 define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
3529 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
3530 ; VLX: # %bb.0: # %entry
3531 ; VLX-NEXT: kmovd %edi, %k1
3532 ; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
3533 ; VLX-NEXT: kmovq %k0, %rax
3536 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
3537 ; NoVLX: # %bb.0: # %entry
3538 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3539 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
3540 ; NoVLX-NEXT: kmovw %edi, %k1
3541 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3542 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3543 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3544 ; NoVLX-NEXT: kmovw %k0, %eax
3545 ; NoVLX-NEXT: movzwl %ax, %eax
3546 ; NoVLX-NEXT: vzeroupper
3549 %0 = bitcast <2 x i64> %__a to <2 x i64>
3550 %load = load <2 x i64>, <2 x i64>* %__b
3551 %1 = bitcast <2 x i64> %load to <2 x i64>
3552 %2 = icmp eq <2 x i64> %0, %1
3553 %3 = bitcast i8 %__u to <8 x i1>
3554 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3555 %4 = and <2 x i1> %2, %extract.i
3556 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3557 %6 = bitcast <64 x i1> %5 to i64
3562 define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
3563 ; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
3564 ; VLX: # %bb.0: # %entry
3565 ; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
3566 ; VLX-NEXT: kmovq %k0, %rax
3569 ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
3570 ; NoVLX: # %bb.0: # %entry
3571 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3572 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
3573 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3574 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3575 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3576 ; NoVLX-NEXT: kmovw %k0, %eax
3577 ; NoVLX-NEXT: movzwl %ax, %eax
3578 ; NoVLX-NEXT: vzeroupper
3581 %0 = bitcast <2 x i64> %__a to <2 x i64>
3582 %load = load i64, i64* %__b
3583 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3584 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3585 %2 = icmp eq <2 x i64> %0, %1
3586 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3587 %4 = bitcast <64 x i1> %3 to i64
3591 define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
3592 ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
3593 ; VLX: # %bb.0: # %entry
3594 ; VLX-NEXT: kmovd %edi, %k1
3595 ; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
3596 ; VLX-NEXT: kmovq %k0, %rax
3599 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
3600 ; NoVLX: # %bb.0: # %entry
3601 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3602 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
3603 ; NoVLX-NEXT: kmovw %edi, %k1
3604 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3605 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
3606 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
3607 ; NoVLX-NEXT: kmovw %k0, %eax
3608 ; NoVLX-NEXT: movzwl %ax, %eax
3609 ; NoVLX-NEXT: vzeroupper
3612 %0 = bitcast <2 x i64> %__a to <2 x i64>
3613 %load = load i64, i64* %__b
3614 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
3615 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3616 %2 = icmp eq <2 x i64> %0, %1
3617 %3 = bitcast i8 %__u to <8 x i1>
3618 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
3619 %4 = and <2 x i1> %extract.i, %2
3620 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
3621 %6 = bitcast <64 x i1> %5 to i64
; Equality compare of two <4 x i64> register operands. The <4 x i1> result is
; widened to 8 lanes and bitcast to an i8 bitmask.
3626 define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3627 ; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
3628 ; VLX: # %bb.0: # %entry
3629 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
3630 ; VLX-NEXT: kmovd %k0, %eax
3631 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3632 ; VLX-NEXT: vzeroupper
3635 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
3636 ; NoVLX: # %bb.0: # %entry
3637 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
3638 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3639 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3640 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3641 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3642 ; NoVLX-NEXT: kmovw %k0, %eax
3643 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3644 ; NoVLX-NEXT: vzeroupper
3647 %0 = bitcast <4 x i64> %__a to <4 x i64>
3648 %1 = bitcast <4 x i64> %__b to <4 x i64>
3649 %2 = icmp eq <4 x i64> %0, %1
; Indices 0-3 take the compare lanes; indices 4-7 select from the
; zeroinitializer operand, so the upper four lanes are zero.
3650 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3651 %4 = bitcast <8 x i1> %3 to i8
; Equality compare of a <4 x i64> vector against a <4 x i64> loaded from %__b.
; The <4 x i1> result is widened to 8 lanes and bitcast to an i8 bitmask.
3655 define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
3656 ; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
3657 ; VLX: # %bb.0: # %entry
3658 ; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
3659 ; VLX-NEXT: kmovd %k0, %eax
3660 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3661 ; VLX-NEXT: vzeroupper
3664 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
3665 ; NoVLX: # %bb.0: # %entry
3666 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3667 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
3668 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3669 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3670 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3671 ; NoVLX-NEXT: kmovw %k0, %eax
3672 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3673 ; NoVLX-NEXT: vzeroupper
3676 %0 = bitcast <4 x i64> %__a to <4 x i64>
3677 %load = load <4 x i64>, <4 x i64>* %__b
3678 %1 = bitcast <4 x i64> %load to <4 x i64>
3679 %2 = icmp eq <4 x i64> %0, %1
; Indices 0-3 take the compare lanes; indices 4-7 select from the
; zeroinitializer operand, so the upper four lanes are zero.
3680 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3681 %4 = bitcast <8 x i1> %3 to i8
; Masked equality compare of two <4 x i64> register operands. The <4 x i1>
; result is ANDed with the low four bits of %__u, widened to 8 lanes and
; bitcast to an i8 bitmask.
3685 define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3686 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
3687 ; VLX: # %bb.0: # %entry
3688 ; VLX-NEXT: kmovd %edi, %k1
3689 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
3690 ; VLX-NEXT: kmovd %k0, %eax
3691 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3692 ; VLX-NEXT: vzeroupper
3695 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
3696 ; NoVLX: # %bb.0: # %entry
3697 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
3698 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3699 ; NoVLX-NEXT: kmovw %edi, %k1
3700 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3701 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3702 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3703 ; NoVLX-NEXT: kmovw %k0, %eax
3704 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3705 ; NoVLX-NEXT: vzeroupper
3708 %0 = bitcast <4 x i64> %__a to <4 x i64>
3709 %1 = bitcast <4 x i64> %__b to <4 x i64>
3710 %2 = icmp eq <4 x i64> %0, %1
3711 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0-3 of the 8-lane mask so it matches the compare width.
3712 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3713 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 take the masked compare lanes; indices 4-7 select from the
; zeroinitializer operand, so the upper four lanes are zero.
3714 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3715 %6 = bitcast <8 x i1> %5 to i8
; Masked equality compare of a <4 x i64> vector against a <4 x i64> loaded
; from %__b. The result is ANDed with the low four bits of %__u, widened to
; 8 lanes and bitcast to an i8 bitmask.
3719 define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
3720 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
3721 ; VLX: # %bb.0: # %entry
3722 ; VLX-NEXT: kmovd %edi, %k1
3723 ; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
3724 ; VLX-NEXT: kmovd %k0, %eax
3725 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3726 ; VLX-NEXT: vzeroupper
3729 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
3730 ; NoVLX: # %bb.0: # %entry
3731 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3732 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
3733 ; NoVLX-NEXT: kmovw %edi, %k1
3734 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3735 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3736 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3737 ; NoVLX-NEXT: kmovw %k0, %eax
3738 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3739 ; NoVLX-NEXT: vzeroupper
3742 %0 = bitcast <4 x i64> %__a to <4 x i64>
3743 %load = load <4 x i64>, <4 x i64>* %__b
3744 %1 = bitcast <4 x i64> %load to <4 x i64>
3745 %2 = icmp eq <4 x i64> %0, %1
3746 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0-3 of the 8-lane mask so it matches the compare width.
3747 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3748 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 take the masked compare lanes; indices 4-7 select from the
; zeroinitializer operand, so the upper four lanes are zero.
3749 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3750 %6 = bitcast <8 x i1> %5 to i8
; Equality compare of a <4 x i64> vector against a scalar i64 loaded from %__b
; and splatted to all four lanes. The <4 x i1> result is widened to 8 lanes
; and bitcast to an i8 bitmask.
3755 define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
3756 ; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
3757 ; VLX: # %bb.0: # %entry
3758 ; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
3759 ; VLX-NEXT: kmovd %k0, %eax
3760 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3761 ; VLX-NEXT: vzeroupper
3764 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
3765 ; NoVLX: # %bb.0: # %entry
3766 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3767 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
3768 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3769 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3770 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3771 ; NoVLX-NEXT: kmovw %k0, %eax
3772 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3773 ; NoVLX-NEXT: vzeroupper
3776 %0 = bitcast <4 x i64> %__a to <4 x i64>
3777 %load = load i64, i64* %__b
; Splat the loaded scalar: insert it into lane 0, then replicate lane 0.
3778 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
3779 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
3780 %2 = icmp eq <4 x i64> %0, %1
; Indices 0-3 take the compare lanes; indices 4-7 select from the
; zeroinitializer operand, so the upper four lanes are zero.
3781 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3782 %4 = bitcast <8 x i1> %3 to i8
; Masked equality compare of a <4 x i64> vector against a scalar i64 loaded
; from %__b and splatted to all four lanes. The result is ANDed with the low
; four bits of %__u, widened to 8 lanes and bitcast to an i8 bitmask.
3786 define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
3787 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
3788 ; VLX: # %bb.0: # %entry
3789 ; VLX-NEXT: kmovd %edi, %k1
3790 ; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
3791 ; VLX-NEXT: kmovd %k0, %eax
3792 ; VLX-NEXT: # kill: def $al killed $al killed $eax
3793 ; VLX-NEXT: vzeroupper
3796 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
3797 ; NoVLX: # %bb.0: # %entry
3798 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3799 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
3800 ; NoVLX-NEXT: kmovw %edi, %k1
3801 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3802 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3803 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3804 ; NoVLX-NEXT: kmovw %k0, %eax
3805 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
3806 ; NoVLX-NEXT: vzeroupper
3809 %0 = bitcast <4 x i64> %__a to <4 x i64>
3810 %load = load i64, i64* %__b
; Splat the loaded scalar: insert it into lane 0, then replicate lane 0.
3811 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
3812 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
3813 %2 = icmp eq <4 x i64> %0, %1
3814 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0-3 of the 8-lane mask so it matches the compare width.
3815 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3816 %4 = and <4 x i1> %extract.i, %2
; Indices 0-3 take the masked compare lanes; indices 4-7 select from the
; zeroinitializer operand, so the upper four lanes are zero.
3817 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3818 %6 = bitcast <8 x i1> %5 to i8
; Equality compare of two <4 x i64> register operands. The <4 x i1> result is
; widened to 16 lanes and bitcast to an i16 bitmask.
3823 define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3824 ; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
3825 ; VLX: # %bb.0: # %entry
3826 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
3827 ; VLX-NEXT: kmovd %k0, %eax
3828 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3829 ; VLX-NEXT: vzeroupper
3832 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
3833 ; NoVLX: # %bb.0: # %entry
3834 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
3835 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3836 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3837 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3838 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3839 ; NoVLX-NEXT: kmovw %k0, %eax
3840 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3841 ; NoVLX-NEXT: vzeroupper
3844 %0 = bitcast <4 x i64> %__a to <4 x i64>
3845 %1 = bitcast <4 x i64> %__b to <4 x i64>
3846 %2 = icmp eq <4 x i64> %0, %1
; Indices 0-3 take the compare lanes; all remaining indices are 4-7, which
; select from the zeroinitializer operand, so lanes 4-15 are zero.
3847 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3848 %4 = bitcast <16 x i1> %3 to i16
; Equality compare of a <4 x i64> vector against a <4 x i64> loaded from %__b.
; The <4 x i1> result is widened to 16 lanes and bitcast to an i16 bitmask.
3852 define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
3853 ; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem:
3854 ; VLX: # %bb.0: # %entry
3855 ; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
3856 ; VLX-NEXT: kmovd %k0, %eax
3857 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3858 ; VLX-NEXT: vzeroupper
3861 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem:
3862 ; NoVLX: # %bb.0: # %entry
3863 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3864 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
3865 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3866 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3867 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3868 ; NoVLX-NEXT: kmovw %k0, %eax
3869 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3870 ; NoVLX-NEXT: vzeroupper
3873 %0 = bitcast <4 x i64> %__a to <4 x i64>
3874 %load = load <4 x i64>, <4 x i64>* %__b
3875 %1 = bitcast <4 x i64> %load to <4 x i64>
3876 %2 = icmp eq <4 x i64> %0, %1
; Indices 0-3 take the compare lanes; all remaining indices are 4-7, which
; select from the zeroinitializer operand, so lanes 4-15 are zero.
3877 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3878 %4 = bitcast <16 x i1> %3 to i16
; Masked equality compare of two <4 x i64> register operands. The <4 x i1>
; result is ANDed with the low four bits of %__u, widened to 16 lanes and
; bitcast to an i16 bitmask.
3882 define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
3883 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
3884 ; VLX: # %bb.0: # %entry
3885 ; VLX-NEXT: kmovd %edi, %k1
3886 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
3887 ; VLX-NEXT: kmovd %k0, %eax
3888 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3889 ; VLX-NEXT: vzeroupper
3892 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
3893 ; NoVLX: # %bb.0: # %entry
3894 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
3895 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3896 ; NoVLX-NEXT: kmovw %edi, %k1
3897 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3898 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3899 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3900 ; NoVLX-NEXT: kmovw %k0, %eax
3901 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3902 ; NoVLX-NEXT: vzeroupper
3905 %0 = bitcast <4 x i64> %__a to <4 x i64>
3906 %1 = bitcast <4 x i64> %__b to <4 x i64>
3907 %2 = icmp eq <4 x i64> %0, %1
3908 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0-3 of the 8-lane mask so it matches the compare width.
3909 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3910 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 take the masked compare lanes; all remaining indices are 4-7,
; which select from the zeroinitializer operand, so lanes 4-15 are zero.
3911 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3912 %6 = bitcast <16 x i1> %5 to i16
; Masked equality compare of a <4 x i64> vector against a <4 x i64> loaded
; from %__b. The result is ANDed with the low four bits of %__u, widened to
; 16 lanes and bitcast to an i16 bitmask.
3916 define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
3917 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
3918 ; VLX: # %bb.0: # %entry
3919 ; VLX-NEXT: kmovd %edi, %k1
3920 ; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
3921 ; VLX-NEXT: kmovd %k0, %eax
3922 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3923 ; VLX-NEXT: vzeroupper
3926 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
3927 ; NoVLX: # %bb.0: # %entry
3928 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3929 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
3930 ; NoVLX-NEXT: kmovw %edi, %k1
3931 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3932 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3933 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3934 ; NoVLX-NEXT: kmovw %k0, %eax
3935 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3936 ; NoVLX-NEXT: vzeroupper
3939 %0 = bitcast <4 x i64> %__a to <4 x i64>
3940 %load = load <4 x i64>, <4 x i64>* %__b
3941 %1 = bitcast <4 x i64> %load to <4 x i64>
3942 %2 = icmp eq <4 x i64> %0, %1
3943 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0-3 of the 8-lane mask so it matches the compare width.
3944 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3945 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 take the masked compare lanes; all remaining indices are 4-7,
; which select from the zeroinitializer operand, so lanes 4-15 are zero.
3946 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3947 %6 = bitcast <16 x i1> %5 to i16
; Equality compare of a <4 x i64> vector against a scalar i64 loaded from %__b
; and splatted to all four lanes. The <4 x i1> result is widened to 16 lanes
; and bitcast to an i16 bitmask.
3952 define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
3953 ; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b:
3954 ; VLX: # %bb.0: # %entry
3955 ; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
3956 ; VLX-NEXT: kmovd %k0, %eax
3957 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3958 ; VLX-NEXT: vzeroupper
3961 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b:
3962 ; NoVLX: # %bb.0: # %entry
3963 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3964 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
3965 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
3966 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
3967 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
3968 ; NoVLX-NEXT: kmovw %k0, %eax
3969 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
3970 ; NoVLX-NEXT: vzeroupper
3973 %0 = bitcast <4 x i64> %__a to <4 x i64>
3974 %load = load i64, i64* %__b
; Splat the loaded scalar: insert it into lane 0, then replicate lane 0.
3975 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
3976 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
3977 %2 = icmp eq <4 x i64> %0, %1
; Indices 0-3 take the compare lanes; all remaining indices are 4-7, which
; select from the zeroinitializer operand, so lanes 4-15 are zero.
3978 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
3979 %4 = bitcast <16 x i1> %3 to i16
; Masked equality compare of a <4 x i64> vector against a scalar i64 loaded
; from %__b and splatted to all four lanes. The result is ANDed with the low
; four bits of %__u, widened to 16 lanes and bitcast to an i16 bitmask.
3983 define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
3984 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b:
3985 ; VLX: # %bb.0: # %entry
3986 ; VLX-NEXT: kmovd %edi, %k1
3987 ; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
3988 ; VLX-NEXT: kmovd %k0, %eax
3989 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
3990 ; VLX-NEXT: vzeroupper
3993 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b:
3994 ; NoVLX: # %bb.0: # %entry
3995 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3996 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
3997 ; NoVLX-NEXT: kmovw %edi, %k1
3998 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
3999 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4000 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4001 ; NoVLX-NEXT: kmovw %k0, %eax
4002 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
4003 ; NoVLX-NEXT: vzeroupper
4006 %0 = bitcast <4 x i64> %__a to <4 x i64>
4007 %load = load i64, i64* %__b
; Splat the loaded scalar: insert it into lane 0, then replicate lane 0.
4008 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4009 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4010 %2 = icmp eq <4 x i64> %0, %1
4011 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0-3 of the 8-lane mask so it matches the compare width.
4012 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4013 %4 = and <4 x i1> %extract.i, %2
; Indices 0-3 take the masked compare lanes; all remaining indices are 4-7,
; which select from the zeroinitializer operand, so lanes 4-15 are zero.
4014 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4015 %6 = bitcast <16 x i1> %5 to i16
; Equality compare of two <4 x i64> register operands. The <4 x i1> result is
; widened to 32 lanes and bitcast to an i32 bitmask.
4020 define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
4021 ; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask:
4022 ; VLX: # %bb.0: # %entry
4023 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
4024 ; VLX-NEXT: kmovd %k0, %eax
4025 ; VLX-NEXT: vzeroupper
4028 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask:
4029 ; NoVLX: # %bb.0: # %entry
4030 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
4031 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4032 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4033 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4034 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4035 ; NoVLX-NEXT: kmovw %k0, %eax
4036 ; NoVLX-NEXT: vzeroupper
4039 %0 = bitcast <4 x i64> %__a to <4 x i64>
4040 %1 = bitcast <4 x i64> %__b to <4 x i64>
4041 %2 = icmp eq <4 x i64> %0, %1
; Indices 0-3 take the compare lanes; all remaining indices are 4-7, which
; select from the zeroinitializer operand, so lanes 4-31 are zero.
4042 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4043 %4 = bitcast <32 x i1> %3 to i32
; Equality compare of a <4 x i64> vector against a <4 x i64> loaded from %__b.
; The <4 x i1> result is widened to 32 lanes and bitcast to an i32 bitmask.
4047 define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
4048 ; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem:
4049 ; VLX: # %bb.0: # %entry
4050 ; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
4051 ; VLX-NEXT: kmovd %k0, %eax
4052 ; VLX-NEXT: vzeroupper
4055 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem:
4056 ; NoVLX: # %bb.0: # %entry
4057 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4058 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
4059 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4060 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4061 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4062 ; NoVLX-NEXT: kmovw %k0, %eax
4063 ; NoVLX-NEXT: vzeroupper
4066 %0 = bitcast <4 x i64> %__a to <4 x i64>
4067 %load = load <4 x i64>, <4 x i64>* %__b
4068 %1 = bitcast <4 x i64> %load to <4 x i64>
4069 %2 = icmp eq <4 x i64> %0, %1
; Indices 0-3 take the compare lanes; all remaining indices are 4-7, which
; select from the zeroinitializer operand, so lanes 4-31 are zero.
4070 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4071 %4 = bitcast <32 x i1> %3 to i32
; Masked equality compare of two <4 x i64> register operands. The <4 x i1>
; result is ANDed with the low four bits of %__u, widened to 32 lanes and
; bitcast to an i32 bitmask.
4075 define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
4076 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask:
4077 ; VLX: # %bb.0: # %entry
4078 ; VLX-NEXT: kmovd %edi, %k1
4079 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
4080 ; VLX-NEXT: kmovd %k0, %eax
4081 ; VLX-NEXT: vzeroupper
4084 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask:
4085 ; NoVLX: # %bb.0: # %entry
4086 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
4087 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4088 ; NoVLX-NEXT: kmovw %edi, %k1
4089 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4090 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4091 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4092 ; NoVLX-NEXT: kmovw %k0, %eax
4093 ; NoVLX-NEXT: vzeroupper
4096 %0 = bitcast <4 x i64> %__a to <4 x i64>
4097 %1 = bitcast <4 x i64> %__b to <4 x i64>
4098 %2 = icmp eq <4 x i64> %0, %1
4099 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0-3 of the 8-lane mask so it matches the compare width.
4100 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4101 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 take the masked compare lanes; all remaining indices are 4-7,
; which select from the zeroinitializer operand, so lanes 4-31 are zero.
4102 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4103 %6 = bitcast <32 x i1> %5 to i32
; Masked equality compare of a <4 x i64> vector against a <4 x i64> loaded
; from %__b. The result is ANDed with the low four bits of %__u, widened to
; 32 lanes and bitcast to an i32 bitmask.
4107 define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
4108 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem:
4109 ; VLX: # %bb.0: # %entry
4110 ; VLX-NEXT: kmovd %edi, %k1
4111 ; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
4112 ; VLX-NEXT: kmovd %k0, %eax
4113 ; VLX-NEXT: vzeroupper
4116 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem:
4117 ; NoVLX: # %bb.0: # %entry
4118 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4119 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
4120 ; NoVLX-NEXT: kmovw %edi, %k1
4121 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4122 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4123 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4124 ; NoVLX-NEXT: kmovw %k0, %eax
4125 ; NoVLX-NEXT: vzeroupper
4128 %0 = bitcast <4 x i64> %__a to <4 x i64>
4129 %load = load <4 x i64>, <4 x i64>* %__b
4130 %1 = bitcast <4 x i64> %load to <4 x i64>
4131 %2 = icmp eq <4 x i64> %0, %1
4132 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0-3 of the 8-lane mask so it matches the compare width.
4133 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4134 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 take the masked compare lanes; all remaining indices are 4-7,
; which select from the zeroinitializer operand, so lanes 4-31 are zero.
4135 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4136 %6 = bitcast <32 x i1> %5 to i32
; Equality compare of a <4 x i64> vector against a scalar i64 loaded from %__b
; and splatted to all four lanes. The <4 x i1> result is widened to 32 lanes
; and bitcast to an i32 bitmask.
4141 define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
4142 ; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b:
4143 ; VLX: # %bb.0: # %entry
4144 ; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
4145 ; VLX-NEXT: kmovd %k0, %eax
4146 ; VLX-NEXT: vzeroupper
4149 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b:
4150 ; NoVLX: # %bb.0: # %entry
4151 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4152 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
4153 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4154 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4155 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4156 ; NoVLX-NEXT: kmovw %k0, %eax
4157 ; NoVLX-NEXT: vzeroupper
4160 %0 = bitcast <4 x i64> %__a to <4 x i64>
4161 %load = load i64, i64* %__b
; Splat the loaded scalar: insert it into lane 0, then replicate lane 0.
4162 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4163 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4164 %2 = icmp eq <4 x i64> %0, %1
; Indices 0-3 take the compare lanes; all remaining indices are 4-7, which
; select from the zeroinitializer operand, so lanes 4-31 are zero.
4165 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4166 %4 = bitcast <32 x i1> %3 to i32
; Masked equality compare of a <4 x i64> vector against a scalar i64 loaded
; from %__b and splatted to all four lanes. The result is ANDed with the low
; four bits of %__u, widened to 32 lanes and bitcast to an i32 bitmask.
4170 define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
4171 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b:
4172 ; VLX: # %bb.0: # %entry
4173 ; VLX-NEXT: kmovd %edi, %k1
4174 ; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
4175 ; VLX-NEXT: kmovd %k0, %eax
4176 ; VLX-NEXT: vzeroupper
4179 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b:
4180 ; NoVLX: # %bb.0: # %entry
4181 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4182 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
4183 ; NoVLX-NEXT: kmovw %edi, %k1
4184 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4185 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4186 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4187 ; NoVLX-NEXT: kmovw %k0, %eax
4188 ; NoVLX-NEXT: vzeroupper
4191 %0 = bitcast <4 x i64> %__a to <4 x i64>
4192 %load = load i64, i64* %__b
; Splat the loaded scalar: insert it into lane 0, then replicate lane 0.
4193 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4194 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4195 %2 = icmp eq <4 x i64> %0, %1
4196 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0-3 of the 8-lane mask so it matches the compare width.
4197 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4198 %4 = and <4 x i1> %extract.i, %2
; Indices 0-3 take the masked compare lanes; all remaining indices are 4-7,
; which select from the zeroinitializer operand, so lanes 4-31 are zero.
4199 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4200 %6 = bitcast <32 x i1> %5 to i32
; Equality compare of two <4 x i64> register operands. The <4 x i1> result is
; widened to 64 lanes and bitcast to an i64 bitmask.
4205 define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
4206 ; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
4207 ; VLX: # %bb.0: # %entry
4208 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
4209 ; VLX-NEXT: kmovq %k0, %rax
4210 ; VLX-NEXT: vzeroupper
4213 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
4214 ; NoVLX: # %bb.0: # %entry
4215 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
4216 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4217 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4218 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4219 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4220 ; NoVLX-NEXT: kmovw %k0, %eax
4221 ; NoVLX-NEXT: movzwl %ax, %eax
4222 ; NoVLX-NEXT: vzeroupper
4225 %0 = bitcast <4 x i64> %__a to <4 x i64>
4226 %1 = bitcast <4 x i64> %__b to <4 x i64>
4227 %2 = icmp eq <4 x i64> %0, %1
; Indices 0-3 take the compare lanes; all remaining indices are 4-7, which
; select from the zeroinitializer operand, so lanes 4-63 are zero.
4228 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4229 %4 = bitcast <64 x i1> %3 to i64
; Equality compare of a <4 x i64> vector against a <4 x i64> loaded from %__b.
; The <4 x i1> result is widened to 64 lanes and bitcast to an i64 bitmask.
4233 define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
4234 ; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
4235 ; VLX: # %bb.0: # %entry
4236 ; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
4237 ; VLX-NEXT: kmovq %k0, %rax
4238 ; VLX-NEXT: vzeroupper
4241 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
4242 ; NoVLX: # %bb.0: # %entry
4243 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4244 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
4245 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4246 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4247 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4248 ; NoVLX-NEXT: kmovw %k0, %eax
4249 ; NoVLX-NEXT: movzwl %ax, %eax
4250 ; NoVLX-NEXT: vzeroupper
4253 %0 = bitcast <4 x i64> %__a to <4 x i64>
4254 %load = load <4 x i64>, <4 x i64>* %__b
4255 %1 = bitcast <4 x i64> %load to <4 x i64>
4256 %2 = icmp eq <4 x i64> %0, %1
; Indices 0-3 take the compare lanes; all remaining indices are 4-7, which
; select from the zeroinitializer operand, so lanes 4-63 are zero.
4257 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4258 %4 = bitcast <64 x i1> %3 to i64
; Masked equality compare of two <4 x i64> register operands. The <4 x i1>
; result is ANDed with the low four bits of %__u, widened to 64 lanes and
; bitcast to an i64 bitmask.
4262 define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
4263 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
4264 ; VLX: # %bb.0: # %entry
4265 ; VLX-NEXT: kmovd %edi, %k1
4266 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
4267 ; VLX-NEXT: kmovq %k0, %rax
4268 ; VLX-NEXT: vzeroupper
4271 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
4272 ; NoVLX: # %bb.0: # %entry
4273 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
4274 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4275 ; NoVLX-NEXT: kmovw %edi, %k1
4276 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4277 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4278 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4279 ; NoVLX-NEXT: kmovw %k0, %eax
4280 ; NoVLX-NEXT: movzwl %ax, %eax
4281 ; NoVLX-NEXT: vzeroupper
4284 %0 = bitcast <4 x i64> %__a to <4 x i64>
4285 %1 = bitcast <4 x i64> %__b to <4 x i64>
4286 %2 = icmp eq <4 x i64> %0, %1
4287 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0-3 of the 8-lane mask so it matches the compare width.
4288 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4289 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 take the masked compare lanes; all remaining indices are 4-7,
; which select from the zeroinitializer operand, so lanes 4-63 are zero.
4290 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4291 %6 = bitcast <64 x i1> %5 to i64
; Same as the register form, but the second <4 x i64> operand is loaded from
; %__b. With AVX512VL the load is expected to fold into the masked vpcmpeqq;
; with AVX512F only it is loaded separately (vmovdqa) before the zmm compare.
4295 define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
4296 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
4297 ; VLX: # %bb.0: # %entry
4298 ; VLX-NEXT: kmovd %edi, %k1
4299 ; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
4300 ; VLX-NEXT: kmovq %k0, %rax
4301 ; VLX-NEXT: vzeroupper
4304 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
4305 ; NoVLX: # %bb.0: # %entry
4306 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4307 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
4308 ; NoVLX-NEXT: kmovw %edi, %k1
4309 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4310 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4311 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4312 ; NoVLX-NEXT: kmovw %k0, %eax
4313 ; NoVLX-NEXT: movzwl %ax, %eax
4314 ; NoVLX-NEXT: vzeroupper
4317 %0 = bitcast <4 x i64> %__a to <4 x i64>
4318 %load = load <4 x i64>, <4 x i64>* %__b
4319 %1 = bitcast <4 x i64> %load to <4 x i64>
4320 %2 = icmp eq <4 x i64> %0, %1
4321 %3 = bitcast i8 %__u to <8 x i1>
; Only mask bits 0-3 of %__u are used.
4322 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4323 %4 = and <4 x i1> %2, %extract.i
; Indices 4-7 select from the zeroinitializer operand, so result lanes 4-63 are zero.
4324 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4325 %6 = bitcast <64 x i1> %5 to i64
; Unmasked equality compare of a <4 x i64> vector against a scalar i64 loaded
; from %__b and splatted to all four lanes, widened to a 64-bit mask. With
; AVX512VL an embedded-broadcast ({1to4}) compare is expected; with AVX512F
; only, a separate vpbroadcastq feeds a zmm compare.
4330 define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
4331 ; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
4332 ; VLX: # %bb.0: # %entry
4333 ; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
4334 ; VLX-NEXT: kmovq %k0, %rax
4335 ; VLX-NEXT: vzeroupper
4338 ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
4339 ; NoVLX: # %bb.0: # %entry
4340 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4341 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
4342 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4343 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4344 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4345 ; NoVLX-NEXT: kmovw %k0, %eax
4346 ; NoVLX-NEXT: movzwl %ax, %eax
4347 ; NoVLX-NEXT: vzeroupper
4350 %0 = bitcast <4 x i64> %__a to <4 x i64>
4351 %load = load i64, i64* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 to every lane.
4352 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4353 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4354 %2 = icmp eq <4 x i64> %0, %1
; Indices 4-7 select from the zeroinitializer operand, so result lanes 4-63 are zero.
4355 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4356 %4 = bitcast <64 x i1> %3 to i64
; Masked variant of the broadcast compare: a <4 x i64> vector is compared for
; equality against a splatted scalar loaded from %__b, gated by the low four
; bits of %__u, and widened to a 64-bit mask. Note the `and` takes the mask
; as its first operand here.
4360 define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
4361 ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
4362 ; VLX: # %bb.0: # %entry
4363 ; VLX-NEXT: kmovd %edi, %k1
4364 ; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
4365 ; VLX-NEXT: kmovq %k0, %rax
4366 ; VLX-NEXT: vzeroupper
4369 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
4370 ; NoVLX: # %bb.0: # %entry
4371 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4372 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
4373 ; NoVLX-NEXT: kmovw %edi, %k1
4374 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4375 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
4376 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
4377 ; NoVLX-NEXT: kmovw %k0, %eax
4378 ; NoVLX-NEXT: movzwl %ax, %eax
4379 ; NoVLX-NEXT: vzeroupper
4382 %0 = bitcast <4 x i64> %__a to <4 x i64>
4383 %load = load i64, i64* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 to every lane.
4384 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
4385 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
4386 %2 = icmp eq <4 x i64> %0, %1
4387 %3 = bitcast i8 %__u to <8 x i1>
; Only mask bits 0-3 of %__u are used.
4388 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4389 %4 = and <4 x i1> %extract.i, %2
; Indices 4-7 select from the zeroinitializer operand, so result lanes 4-63 are zero.
4390 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
4391 %6 = bitcast <64 x i1> %5 to i64
; Unmasked equality compare of two <8 x i64> vectors, widened to a 16-bit mask.
; Both RUN configurations are expected to use a single zmm vpcmpeqq into %k0;
; they differ only in the k-register move (kmovd vs. kmovw).
4396 define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4397 ; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
4398 ; VLX: # %bb.0: # %entry
4399 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4400 ; VLX-NEXT: kmovd %k0, %eax
4401 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
4402 ; VLX-NEXT: vzeroupper
4405 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
4406 ; NoVLX: # %bb.0: # %entry
4407 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4408 ; NoVLX-NEXT: kmovw %k0, %eax
4409 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
4410 ; NoVLX-NEXT: vzeroupper
4413 %0 = bitcast <8 x i64> %__a to <8 x i64>
4414 %1 = bitcast <8 x i64> %__b to <8 x i64>
4415 %2 = icmp eq <8 x i64> %0, %1
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-15 are zero.
4416 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4417 %4 = bitcast <16 x i1> %3 to i16
; Unmasked <8 x i64> equality compare with the second operand loaded from
; %__b, widened to a 16-bit mask. The load is expected to fold into vpcmpeqq
; as a memory operand in both RUN configurations.
4421 define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
4422 ; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
4423 ; VLX: # %bb.0: # %entry
4424 ; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
4425 ; VLX-NEXT: kmovd %k0, %eax
4426 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
4427 ; VLX-NEXT: vzeroupper
4430 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
4431 ; NoVLX: # %bb.0: # %entry
4432 ; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
4433 ; NoVLX-NEXT: kmovw %k0, %eax
4434 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
4435 ; NoVLX-NEXT: vzeroupper
4438 %0 = bitcast <8 x i64> %__a to <8 x i64>
4439 %load = load <8 x i64>, <8 x i64>* %__b
4440 %1 = bitcast <8 x i64> %load to <8 x i64>
4441 %2 = icmp eq <8 x i64> %0, %1
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-15 are zero.
4442 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4443 %4 = bitcast <16 x i1> %3 to i16
; Masked <8 x i64> equality compare: the result is ANDed with the eight bits
; of %__u and widened to a 16-bit mask. Both RUN configurations are expected
; to move %edi into %k1 and use it as the write-mask of vpcmpeqq.
4447 define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4448 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
4449 ; VLX: # %bb.0: # %entry
4450 ; VLX-NEXT: kmovd %edi, %k1
4451 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4452 ; VLX-NEXT: kmovd %k0, %eax
4453 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
4454 ; VLX-NEXT: vzeroupper
4457 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
4458 ; NoVLX: # %bb.0: # %entry
4459 ; NoVLX-NEXT: kmovw %edi, %k1
4460 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4461 ; NoVLX-NEXT: kmovw %k0, %eax
4462 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
4463 ; NoVLX-NEXT: vzeroupper
4466 %0 = bitcast <8 x i64> %__a to <8 x i64>
4467 %1 = bitcast <8 x i64> %__b to <8 x i64>
4468 %2 = icmp eq <8 x i64> %0, %1
4469 %3 = bitcast i8 %__u to <8 x i1>
4470 %4 = and <8 x i1> %2, %3
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-15 are zero.
4471 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4472 %6 = bitcast <16 x i1> %5 to i16
; Masked <8 x i64> equality compare with the second operand loaded from %__b,
; gated by %__u and widened to a 16-bit mask. The load is expected to fold
; into the write-masked vpcmpeqq in both RUN configurations.
4476 define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
4477 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
4478 ; VLX: # %bb.0: # %entry
4479 ; VLX-NEXT: kmovd %edi, %k1
4480 ; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4481 ; VLX-NEXT: kmovd %k0, %eax
4482 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
4483 ; VLX-NEXT: vzeroupper
4486 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
4487 ; NoVLX: # %bb.0: # %entry
4488 ; NoVLX-NEXT: kmovw %edi, %k1
4489 ; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4490 ; NoVLX-NEXT: kmovw %k0, %eax
4491 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
4492 ; NoVLX-NEXT: vzeroupper
4495 %0 = bitcast <8 x i64> %__a to <8 x i64>
4496 %load = load <8 x i64>, <8 x i64>* %__b
4497 %1 = bitcast <8 x i64> %load to <8 x i64>
4498 %2 = icmp eq <8 x i64> %0, %1
4499 %3 = bitcast i8 %__u to <8 x i1>
4500 %4 = and <8 x i1> %2, %3
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-15 are zero.
4501 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4502 %6 = bitcast <16 x i1> %5 to i16
; Unmasked <8 x i64> equality compare against a scalar i64 loaded from %__b
; and splatted to all eight lanes, widened to a 16-bit mask. An
; embedded-broadcast ({1to8}) vpcmpeqq is expected in both RUN configurations.
4507 define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
4508 ; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
4509 ; VLX: # %bb.0: # %entry
4510 ; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4511 ; VLX-NEXT: kmovd %k0, %eax
4512 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
4513 ; VLX-NEXT: vzeroupper
4516 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
4517 ; NoVLX: # %bb.0: # %entry
4518 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4519 ; NoVLX-NEXT: kmovw %k0, %eax
4520 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
4521 ; NoVLX-NEXT: vzeroupper
4524 %0 = bitcast <8 x i64> %__a to <8 x i64>
4525 %load = load i64, i64* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 to every lane.
4526 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4527 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4528 %2 = icmp eq <8 x i64> %0, %1
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-15 are zero.
4529 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4530 %4 = bitcast <16 x i1> %3 to i16
; Masked <8 x i64> equality compare against a splatted scalar loaded from
; %__b, gated by %__u and widened to a 16-bit mask. Note the `and` takes the
; mask as its first operand here. A write-masked {1to8} vpcmpeqq is expected.
4534 define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
4535 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
4536 ; VLX: # %bb.0: # %entry
4537 ; VLX-NEXT: kmovd %edi, %k1
4538 ; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4539 ; VLX-NEXT: kmovd %k0, %eax
4540 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
4541 ; VLX-NEXT: vzeroupper
4544 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
4545 ; NoVLX: # %bb.0: # %entry
4546 ; NoVLX-NEXT: kmovw %edi, %k1
4547 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4548 ; NoVLX-NEXT: kmovw %k0, %eax
4549 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
4550 ; NoVLX-NEXT: vzeroupper
4553 %0 = bitcast <8 x i64> %__a to <8 x i64>
4554 %load = load i64, i64* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 to every lane.
4555 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4556 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4557 %2 = icmp eq <8 x i64> %0, %1
4558 %3 = bitcast i8 %__u to <8 x i1>
4559 %4 = and <8 x i1> %3, %2
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-15 are zero.
4560 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4561 %6 = bitcast <16 x i1> %5 to i16
; Unmasked equality compare of two <8 x i64> vectors, widened to a 32-bit mask.
; Both RUN configurations are expected to use a single zmm vpcmpeqq into %k0.
4566 define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4567 ; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
4568 ; VLX: # %bb.0: # %entry
4569 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4570 ; VLX-NEXT: kmovd %k0, %eax
4571 ; VLX-NEXT: vzeroupper
4574 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
4575 ; NoVLX: # %bb.0: # %entry
4576 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4577 ; NoVLX-NEXT: kmovw %k0, %eax
4578 ; NoVLX-NEXT: vzeroupper
4581 %0 = bitcast <8 x i64> %__a to <8 x i64>
4582 %1 = bitcast <8 x i64> %__b to <8 x i64>
4583 %2 = icmp eq <8 x i64> %0, %1
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-31 are zero.
4584 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4585 %4 = bitcast <32 x i1> %3 to i32
; Unmasked <8 x i64> equality compare with the second operand loaded from
; %__b, widened to a 32-bit mask. The load is expected to fold into vpcmpeqq.
4589 define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
4590 ; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
4591 ; VLX: # %bb.0: # %entry
4592 ; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
4593 ; VLX-NEXT: kmovd %k0, %eax
4594 ; VLX-NEXT: vzeroupper
4597 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
4598 ; NoVLX: # %bb.0: # %entry
4599 ; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
4600 ; NoVLX-NEXT: kmovw %k0, %eax
4601 ; NoVLX-NEXT: vzeroupper
4604 %0 = bitcast <8 x i64> %__a to <8 x i64>
4605 %load = load <8 x i64>, <8 x i64>* %__b
4606 %1 = bitcast <8 x i64> %load to <8 x i64>
4607 %2 = icmp eq <8 x i64> %0, %1
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-31 are zero.
4608 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4609 %4 = bitcast <32 x i1> %3 to i32
; Masked <8 x i64> equality compare: the result is ANDed with the eight bits
; of %__u and widened to a 32-bit mask. %edi is expected to be moved into %k1
; and used as the write-mask of vpcmpeqq.
4613 define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4614 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
4615 ; VLX: # %bb.0: # %entry
4616 ; VLX-NEXT: kmovd %edi, %k1
4617 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4618 ; VLX-NEXT: kmovd %k0, %eax
4619 ; VLX-NEXT: vzeroupper
4622 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
4623 ; NoVLX: # %bb.0: # %entry
4624 ; NoVLX-NEXT: kmovw %edi, %k1
4625 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4626 ; NoVLX-NEXT: kmovw %k0, %eax
4627 ; NoVLX-NEXT: vzeroupper
4630 %0 = bitcast <8 x i64> %__a to <8 x i64>
4631 %1 = bitcast <8 x i64> %__b to <8 x i64>
4632 %2 = icmp eq <8 x i64> %0, %1
4633 %3 = bitcast i8 %__u to <8 x i1>
4634 %4 = and <8 x i1> %2, %3
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-31 are zero.
4635 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4636 %6 = bitcast <32 x i1> %5 to i32
; Masked <8 x i64> equality compare with the second operand loaded from %__b,
; gated by %__u and widened to a 32-bit mask. The load is expected to fold
; into the write-masked vpcmpeqq.
4640 define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
4641 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
4642 ; VLX: # %bb.0: # %entry
4643 ; VLX-NEXT: kmovd %edi, %k1
4644 ; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4645 ; VLX-NEXT: kmovd %k0, %eax
4646 ; VLX-NEXT: vzeroupper
4649 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
4650 ; NoVLX: # %bb.0: # %entry
4651 ; NoVLX-NEXT: kmovw %edi, %k1
4652 ; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4653 ; NoVLX-NEXT: kmovw %k0, %eax
4654 ; NoVLX-NEXT: vzeroupper
4657 %0 = bitcast <8 x i64> %__a to <8 x i64>
4658 %load = load <8 x i64>, <8 x i64>* %__b
4659 %1 = bitcast <8 x i64> %load to <8 x i64>
4660 %2 = icmp eq <8 x i64> %0, %1
4661 %3 = bitcast i8 %__u to <8 x i1>
4662 %4 = and <8 x i1> %2, %3
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-31 are zero.
4663 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4664 %6 = bitcast <32 x i1> %5 to i32
; Unmasked <8 x i64> equality compare against a scalar i64 loaded from %__b
; and splatted to all eight lanes, widened to a 32-bit mask. An
; embedded-broadcast ({1to8}) vpcmpeqq is expected.
4669 define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
4670 ; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
4671 ; VLX: # %bb.0: # %entry
4672 ; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4673 ; VLX-NEXT: kmovd %k0, %eax
4674 ; VLX-NEXT: vzeroupper
4677 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
4678 ; NoVLX: # %bb.0: # %entry
4679 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4680 ; NoVLX-NEXT: kmovw %k0, %eax
4681 ; NoVLX-NEXT: vzeroupper
4684 %0 = bitcast <8 x i64> %__a to <8 x i64>
4685 %load = load i64, i64* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 to every lane.
4686 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4687 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4688 %2 = icmp eq <8 x i64> %0, %1
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-31 are zero.
4689 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4690 %4 = bitcast <32 x i1> %3 to i32
; Masked <8 x i64> equality compare against a splatted scalar loaded from
; %__b, gated by %__u and widened to a 32-bit mask. Note the `and` takes the
; mask as its first operand here. A write-masked {1to8} vpcmpeqq is expected.
4694 define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
4695 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
4696 ; VLX: # %bb.0: # %entry
4697 ; VLX-NEXT: kmovd %edi, %k1
4698 ; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4699 ; VLX-NEXT: kmovd %k0, %eax
4700 ; VLX-NEXT: vzeroupper
4703 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
4704 ; NoVLX: # %bb.0: # %entry
4705 ; NoVLX-NEXT: kmovw %edi, %k1
4706 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4707 ; NoVLX-NEXT: kmovw %k0, %eax
4708 ; NoVLX-NEXT: vzeroupper
4711 %0 = bitcast <8 x i64> %__a to <8 x i64>
4712 %load = load i64, i64* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 to every lane.
4713 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4714 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4715 %2 = icmp eq <8 x i64> %0, %1
4716 %3 = bitcast i8 %__u to <8 x i1>
4717 %4 = and <8 x i1> %3, %2
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-31 are zero.
4718 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4719 %6 = bitcast <32 x i1> %5 to i32
; Unmasked equality compare of two <8 x i64> vectors, widened to a 64-bit mask.
; With AVX512BW available (first RUN line) the mask is read with kmovq; with
; AVX512F only it is read with kmovw followed by movzwl.
4724 define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4725 ; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
4726 ; VLX: # %bb.0: # %entry
4727 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4728 ; VLX-NEXT: kmovq %k0, %rax
4729 ; VLX-NEXT: vzeroupper
4732 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
4733 ; NoVLX: # %bb.0: # %entry
4734 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
4735 ; NoVLX-NEXT: kmovw %k0, %eax
4736 ; NoVLX-NEXT: movzwl %ax, %eax
4737 ; NoVLX-NEXT: vzeroupper
4740 %0 = bitcast <8 x i64> %__a to <8 x i64>
4741 %1 = bitcast <8 x i64> %__b to <8 x i64>
4742 %2 = icmp eq <8 x i64> %0, %1
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-63 are zero.
4743 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4744 %4 = bitcast <64 x i1> %3 to i64
; Unmasked <8 x i64> equality compare with the second operand loaded from
; %__b, widened to a 64-bit mask. The load is expected to fold into vpcmpeqq.
4748 define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
4749 ; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
4750 ; VLX: # %bb.0: # %entry
4751 ; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
4752 ; VLX-NEXT: kmovq %k0, %rax
4753 ; VLX-NEXT: vzeroupper
4756 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
4757 ; NoVLX: # %bb.0: # %entry
4758 ; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
4759 ; NoVLX-NEXT: kmovw %k0, %eax
4760 ; NoVLX-NEXT: movzwl %ax, %eax
4761 ; NoVLX-NEXT: vzeroupper
4764 %0 = bitcast <8 x i64> %__a to <8 x i64>
4765 %load = load <8 x i64>, <8 x i64>* %__b
4766 %1 = bitcast <8 x i64> %load to <8 x i64>
4767 %2 = icmp eq <8 x i64> %0, %1
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-63 are zero.
4768 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4769 %4 = bitcast <64 x i1> %3 to i64
; Masked <8 x i64> equality compare: the result is ANDed with the eight bits
; of %__u and widened to a 64-bit mask. %edi is expected to be moved into %k1
; and used as the write-mask of vpcmpeqq.
4773 define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
4774 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
4775 ; VLX: # %bb.0: # %entry
4776 ; VLX-NEXT: kmovd %edi, %k1
4777 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4778 ; VLX-NEXT: kmovq %k0, %rax
4779 ; VLX-NEXT: vzeroupper
4782 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
4783 ; NoVLX: # %bb.0: # %entry
4784 ; NoVLX-NEXT: kmovw %edi, %k1
4785 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
4786 ; NoVLX-NEXT: kmovw %k0, %eax
4787 ; NoVLX-NEXT: movzwl %ax, %eax
4788 ; NoVLX-NEXT: vzeroupper
4791 %0 = bitcast <8 x i64> %__a to <8 x i64>
4792 %1 = bitcast <8 x i64> %__b to <8 x i64>
4793 %2 = icmp eq <8 x i64> %0, %1
4794 %3 = bitcast i8 %__u to <8 x i1>
4795 %4 = and <8 x i1> %2, %3
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-63 are zero.
4796 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4797 %6 = bitcast <64 x i1> %5 to i64
; Masked <8 x i64> equality compare with the second operand loaded from %__b,
; gated by %__u and widened to a 64-bit mask. The load is expected to fold
; into the write-masked vpcmpeqq.
4801 define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
4802 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
4803 ; VLX: # %bb.0: # %entry
4804 ; VLX-NEXT: kmovd %edi, %k1
4805 ; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4806 ; VLX-NEXT: kmovq %k0, %rax
4807 ; VLX-NEXT: vzeroupper
4810 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
4811 ; NoVLX: # %bb.0: # %entry
4812 ; NoVLX-NEXT: kmovw %edi, %k1
4813 ; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
4814 ; NoVLX-NEXT: kmovw %k0, %eax
4815 ; NoVLX-NEXT: movzwl %ax, %eax
4816 ; NoVLX-NEXT: vzeroupper
4819 %0 = bitcast <8 x i64> %__a to <8 x i64>
4820 %load = load <8 x i64>, <8 x i64>* %__b
4821 %1 = bitcast <8 x i64> %load to <8 x i64>
4822 %2 = icmp eq <8 x i64> %0, %1
4823 %3 = bitcast i8 %__u to <8 x i1>
4824 %4 = and <8 x i1> %2, %3
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-63 are zero.
4825 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4826 %6 = bitcast <64 x i1> %5 to i64
; Unmasked <8 x i64> equality compare against a scalar i64 loaded from %__b
; and splatted to all eight lanes, widened to a 64-bit mask. An
; embedded-broadcast ({1to8}) vpcmpeqq is expected.
4831 define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
4832 ; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
4833 ; VLX: # %bb.0: # %entry
4834 ; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4835 ; VLX-NEXT: kmovq %k0, %rax
4836 ; VLX-NEXT: vzeroupper
4839 ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
4840 ; NoVLX: # %bb.0: # %entry
4841 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
4842 ; NoVLX-NEXT: kmovw %k0, %eax
4843 ; NoVLX-NEXT: movzwl %ax, %eax
4844 ; NoVLX-NEXT: vzeroupper
4847 %0 = bitcast <8 x i64> %__a to <8 x i64>
4848 %load = load i64, i64* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 to every lane.
4849 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4850 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4851 %2 = icmp eq <8 x i64> %0, %1
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-63 are zero.
4852 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4853 %4 = bitcast <64 x i1> %3 to i64
; Masked <8 x i64> equality compare against a splatted scalar loaded from
; %__b, gated by %__u and widened to a 64-bit mask. Note the `and` takes the
; mask as its first operand here. A write-masked {1to8} vpcmpeqq is expected.
4857 define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
4858 ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
4859 ; VLX: # %bb.0: # %entry
4860 ; VLX-NEXT: kmovd %edi, %k1
4861 ; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4862 ; VLX-NEXT: kmovq %k0, %rax
4863 ; VLX-NEXT: vzeroupper
4866 ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
4867 ; NoVLX: # %bb.0: # %entry
4868 ; NoVLX-NEXT: kmovw %edi, %k1
4869 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
4870 ; NoVLX-NEXT: kmovw %k0, %eax
4871 ; NoVLX-NEXT: movzwl %ax, %eax
4872 ; NoVLX-NEXT: vzeroupper
4875 %0 = bitcast <8 x i64> %__a to <8 x i64>
4876 %load = load i64, i64* %__b
; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 to every lane.
4877 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
4878 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4879 %2 = icmp eq <8 x i64> %0, %1
4880 %3 = bitcast i8 %__u to <8 x i1>
4881 %4 = and <8 x i1> %3, %2
; Indices 8-15 select from the zeroinitializer operand, so result lanes 8-63 are zero.
4882 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4883 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than compare of two <16 x i8> vectors (passed as
; <2 x i64>), widened to a 32-bit mask. With AVX512VL+BW a vpcmpgtb into %k0
; is expected; with AVX512F only, the compare produces a vector in xmm that is
; sign-extended to dwords and converted to a mask with vptestmd.
4888 define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
4889 ; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
4890 ; VLX: # %bb.0: # %entry
4891 ; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
4892 ; VLX-NEXT: kmovd %k0, %eax
4895 ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
4896 ; NoVLX: # %bb.0: # %entry
4897 ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
4898 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
4899 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
4900 ; NoVLX-NEXT: kmovw %k0, %eax
4901 ; NoVLX-NEXT: vzeroupper
4904 %0 = bitcast <2 x i64> %__a to <16 x i8>
4905 %1 = bitcast <2 x i64> %__b to <16 x i8>
4906 %2 = icmp sgt <16 x i8> %0, %1
; Indices 16-31 select from the zeroinitializer operand, so result lanes 16-31 are zero.
4907 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4908 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed greater-than compare of <16 x i8> vectors with the second
; operand loaded from %__b, widened to a 32-bit mask. The load is expected to
; fold into vpcmpgtb as a memory operand in both RUN configurations.
4912 define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
4913 ; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
4914 ; VLX: # %bb.0: # %entry
4915 ; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
4916 ; VLX-NEXT: kmovd %k0, %eax
4919 ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
4920 ; NoVLX: # %bb.0: # %entry
4921 ; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
4922 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
4923 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
4924 ; NoVLX-NEXT: kmovw %k0, %eax
4925 ; NoVLX-NEXT: vzeroupper
4928 %0 = bitcast <2 x i64> %__a to <16 x i8>
4929 %load = load <2 x i64>, <2 x i64>* %__b
4930 %1 = bitcast <2 x i64> %load to <16 x i8>
4931 %2 = icmp sgt <16 x i8> %0, %1
; Indices 16-31 select from the zeroinitializer operand, so result lanes 16-31 are zero.
4932 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4933 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of two <16 x i8> vectors: the result is
; ANDed with the sixteen bits of %__u and widened to a 32-bit mask. With
; AVX512VL+BW a write-masked vpcmpgtb is expected; with AVX512F only, the mask
; is applied with a scalar andl after the compare result is moved to %eax.
4937 define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
4938 ; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
4939 ; VLX: # %bb.0: # %entry
4940 ; VLX-NEXT: kmovd %edi, %k1
4941 ; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
4942 ; VLX-NEXT: kmovd %k0, %eax
4945 ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
4946 ; NoVLX: # %bb.0: # %entry
4947 ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
4948 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
4949 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
4950 ; NoVLX-NEXT: kmovw %k0, %eax
4951 ; NoVLX-NEXT: andl %edi, %eax
4952 ; NoVLX-NEXT: vzeroupper
4955 %0 = bitcast <2 x i64> %__a to <16 x i8>
4956 %1 = bitcast <2 x i64> %__b to <16 x i8>
4957 %2 = icmp sgt <16 x i8> %0, %1
4958 %3 = bitcast i16 %__u to <16 x i1>
4959 %4 = and <16 x i1> %2, %3
; Indices 16-31 select from the zeroinitializer operand, so result lanes 16-31 are zero.
4960 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4961 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-than compare of <16 x i8> vectors with the second
; operand loaded from %__b, gated by the sixteen bits of %__u and widened to
; a 32-bit mask. The load is expected to fold into vpcmpgtb; with AVX512F only
; the mask is applied afterwards with a scalar andl.
4965 define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
4966 ; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
4967 ; VLX: # %bb.0: # %entry
4968 ; VLX-NEXT: kmovd %edi, %k1
4969 ; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
4970 ; VLX-NEXT: kmovd %k0, %eax
4973 ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
4974 ; NoVLX: # %bb.0: # %entry
4975 ; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
4976 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
4977 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
4978 ; NoVLX-NEXT: kmovw %k0, %eax
4979 ; NoVLX-NEXT: andl %edi, %eax
4980 ; NoVLX-NEXT: vzeroupper
4983 %0 = bitcast <2 x i64> %__a to <16 x i8>
4984 %load = load <2 x i64>, <2 x i64>* %__b
4985 %1 = bitcast <2 x i64> %load to <16 x i8>
4986 %2 = icmp sgt <16 x i8> %0, %1
4987 %3 = bitcast i16 %__u to <16 x i1>
4988 %4 = and <16 x i1> %2, %3
; Indices 16-31 select from the zeroinitializer operand, so result lanes 16-31 are zero.
4989 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
4990 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed greater-than compare of two <16 x i8> vectors (passed as
; <2 x i64>), widened from 16 lanes to a 64-lane mask that is bitcast to i64.
; With AVX512VL/BW the checks expect vpcmpgtb into %k0 plus kmovq; with only
; AVX512F the 16-bit mask is zero-extended with movzwl.
4995 define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
4996 ; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
4997 ; VLX: # %bb.0: # %entry
4998 ; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
4999 ; VLX-NEXT: kmovq %k0, %rax
5002 ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
5003 ; NoVLX: # %bb.0: # %entry
5004 ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
5005 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
5006 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5007 ; NoVLX-NEXT: kmovw %k0, %eax
5008 ; NoVLX-NEXT: movzwl %ax, %eax
5009 ; NoVLX-NEXT: vzeroupper
5012 %0 = bitcast <2 x i64> %__a to <16 x i8>
5013 %1 = bitcast <2 x i64> %__b to <16 x i8>
5014 %2 = icmp sgt <16 x i8> %0, %1
; Indices 0-15 take the compare result; every index in 16-31 selects from the zeroinitializer operand, so lanes 16-63 are zero.
5015 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5016 %4 = bitcast <64 x i1> %3 to i64
; Memory-operand variant of the unmasked <16 x i8> signed greater-than
; compare: the right-hand side is loaded from %__b. The 16-lane result is
; widened to a 64-lane mask that is bitcast to i64.
; Both check prefixes expect the load to be folded into vpcmpgtb as (%rdi).
5020 define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5021 ; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
5022 ; VLX: # %bb.0: # %entry
5023 ; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
5024 ; VLX-NEXT: kmovq %k0, %rax
5027 ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
5028 ; NoVLX: # %bb.0: # %entry
5029 ; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
5030 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
5031 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5032 ; NoVLX-NEXT: kmovw %k0, %eax
5033 ; NoVLX-NEXT: movzwl %ax, %eax
5034 ; NoVLX-NEXT: vzeroupper
5037 %0 = bitcast <2 x i64> %__a to <16 x i8>
5038 %load = load <2 x i64>, <2 x i64>* %__b
5039 %1 = bitcast <2 x i64> %load to <16 x i8>
5040 %2 = icmp sgt <16 x i8> %0, %1
; Indices 0-15 take the compare result; every index in 16-31 selects from the zeroinitializer operand, so lanes 16-63 are zero.
5041 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5042 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of two <16 x i8> vectors (passed as
; <2 x i64>). The 16-lane result is ANDed with the i16 mask %__u and widened
; to a 64-lane mask that is bitcast to i64.
; With AVX512VL/BW the checks expect one masked vpcmpgtb plus kmovq; with
; only AVX512F the mask is applied in a GPR with andl.
5046 define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5047 ; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
5048 ; VLX: # %bb.0: # %entry
5049 ; VLX-NEXT: kmovd %edi, %k1
5050 ; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
5051 ; VLX-NEXT: kmovq %k0, %rax
5054 ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
5055 ; NoVLX: # %bb.0: # %entry
5056 ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
5057 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
5058 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5059 ; NoVLX-NEXT: kmovw %k0, %eax
5060 ; NoVLX-NEXT: andl %edi, %eax
5061 ; NoVLX-NEXT: vzeroupper
5064 %0 = bitcast <2 x i64> %__a to <16 x i8>
5065 %1 = bitcast <2 x i64> %__b to <16 x i8>
5066 %2 = icmp sgt <16 x i8> %0, %1
5067 %3 = bitcast i16 %__u to <16 x i1>
5068 %4 = and <16 x i1> %2, %3
; Indices 0-15 take the masked compare result; every index in 16-31 selects from the zeroinitializer operand, so lanes 16-63 are zero.
5069 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5070 %6 = bitcast <64 x i1> %5 to i64
; Memory-operand variant of the masked <16 x i8> signed greater-than compare:
; the right-hand side is loaded from %__b. The result is ANDed with %__u and
; widened to a 64-lane mask that is bitcast to i64.
; Both check prefixes expect the load to be folded into vpcmpgtb as (%rsi).
5074 define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5075 ; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
5076 ; VLX: # %bb.0: # %entry
5077 ; VLX-NEXT: kmovd %edi, %k1
5078 ; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
5079 ; VLX-NEXT: kmovq %k0, %rax
5082 ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
5083 ; NoVLX: # %bb.0: # %entry
5084 ; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
5085 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
5086 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5087 ; NoVLX-NEXT: kmovw %k0, %eax
5088 ; NoVLX-NEXT: andl %edi, %eax
5089 ; NoVLX-NEXT: vzeroupper
5092 %0 = bitcast <2 x i64> %__a to <16 x i8>
5093 %load = load <2 x i64>, <2 x i64>* %__b
5094 %1 = bitcast <2 x i64> %load to <16 x i8>
5095 %2 = icmp sgt <16 x i8> %0, %1
5096 %3 = bitcast i16 %__u to <16 x i1>
5097 %4 = and <16 x i1> %2, %3
; Indices 0-15 take the masked compare result; every index in 16-31 selects from the zeroinitializer operand, so lanes 16-63 are zero.
5098 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5099 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than compare of two <32 x i8> vectors (passed as
; <4 x i64>), widened from 32 lanes to a 64-lane mask that is bitcast to i64.
; With AVX512VL/BW the checks expect a ymm vpcmpgtb into %k0. With only
; AVX512F the ymm result is split into two 128-bit halves, each tested
; separately, and the two 16-bit masks are merged with shll $16 / orl.
5104 define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5105 ; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
5106 ; VLX: # %bb.0: # %entry
5107 ; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0
5108 ; VLX-NEXT: kmovq %k0, %rax
5109 ; VLX-NEXT: vzeroupper
5112 ; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
5113 ; NoVLX: # %bb.0: # %entry
5114 ; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
5115 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
5116 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
5117 ; NoVLX-NEXT: kmovw %k0, %ecx
5118 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
5119 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
5120 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5121 ; NoVLX-NEXT: kmovw %k0, %eax
5122 ; NoVLX-NEXT: shll $16, %eax
5123 ; NoVLX-NEXT: orl %ecx, %eax
5124 ; NoVLX-NEXT: vzeroupper
5127 %0 = bitcast <4 x i64> %__a to <32 x i8>
5128 %1 = bitcast <4 x i64> %__b to <32 x i8>
5129 %2 = icmp sgt <32 x i8> %0, %1
; Shuffle indices 32-63 select from the zeroinitializer operand, so the upper 32 lanes are zero.
5130 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5131 %4 = bitcast <64 x i1> %3 to i64
; Memory-operand variant of the unmasked <32 x i8> signed greater-than
; compare: the right-hand side is loaded from %__b. The 32-lane result is
; widened to a 64-lane mask that is bitcast to i64.
; Both check prefixes expect the load to be folded into vpcmpgtb as (%rdi);
; the AVX512F-only sequence then handles the two 128-bit halves separately.
5135 define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
5136 ; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem:
5137 ; VLX: # %bb.0: # %entry
5138 ; VLX-NEXT: vpcmpgtb (%rdi), %ymm0, %k0
5139 ; VLX-NEXT: kmovq %k0, %rax
5140 ; VLX-NEXT: vzeroupper
5143 ; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem:
5144 ; NoVLX: # %bb.0: # %entry
5145 ; NoVLX-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0
5146 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
5147 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
5148 ; NoVLX-NEXT: kmovw %k0, %ecx
5149 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
5150 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
5151 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5152 ; NoVLX-NEXT: kmovw %k0, %eax
5153 ; NoVLX-NEXT: shll $16, %eax
5154 ; NoVLX-NEXT: orl %ecx, %eax
5155 ; NoVLX-NEXT: vzeroupper
5158 %0 = bitcast <4 x i64> %__a to <32 x i8>
5159 %load = load <4 x i64>, <4 x i64>* %__b
5160 %1 = bitcast <4 x i64> %load to <32 x i8>
5161 %2 = icmp sgt <32 x i8> %0, %1
; Shuffle indices 32-63 select from the zeroinitializer operand, so the upper 32 lanes are zero.
5162 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5163 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of two <32 x i8> vectors (passed as
; <4 x i64>). The 32-lane result is ANDed with the i32 mask %__u and widened
; to a 64-lane mask that is bitcast to i64.
; With AVX512VL/BW the checks expect one masked ymm vpcmpgtb. With only
; AVX512F each 128-bit half is tested separately: the low half is ANDed with
; %edi, the high half with %edi shifted right by 16, then the halves are merged.
5167 define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5168 ; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
5169 ; VLX: # %bb.0: # %entry
5170 ; VLX-NEXT: kmovd %edi, %k1
5171 ; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1}
5172 ; VLX-NEXT: kmovq %k0, %rax
5173 ; VLX-NEXT: vzeroupper
5176 ; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
5177 ; NoVLX: # %bb.0: # %entry
5178 ; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
5179 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
5180 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
5181 ; NoVLX-NEXT: kmovw %k0, %eax
5182 ; NoVLX-NEXT: andl %edi, %eax
5183 ; NoVLX-NEXT: shrl $16, %edi
5184 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
5185 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
5186 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5187 ; NoVLX-NEXT: kmovw %k0, %ecx
5188 ; NoVLX-NEXT: andl %edi, %ecx
5189 ; NoVLX-NEXT: shll $16, %ecx
5190 ; NoVLX-NEXT: movzwl %ax, %eax
5191 ; NoVLX-NEXT: orl %ecx, %eax
5192 ; NoVLX-NEXT: vzeroupper
5195 %0 = bitcast <4 x i64> %__a to <32 x i8>
5196 %1 = bitcast <4 x i64> %__b to <32 x i8>
5197 %2 = icmp sgt <32 x i8> %0, %1
5198 %3 = bitcast i32 %__u to <32 x i1>
5199 %4 = and <32 x i1> %2, %3
; Shuffle indices 32-63 select from the zeroinitializer operand, so the upper 32 lanes are zero.
5200 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5201 %6 = bitcast <64 x i1> %5 to i64
; Memory-operand variant of the masked <32 x i8> signed greater-than compare:
; the right-hand side is loaded from %__b. The result is ANDed with the i32
; mask %__u and widened to a 64-lane mask that is bitcast to i64.
; Both check prefixes expect the load to be folded into vpcmpgtb as (%rsi);
; the AVX512F-only sequence masks each 16-bit half with the matching half of %edi.
5205 define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
5206 ; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
5207 ; VLX: # %bb.0: # %entry
5208 ; VLX-NEXT: kmovd %edi, %k1
5209 ; VLX-NEXT: vpcmpgtb (%rsi), %ymm0, %k0 {%k1}
5210 ; VLX-NEXT: kmovq %k0, %rax
5211 ; VLX-NEXT: vzeroupper
5214 ; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
5215 ; NoVLX: # %bb.0: # %entry
5216 ; NoVLX-NEXT: vpcmpgtb (%rsi), %ymm0, %ymm0
5217 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
5218 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
5219 ; NoVLX-NEXT: kmovw %k0, %eax
5220 ; NoVLX-NEXT: andl %edi, %eax
5221 ; NoVLX-NEXT: shrl $16, %edi
5222 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
5223 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
5224 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5225 ; NoVLX-NEXT: kmovw %k0, %ecx
5226 ; NoVLX-NEXT: andl %edi, %ecx
5227 ; NoVLX-NEXT: shll $16, %ecx
5228 ; NoVLX-NEXT: movzwl %ax, %eax
5229 ; NoVLX-NEXT: orl %ecx, %eax
5230 ; NoVLX-NEXT: vzeroupper
5233 %0 = bitcast <4 x i64> %__a to <32 x i8>
5234 %load = load <4 x i64>, <4 x i64>* %__b
5235 %1 = bitcast <4 x i64> %load to <32 x i8>
5236 %2 = icmp sgt <32 x i8> %0, %1
5237 %3 = bitcast i32 %__u to <32 x i1>
5238 %4 = and <32 x i1> %2, %3
; Shuffle indices 32-63 select from the zeroinitializer operand, so the upper 32 lanes are zero.
5239 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5240 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than compare of two <8 x i16> vectors (passed as
; <2 x i64>), widened from 8 lanes to a 16-lane mask that is bitcast to i16.
; With AVX512VL/BW the checks expect vpcmpgtw into %k0; with only AVX512F the
; xmm compare result is sign-extended with vpmovsxwq and tested with vptestmq.
5245 define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5246 ; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask:
5247 ; VLX: # %bb.0: # %entry
5248 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
5249 ; VLX-NEXT: kmovd %k0, %eax
5250 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
5253 ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask:
5254 ; NoVLX: # %bb.0: # %entry
5255 ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
5256 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5257 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
5258 ; NoVLX-NEXT: kmovw %k0, %eax
5259 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
5260 ; NoVLX-NEXT: vzeroupper
5263 %0 = bitcast <2 x i64> %__a to <8 x i16>
5264 %1 = bitcast <2 x i64> %__b to <8 x i16>
5265 %2 = icmp sgt <8 x i16> %0, %1
; Shuffle indices 8-15 select from the zeroinitializer operand, so the upper 8 lanes are zero.
5266 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5267 %4 = bitcast <16 x i1> %3 to i16
; Memory-operand variant of the unmasked <8 x i16> signed greater-than
; compare: the right-hand side is loaded from %__b. The 8-lane result is
; widened to a 16-lane mask that is bitcast to i16.
; Both check prefixes expect the load to be folded into vpcmpgtw as (%rdi).
5271 define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5272 ; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem:
5273 ; VLX: # %bb.0: # %entry
5274 ; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
5275 ; VLX-NEXT: kmovd %k0, %eax
5276 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
5279 ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem:
5280 ; NoVLX: # %bb.0: # %entry
5281 ; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
5282 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5283 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
5284 ; NoVLX-NEXT: kmovw %k0, %eax
5285 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
5286 ; NoVLX-NEXT: vzeroupper
5289 %0 = bitcast <2 x i64> %__a to <8 x i16>
5290 %load = load <2 x i64>, <2 x i64>* %__b
5291 %1 = bitcast <2 x i64> %load to <8 x i16>
5292 %2 = icmp sgt <8 x i16> %0, %1
; Shuffle indices 8-15 select from the zeroinitializer operand, so the upper 8 lanes are zero.
5293 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5294 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than compare of two <8 x i16> vectors (passed as
; <2 x i64>). The 8-lane result is ANDed with the i8 mask %__u and widened to
; a 16-lane mask that is bitcast to i16.
; With AVX512VL/BW the checks expect one masked vpcmpgtw; with only AVX512F
; the mask is moved into %k1 and applied as the write-mask of vptestmq.
5298 define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5299 ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask:
5300 ; VLX: # %bb.0: # %entry
5301 ; VLX-NEXT: kmovd %edi, %k1
5302 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
5303 ; VLX-NEXT: kmovd %k0, %eax
5304 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
5307 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask:
5308 ; NoVLX: # %bb.0: # %entry
5309 ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
5310 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5311 ; NoVLX-NEXT: kmovw %edi, %k1
5312 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
5313 ; NoVLX-NEXT: kmovw %k0, %eax
5314 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
5315 ; NoVLX-NEXT: vzeroupper
5318 %0 = bitcast <2 x i64> %__a to <8 x i16>
5319 %1 = bitcast <2 x i64> %__b to <8 x i16>
5320 %2 = icmp sgt <8 x i16> %0, %1
5321 %3 = bitcast i8 %__u to <8 x i1>
5322 %4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select from the zeroinitializer operand, so the upper 8 lanes are zero.
5323 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5324 %6 = bitcast <16 x i1> %5 to i16
; Memory-operand variant of the masked <8 x i16> signed greater-than compare:
; the right-hand side is loaded from %__b. The result is ANDed with the i8
; mask %__u and widened to a 16-lane mask that is bitcast to i16.
; Both check prefixes expect the load to be folded into vpcmpgtw as (%rsi).
5328 define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5329 ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem:
5330 ; VLX: # %bb.0: # %entry
5331 ; VLX-NEXT: kmovd %edi, %k1
5332 ; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
5333 ; VLX-NEXT: kmovd %k0, %eax
5334 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
5337 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem:
5338 ; NoVLX: # %bb.0: # %entry
5339 ; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
5340 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5341 ; NoVLX-NEXT: kmovw %edi, %k1
5342 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
5343 ; NoVLX-NEXT: kmovw %k0, %eax
5344 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
5345 ; NoVLX-NEXT: vzeroupper
5348 %0 = bitcast <2 x i64> %__a to <8 x i16>
5349 %load = load <2 x i64>, <2 x i64>* %__b
5350 %1 = bitcast <2 x i64> %load to <8 x i16>
5351 %2 = icmp sgt <8 x i16> %0, %1
5352 %3 = bitcast i8 %__u to <8 x i1>
5353 %4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select from the zeroinitializer operand, so the upper 8 lanes are zero.
5354 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5355 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed greater-than compare of two <8 x i16> vectors (passed as
; <2 x i64>), widened from 8 lanes to a 32-lane mask that is bitcast to i32.
; With AVX512VL/BW the checks expect vpcmpgtw into %k0; with only AVX512F the
; xmm compare result is sign-extended with vpmovsxwq and tested with vptestmq.
5360 define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5361 ; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask:
5362 ; VLX: # %bb.0: # %entry
5363 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
5364 ; VLX-NEXT: kmovd %k0, %eax
5367 ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask:
5368 ; NoVLX: # %bb.0: # %entry
5369 ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
5370 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5371 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
5372 ; NoVLX-NEXT: kmovw %k0, %eax
5373 ; NoVLX-NEXT: vzeroupper
5376 %0 = bitcast <2 x i64> %__a to <8 x i16>
5377 %1 = bitcast <2 x i64> %__b to <8 x i16>
5378 %2 = icmp sgt <8 x i16> %0, %1
; Indices 0-7 take the compare result; every index in 8-15 selects from the zeroinitializer operand, so lanes 8-31 are zero.
5379 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5380 %4 = bitcast <32 x i1> %3 to i32
; Memory-operand variant of the unmasked <8 x i16> signed greater-than
; compare: the right-hand side is loaded from %__b. The 8-lane result is
; widened to a 32-lane mask that is bitcast to i32.
; Both check prefixes expect the load to be folded into vpcmpgtw as (%rdi).
5384 define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5385 ; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem:
5386 ; VLX: # %bb.0: # %entry
5387 ; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
5388 ; VLX-NEXT: kmovd %k0, %eax
5391 ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem:
5392 ; NoVLX: # %bb.0: # %entry
5393 ; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
5394 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5395 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
5396 ; NoVLX-NEXT: kmovw %k0, %eax
5397 ; NoVLX-NEXT: vzeroupper
5400 %0 = bitcast <2 x i64> %__a to <8 x i16>
5401 %load = load <2 x i64>, <2 x i64>* %__b
5402 %1 = bitcast <2 x i64> %load to <8 x i16>
5403 %2 = icmp sgt <8 x i16> %0, %1
; Indices 0-7 take the compare result; every index in 8-15 selects from the zeroinitializer operand, so lanes 8-31 are zero.
5404 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5405 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of two <8 x i16> vectors (passed as
; <2 x i64>). The 8-lane result is ANDed with the i8 mask %__u and widened to
; a 32-lane mask that is bitcast to i32.
; With AVX512VL/BW the checks expect one masked vpcmpgtw; with only AVX512F
; the mask is moved into %k1 and applied as the write-mask of vptestmq.
5409 define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5410 ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask:
5411 ; VLX: # %bb.0: # %entry
5412 ; VLX-NEXT: kmovd %edi, %k1
5413 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
5414 ; VLX-NEXT: kmovd %k0, %eax
5417 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask:
5418 ; NoVLX: # %bb.0: # %entry
5419 ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
5420 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5421 ; NoVLX-NEXT: kmovw %edi, %k1
5422 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
5423 ; NoVLX-NEXT: kmovw %k0, %eax
5424 ; NoVLX-NEXT: vzeroupper
5427 %0 = bitcast <2 x i64> %__a to <8 x i16>
5428 %1 = bitcast <2 x i64> %__b to <8 x i16>
5429 %2 = icmp sgt <8 x i16> %0, %1
5430 %3 = bitcast i8 %__u to <8 x i1>
5431 %4 = and <8 x i1> %2, %3
; Indices 0-7 take the masked compare result; every index in 8-15 selects from the zeroinitializer operand, so lanes 8-31 are zero.
5432 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5433 %6 = bitcast <32 x i1> %5 to i32
; Memory-operand variant of the masked <8 x i16> signed greater-than compare:
; the right-hand side is loaded from %__b. The result is ANDed with the i8
; mask %__u and widened to a 32-lane mask that is bitcast to i32.
; Both check prefixes expect the load to be folded into vpcmpgtw as (%rsi).
5437 define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5438 ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem:
5439 ; VLX: # %bb.0: # %entry
5440 ; VLX-NEXT: kmovd %edi, %k1
5441 ; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
5442 ; VLX-NEXT: kmovd %k0, %eax
5445 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem:
5446 ; NoVLX: # %bb.0: # %entry
5447 ; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
5448 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5449 ; NoVLX-NEXT: kmovw %edi, %k1
5450 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
5451 ; NoVLX-NEXT: kmovw %k0, %eax
5452 ; NoVLX-NEXT: vzeroupper
5455 %0 = bitcast <2 x i64> %__a to <8 x i16>
5456 %load = load <2 x i64>, <2 x i64>* %__b
5457 %1 = bitcast <2 x i64> %load to <8 x i16>
5458 %2 = icmp sgt <8 x i16> %0, %1
5459 %3 = bitcast i8 %__u to <8 x i1>
5460 %4 = and <8 x i1> %2, %3
; Indices 0-7 take the masked compare result; every index in 8-15 selects from the zeroinitializer operand, so lanes 8-31 are zero.
5461 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5462 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed greater-than compare of two <8 x i16> vectors (passed as
; <2 x i64>), widened from 8 lanes to a 64-lane mask that is bitcast to i64.
; With AVX512VL/BW the checks expect vpcmpgtw into %k0 plus kmovq; with only
; AVX512F the kmovw result is zero-extended with movzwl.
5467 define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5468 ; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask:
5469 ; VLX: # %bb.0: # %entry
5470 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
5471 ; VLX-NEXT: kmovq %k0, %rax
5474 ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask:
5475 ; NoVLX: # %bb.0: # %entry
5476 ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
5477 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5478 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
5479 ; NoVLX-NEXT: kmovw %k0, %eax
5480 ; NoVLX-NEXT: movzwl %ax, %eax
5481 ; NoVLX-NEXT: vzeroupper
5484 %0 = bitcast <2 x i64> %__a to <8 x i16>
5485 %1 = bitcast <2 x i64> %__b to <8 x i16>
5486 %2 = icmp sgt <8 x i16> %0, %1
; Indices 0-7 take the compare result; every index in 8-15 selects from the zeroinitializer operand, so lanes 8-63 are zero.
5487 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5488 %4 = bitcast <64 x i1> %3 to i64
; Memory-operand variant of the unmasked <8 x i16> signed greater-than
; compare: the right-hand side is loaded from %__b. The 8-lane result is
; widened to a 64-lane mask that is bitcast to i64.
; Both check prefixes expect the load to be folded into vpcmpgtw as (%rdi).
5492 define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5493 ; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem:
5494 ; VLX: # %bb.0: # %entry
5495 ; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
5496 ; VLX-NEXT: kmovq %k0, %rax
5499 ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem:
5500 ; NoVLX: # %bb.0: # %entry
5501 ; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
5502 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5503 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
5504 ; NoVLX-NEXT: kmovw %k0, %eax
5505 ; NoVLX-NEXT: movzwl %ax, %eax
5506 ; NoVLX-NEXT: vzeroupper
5509 %0 = bitcast <2 x i64> %__a to <8 x i16>
5510 %load = load <2 x i64>, <2 x i64>* %__b
5511 %1 = bitcast <2 x i64> %load to <8 x i16>
5512 %2 = icmp sgt <8 x i16> %0, %1
; Indices 0-7 take the compare result; every index in 8-15 selects from the zeroinitializer operand, so lanes 8-63 are zero.
5513 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5514 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of two <8 x i16> vectors (passed as
; <2 x i64>). The 8-lane result is ANDed with the i8 mask %__u and widened to
; a 64-lane mask that is bitcast to i64.
; With AVX512VL/BW the checks expect one masked vpcmpgtw plus kmovq; with
; only AVX512F the mask is applied as the write-mask of vptestmq.
5518 define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5519 ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask:
5520 ; VLX: # %bb.0: # %entry
5521 ; VLX-NEXT: kmovd %edi, %k1
5522 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
5523 ; VLX-NEXT: kmovq %k0, %rax
5526 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask:
5527 ; NoVLX: # %bb.0: # %entry
5528 ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
5529 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5530 ; NoVLX-NEXT: kmovw %edi, %k1
5531 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
5532 ; NoVLX-NEXT: kmovw %k0, %eax
5533 ; NoVLX-NEXT: movzwl %ax, %eax
5534 ; NoVLX-NEXT: vzeroupper
5537 %0 = bitcast <2 x i64> %__a to <8 x i16>
5538 %1 = bitcast <2 x i64> %__b to <8 x i16>
5539 %2 = icmp sgt <8 x i16> %0, %1
5540 %3 = bitcast i8 %__u to <8 x i1>
5541 %4 = and <8 x i1> %2, %3
; Indices 0-7 take the masked compare result; every index in 8-15 selects from the zeroinitializer operand, so lanes 8-63 are zero.
5542 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5543 %6 = bitcast <64 x i1> %5 to i64
; Memory-operand variant of the masked <8 x i16> signed greater-than compare:
; the right-hand side is loaded from %__b. The result is ANDed with the i8
; mask %__u and widened to a 64-lane mask that is bitcast to i64.
; Both check prefixes expect the load to be folded into vpcmpgtw as (%rsi).
5547 define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5548 ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem:
5549 ; VLX: # %bb.0: # %entry
5550 ; VLX-NEXT: kmovd %edi, %k1
5551 ; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
5552 ; VLX-NEXT: kmovq %k0, %rax
5555 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem:
5556 ; NoVLX: # %bb.0: # %entry
5557 ; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
5558 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
5559 ; NoVLX-NEXT: kmovw %edi, %k1
5560 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
5561 ; NoVLX-NEXT: kmovw %k0, %eax
5562 ; NoVLX-NEXT: movzwl %ax, %eax
5563 ; NoVLX-NEXT: vzeroupper
5566 %0 = bitcast <2 x i64> %__a to <8 x i16>
5567 %load = load <2 x i64>, <2 x i64>* %__b
5568 %1 = bitcast <2 x i64> %load to <8 x i16>
5569 %2 = icmp sgt <8 x i16> %0, %1
5570 %3 = bitcast i8 %__u to <8 x i1>
5571 %4 = and <8 x i1> %2, %3
; Indices 0-7 take the masked compare result; every index in 8-15 selects from the zeroinitializer operand, so lanes 8-63 are zero.
5572 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5573 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than compare of two <16 x i16> vectors (passed as
; <4 x i64>), widened from 16 lanes to a 32-lane mask that is bitcast to i32.
; With AVX512VL/BW the checks expect a ymm vpcmpgtw into %k0; with only
; AVX512F the ymm result is sign-extended with vpmovsxwd and tested with vptestmd.
5578 define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5579 ; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask:
5580 ; VLX: # %bb.0: # %entry
5581 ; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
5582 ; VLX-NEXT: kmovd %k0, %eax
5583 ; VLX-NEXT: vzeroupper
5586 ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask:
5587 ; NoVLX: # %bb.0: # %entry
5588 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
5589 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5590 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5591 ; NoVLX-NEXT: kmovw %k0, %eax
5592 ; NoVLX-NEXT: vzeroupper
5595 %0 = bitcast <4 x i64> %__a to <16 x i16>
5596 %1 = bitcast <4 x i64> %__b to <16 x i16>
5597 %2 = icmp sgt <16 x i16> %0, %1
; Shuffle indices 16-31 select from the zeroinitializer operand, so the upper 16 lanes are zero.
5598 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5599 %4 = bitcast <32 x i1> %3 to i32
; Memory-operand variant of the unmasked <16 x i16> signed greater-than
; compare: the right-hand side is loaded from %__b. The 16-lane result is
; widened to a 32-lane mask that is bitcast to i32.
; Both check prefixes expect the load to be folded into vpcmpgtw as (%rdi).
5603 define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
5604 ; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem:
5605 ; VLX: # %bb.0: # %entry
5606 ; VLX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0
5607 ; VLX-NEXT: kmovd %k0, %eax
5608 ; VLX-NEXT: vzeroupper
5611 ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem:
5612 ; NoVLX: # %bb.0: # %entry
5613 ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
5614 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5615 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5616 ; NoVLX-NEXT: kmovw %k0, %eax
5617 ; NoVLX-NEXT: vzeroupper
5620 %0 = bitcast <4 x i64> %__a to <16 x i16>
5621 %load = load <4 x i64>, <4 x i64>* %__b
5622 %1 = bitcast <4 x i64> %load to <16 x i16>
5623 %2 = icmp sgt <16 x i16> %0, %1
; Shuffle indices 16-31 select from the zeroinitializer operand, so the upper 16 lanes are zero.
5624 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5625 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of two <16 x i16> vectors (passed as
; <4 x i64>). The 16-lane result is ANDed with the i16 mask %__u and widened
; to a 32-lane mask that is bitcast to i32.
; With AVX512VL/BW the checks expect one masked ymm vpcmpgtw; with only
; AVX512F the mask is applied in a GPR with andl after kmovw.
5629 define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5630 ; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask:
5631 ; VLX: # %bb.0: # %entry
5632 ; VLX-NEXT: kmovd %edi, %k1
5633 ; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1}
5634 ; VLX-NEXT: kmovd %k0, %eax
5635 ; VLX-NEXT: vzeroupper
5638 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask:
5639 ; NoVLX: # %bb.0: # %entry
5640 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
5641 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5642 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5643 ; NoVLX-NEXT: kmovw %k0, %eax
5644 ; NoVLX-NEXT: andl %edi, %eax
5645 ; NoVLX-NEXT: vzeroupper
5648 %0 = bitcast <4 x i64> %__a to <16 x i16>
5649 %1 = bitcast <4 x i64> %__b to <16 x i16>
5650 %2 = icmp sgt <16 x i16> %0, %1
5651 %3 = bitcast i16 %__u to <16 x i1>
5652 %4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select from the zeroinitializer operand, so the upper 16 lanes are zero.
5653 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5654 %6 = bitcast <32 x i1> %5 to i32
; Memory-operand variant of the masked <16 x i16> signed greater-than
; compare: the right-hand side is loaded from %__b. The result is ANDed with
; the i16 mask %__u and widened to a 32-lane mask that is bitcast to i32.
; Both check prefixes expect the load to be folded into vpcmpgtw as (%rsi).
5658 define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
5659 ; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem:
5660 ; VLX: # %bb.0: # %entry
5661 ; VLX-NEXT: kmovd %edi, %k1
5662 ; VLX-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1}
5663 ; VLX-NEXT: kmovd %k0, %eax
5664 ; VLX-NEXT: vzeroupper
5667 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem:
5668 ; NoVLX: # %bb.0: # %entry
5669 ; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
5670 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5671 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5672 ; NoVLX-NEXT: kmovw %k0, %eax
5673 ; NoVLX-NEXT: andl %edi, %eax
5674 ; NoVLX-NEXT: vzeroupper
5677 %0 = bitcast <4 x i64> %__a to <16 x i16>
5678 %load = load <4 x i64>, <4 x i64>* %__b
5679 %1 = bitcast <4 x i64> %load to <16 x i16>
5680 %2 = icmp sgt <16 x i16> %0, %1
5681 %3 = bitcast i16 %__u to <16 x i1>
5682 %4 = and <16 x i1> %2, %3
; Shuffle indices 16-31 select from the zeroinitializer operand, so the upper 16 lanes are zero.
5683 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5684 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed greater-than compare of 16 x i16 lanes (register operands),
; with the 16-bit result widened to an i64 mask.
; Check lines below are autogenerated (see the NOTE at the top of the file):
; the VLX run expects vpcmpgtw into a k-register read back with kmovq; the
; NoVLX run expects a ymm compare, vpmovsxwd + vptestmd, and a movzwl of the
; 16-bit result.
5689 define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5690 ; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask:
5691 ; VLX: # %bb.0: # %entry
5692 ; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
5693 ; VLX-NEXT: kmovq %k0, %rax
5694 ; VLX-NEXT: vzeroupper
5697 ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask:
5698 ; NoVLX: # %bb.0: # %entry
5699 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
5700 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5701 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5702 ; NoVLX-NEXT: kmovw %k0, %eax
5703 ; NoVLX-NEXT: movzwl %ax, %eax
5704 ; NoVLX-NEXT: vzeroupper
5707 %0 = bitcast <4 x i64> %__a to <16 x i16>
5708 %1 = bitcast <4 x i64> %__b to <16 x i16>
5709 %2 = icmp sgt <16 x i16> %0, %1
; Every shuffle index >= 16 selects a lane of the zeroinitializer operand, so result bits 16-63 are zero.
5710 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5711 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed greater-than compare of 16 x i16 lanes where the second
; operand is loaded from %__b; the 16-bit result is widened to an i64 mask.
; Check lines below are autogenerated (see the NOTE at the top of the file):
; both runs expect the load to be folded into vpcmpgtw as a (%rdi) operand.
5715 define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
5716 ; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem:
5717 ; VLX: # %bb.0: # %entry
5718 ; VLX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0
5719 ; VLX-NEXT: kmovq %k0, %rax
5720 ; VLX-NEXT: vzeroupper
5723 ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem:
5724 ; NoVLX: # %bb.0: # %entry
5725 ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
5726 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5727 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5728 ; NoVLX-NEXT: kmovw %k0, %eax
5729 ; NoVLX-NEXT: movzwl %ax, %eax
5730 ; NoVLX-NEXT: vzeroupper
5733 %0 = bitcast <4 x i64> %__a to <16 x i16>
5734 %load = load <4 x i64>, <4 x i64>* %__b
5735 %1 = bitcast <4 x i64> %load to <16 x i16>
5736 %2 = icmp sgt <16 x i16> %0, %1
; Every shuffle index >= 16 selects a lane of the zeroinitializer operand, so result bits 16-63 are zero.
5737 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5738 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of 16 x i16 lanes (register operands),
; with the 16-bit result widened to an i64 mask.
; Check lines below are autogenerated (see the NOTE at the top of the file):
; the VLX run expects one write-masked vpcmpgtw into a k-register, while the
; NoVLX run expects a ymm compare, vpmovsxwd + vptestmd, and a scalar andl
; with %edi to apply the mask.
5742 define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
5743 ; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask:
5744 ; VLX: # %bb.0: # %entry
5745 ; VLX-NEXT: kmovd %edi, %k1
5746 ; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1}
5747 ; VLX-NEXT: kmovq %k0, %rax
5748 ; VLX-NEXT: vzeroupper
5751 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask:
5752 ; NoVLX: # %bb.0: # %entry
5753 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
5754 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5755 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5756 ; NoVLX-NEXT: kmovw %k0, %eax
5757 ; NoVLX-NEXT: andl %edi, %eax
5758 ; NoVLX-NEXT: vzeroupper
5761 %0 = bitcast <4 x i64> %__a to <16 x i16>
5762 %1 = bitcast <4 x i64> %__b to <16 x i16>
5763 %2 = icmp sgt <16 x i16> %0, %1
; Reinterpret the scalar mask %__u as one i1 per lane and AND it into the compare result.
5764 %3 = bitcast i16 %__u to <16 x i1>
5765 %4 = and <16 x i1> %2, %3
; Every shuffle index >= 16 selects a lane of the zeroinitializer operand, so result bits 16-63 are zero.
5766 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5767 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-than compare of 16 x i16 lanes where the second
; operand is loaded from %__b; the 16-bit result is widened to an i64 mask.
; Check lines below are autogenerated (see the NOTE at the top of the file):
; both runs expect the load to be folded into vpcmpgtw as a (%rsi) operand;
; the NoVLX run applies the mask with a scalar andl instead of a k-register.
5771 define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
5772 ; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem:
5773 ; VLX: # %bb.0: # %entry
5774 ; VLX-NEXT: kmovd %edi, %k1
5775 ; VLX-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1}
5776 ; VLX-NEXT: kmovq %k0, %rax
5777 ; VLX-NEXT: vzeroupper
5780 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem:
5781 ; NoVLX: # %bb.0: # %entry
5782 ; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
5783 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5784 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5785 ; NoVLX-NEXT: kmovw %k0, %eax
5786 ; NoVLX-NEXT: andl %edi, %eax
5787 ; NoVLX-NEXT: vzeroupper
5790 %0 = bitcast <4 x i64> %__a to <16 x i16>
5791 %load = load <4 x i64>, <4 x i64>* %__b
5792 %1 = bitcast <4 x i64> %load to <16 x i16>
5793 %2 = icmp sgt <16 x i16> %0, %1
; Reinterpret the scalar mask %__u as one i1 per lane and AND it into the compare result.
5794 %3 = bitcast i16 %__u to <16 x i1>
5795 %4 = and <16 x i1> %2, %3
; Every shuffle index >= 16 selects a lane of the zeroinitializer operand, so result bits 16-63 are zero.
5796 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
5797 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than compare of 32 x i16 lanes (register operands),
; with the 32-bit result widened to an i64 mask.
; Check lines below are autogenerated (see the NOTE at the top of the file):
; the VLX run expects a single zmm vpcmpgtw into a k-register; the NoVLX run
; expects the upper halves to be extracted with vextracti64x4, each ymm half
; compared separately, and the two 16-bit results merged with shll $16 / orl.
5802 define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
5803 ; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
5804 ; VLX: # %bb.0: # %entry
5805 ; VLX-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
5806 ; VLX-NEXT: kmovq %k0, %rax
5807 ; VLX-NEXT: vzeroupper
5810 ; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
5811 ; NoVLX: # %bb.0: # %entry
5812 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
5813 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3
5814 ; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2
5815 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
5816 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5817 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5818 ; NoVLX-NEXT: kmovw %k0, %ecx
5819 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm0
5820 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5821 ; NoVLX-NEXT: kmovw %k0, %eax
5822 ; NoVLX-NEXT: shll $16, %eax
5823 ; NoVLX-NEXT: orl %ecx, %eax
5824 ; NoVLX-NEXT: vzeroupper
5827 %0 = bitcast <8 x i64> %__a to <32 x i16>
5828 %1 = bitcast <8 x i64> %__b to <32 x i16>
5829 %2 = icmp sgt <32 x i16> %0, %1
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so the upper 32 result bits are zero.
5830 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5831 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed greater-than compare of 32 x i16 lanes where the second
; operand is loaded from %__b; the 32-bit result is widened to an i64 mask.
; Check lines below are autogenerated (see the NOTE at the top of the file):
; the VLX run expects one zmm vpcmpgtw with a folded (%rdi) operand; the NoVLX
; run expects two ymm compares against (%rdi) and 32(%rdi), merged with
; shll $16 / orl.
5835 define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
5836 ; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
5837 ; VLX: # %bb.0: # %entry
5838 ; VLX-NEXT: vpcmpgtw (%rdi), %zmm0, %k0
5839 ; VLX-NEXT: kmovq %k0, %rax
5840 ; VLX-NEXT: vzeroupper
5843 ; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
5844 ; NoVLX: # %bb.0: # %entry
5845 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1
5846 ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
5847 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5848 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5849 ; NoVLX-NEXT: kmovw %k0, %ecx
5850 ; NoVLX-NEXT: vpcmpgtw 32(%rdi), %ymm1, %ymm0
5851 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5852 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5853 ; NoVLX-NEXT: kmovw %k0, %eax
5854 ; NoVLX-NEXT: shll $16, %eax
5855 ; NoVLX-NEXT: orl %ecx, %eax
5856 ; NoVLX-NEXT: vzeroupper
5859 %0 = bitcast <8 x i64> %__a to <32 x i16>
5860 %load = load <8 x i64>, <8 x i64>* %__b
5861 %1 = bitcast <8 x i64> %load to <32 x i16>
5862 %2 = icmp sgt <32 x i16> %0, %1
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so the upper 32 result bits are zero.
5863 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5864 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of 32 x i16 lanes (register operands),
; with the 32-bit result widened to an i64 mask.
; Check lines below are autogenerated (see the NOTE at the top of the file):
; the VLX run expects one write-masked zmm vpcmpgtw; the NoVLX run expects the
; two ymm halves to be compared separately, each 16-bit result ANDed with the
; matching half of %edi (low half, then %edi shifted right by 16), and the
; halves merged with shll $16 / orl.
5868 define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
5869 ; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
5870 ; VLX: # %bb.0: # %entry
5871 ; VLX-NEXT: kmovd %edi, %k1
5872 ; VLX-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
5873 ; VLX-NEXT: kmovq %k0, %rax
5874 ; VLX-NEXT: vzeroupper
5877 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
5878 ; NoVLX: # %bb.0: # %entry
5879 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm2
5880 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
5881 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
5882 ; NoVLX-NEXT: kmovw %k0, %eax
5883 ; NoVLX-NEXT: andl %edi, %eax
5884 ; NoVLX-NEXT: shrl $16, %edi
5885 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
5886 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
5887 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
5888 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5889 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5890 ; NoVLX-NEXT: kmovw %k0, %ecx
5891 ; NoVLX-NEXT: andl %edi, %ecx
5892 ; NoVLX-NEXT: shll $16, %ecx
5893 ; NoVLX-NEXT: movzwl %ax, %eax
5894 ; NoVLX-NEXT: orl %ecx, %eax
5895 ; NoVLX-NEXT: vzeroupper
5898 %0 = bitcast <8 x i64> %__a to <32 x i16>
5899 %1 = bitcast <8 x i64> %__b to <32 x i16>
5900 %2 = icmp sgt <32 x i16> %0, %1
; Reinterpret the scalar mask %__u as one i1 per lane and AND it into the compare result.
5901 %3 = bitcast i32 %__u to <32 x i1>
5902 %4 = and <32 x i1> %2, %3
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so the upper 32 result bits are zero.
5903 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5904 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-than compare of 32 x i16 lanes where the second
; operand is loaded from %__b; the 32-bit result is widened to an i64 mask.
; Check lines below are autogenerated (see the NOTE at the top of the file):
; the VLX run expects one write-masked zmm vpcmpgtw with a folded (%rsi)
; operand; the NoVLX run expects two ymm compares against (%rsi) and
; 32(%rsi), each ANDed with the matching half of %edi and merged with
; shll $16 / orl.
5908 define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
5909 ; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
5910 ; VLX: # %bb.0: # %entry
5911 ; VLX-NEXT: kmovd %edi, %k1
5912 ; VLX-NEXT: vpcmpgtw (%rsi), %zmm0, %k0 {%k1}
5913 ; VLX-NEXT: kmovq %k0, %rax
5914 ; VLX-NEXT: vzeroupper
5917 ; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
5918 ; NoVLX: # %bb.0: # %entry
5919 ; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm1
5920 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
5921 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
5922 ; NoVLX-NEXT: kmovw %k0, %eax
5923 ; NoVLX-NEXT: andl %edi, %eax
5924 ; NoVLX-NEXT: shrl $16, %edi
5925 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
5926 ; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm0, %ymm0
5927 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
5928 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
5929 ; NoVLX-NEXT: kmovw %k0, %ecx
5930 ; NoVLX-NEXT: andl %edi, %ecx
5931 ; NoVLX-NEXT: shll $16, %ecx
5932 ; NoVLX-NEXT: movzwl %ax, %eax
5933 ; NoVLX-NEXT: orl %ecx, %eax
5934 ; NoVLX-NEXT: vzeroupper
5937 %0 = bitcast <8 x i64> %__a to <32 x i16>
5938 %load = load <8 x i64>, <8 x i64>* %__b
5939 %1 = bitcast <8 x i64> %load to <32 x i16>
5940 %2 = icmp sgt <32 x i16> %0, %1
; Reinterpret the scalar mask %__u as one i1 per lane and AND it into the compare result.
5941 %3 = bitcast i32 %__u to <32 x i1>
5942 %4 = and <32 x i1> %2, %3
; Shuffle indices 32-63 select lanes of the zeroinitializer operand, so the upper 32 result bits are zero.
5943 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
5944 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than compare of 4 x i32 lanes (register operands),
; with the 4-bit result widened to an i8 mask.
; Check lines below are autogenerated (see the NOTE at the top of the file):
; the VLX run expects an xmm vpcmpgtd into a k-register; the NoVLX run expects
; a zmm-width vpcmpgtd followed by kshiftlw/kshiftrw $12, which keeps only the
; low 4 bits of the 16-bit mask.
5949 define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
5950 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask:
5951 ; VLX: # %bb.0: # %entry
5952 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
5953 ; VLX-NEXT: kmovd %k0, %eax
5954 ; VLX-NEXT: # kill: def $al killed $al killed $eax
5957 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask:
5958 ; NoVLX: # %bb.0: # %entry
5959 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
5960 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5961 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
5962 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
5963 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
5964 ; NoVLX-NEXT: kmovw %k0, %eax
5965 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
5966 ; NoVLX-NEXT: vzeroupper
5969 %0 = bitcast <2 x i64> %__a to <4 x i32>
5970 %1 = bitcast <2 x i64> %__b to <4 x i32>
5971 %2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so the upper 4 result bits are zero.
5972 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5973 %4 = bitcast <8 x i1> %3 to i8
; Unmasked signed greater-than compare of 4 x i32 lanes where the second
; operand is loaded from %__b; the 4-bit result is widened to an i8 mask.
; Check lines below are autogenerated (see the NOTE at the top of the file):
; the VLX run expects the load folded into vpcmpgtd; the NoVLX run expects a
; separate vmovdqa load, a zmm-width compare, and kshiftlw/kshiftrw $12 to
; keep only the low 4 mask bits.
5977 define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
5978 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem:
5979 ; VLX: # %bb.0: # %entry
5980 ; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
5981 ; VLX-NEXT: kmovd %k0, %eax
5982 ; VLX-NEXT: # kill: def $al killed $al killed $eax
5985 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem:
5986 ; NoVLX: # %bb.0: # %entry
5987 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5988 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
5989 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
5990 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
5991 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
5992 ; NoVLX-NEXT: kmovw %k0, %eax
5993 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
5994 ; NoVLX-NEXT: vzeroupper
5997 %0 = bitcast <2 x i64> %__a to <4 x i32>
5998 %load = load <2 x i64>, <2 x i64>* %__b
5999 %1 = bitcast <2 x i64> %load to <4 x i32>
6000 %2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so the upper 4 result bits are zero.
6001 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6002 %4 = bitcast <8 x i1> %3 to i8
; Masked signed greater-than compare of 4 x i32 lanes (register operands),
; with the 4-bit result widened to an i8 mask. Only the low 4 bits of %__u
; take part in the mask.
; Check lines below are autogenerated (see the NOTE at the top of the file):
; both runs expect a write-masked vpcmpgtd; the NoVLX run uses zmm width and
; kshiftlw/kshiftrw $12 to keep only the low 4 mask bits.
6006 define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6007 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask:
6008 ; VLX: # %bb.0: # %entry
6009 ; VLX-NEXT: kmovd %edi, %k1
6010 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
6011 ; VLX-NEXT: kmovd %k0, %eax
6012 ; VLX-NEXT: # kill: def $al killed $al killed $eax
6015 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask:
6016 ; NoVLX: # %bb.0: # %entry
6017 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
6018 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6019 ; NoVLX-NEXT: kmovw %edi, %k1
6020 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6021 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6022 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6023 ; NoVLX-NEXT: kmovw %k0, %eax
6024 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
6025 ; NoVLX-NEXT: vzeroupper
6028 %0 = bitcast <2 x i64> %__a to <4 x i32>
6029 %1 = bitcast <2 x i64> %__b to <4 x i32>
6030 %2 = icmp sgt <4 x i32> %0, %1
; Expand %__u to 8 x i1, keep its low 4 lanes, and AND them into the compare result.
6031 %3 = bitcast i8 %__u to <8 x i1>
6032 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6033 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so the upper 4 result bits are zero.
6034 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6035 %6 = bitcast <8 x i1> %5 to i8
; Masked signed greater-than compare of 4 x i32 lanes where the second
; operand is loaded from %__b; the 4-bit result is widened to an i8 mask.
; Only the low 4 bits of %__u take part in the mask.
; Check lines below are autogenerated (see the NOTE at the top of the file):
; the VLX run expects the load folded into a write-masked vpcmpgtd; the NoVLX
; run expects a separate vmovdqa load, a write-masked zmm compare, and
; kshiftlw/kshiftrw $12 to keep only the low 4 mask bits.
6039 define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6040 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem:
6041 ; VLX: # %bb.0: # %entry
6042 ; VLX-NEXT: kmovd %edi, %k1
6043 ; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
6044 ; VLX-NEXT: kmovd %k0, %eax
6045 ; VLX-NEXT: # kill: def $al killed $al killed $eax
6048 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem:
6049 ; NoVLX: # %bb.0: # %entry
6050 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6051 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
6052 ; NoVLX-NEXT: kmovw %edi, %k1
6053 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6054 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6055 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6056 ; NoVLX-NEXT: kmovw %k0, %eax
6057 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
6058 ; NoVLX-NEXT: vzeroupper
6061 %0 = bitcast <2 x i64> %__a to <4 x i32>
6062 %load = load <2 x i64>, <2 x i64>* %__b
6063 %1 = bitcast <2 x i64> %load to <4 x i32>
6064 %2 = icmp sgt <4 x i32> %0, %1
; Expand %__u to 8 x i1, keep its low 4 lanes, and AND them into the compare result.
6065 %3 = bitcast i8 %__u to <8 x i1>
6066 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6067 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so the upper 4 result bits are zero.
6068 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6069 %6 = bitcast <8 x i1> %5 to i8
; Unmasked signed greater-than compare of 4 x i32 lanes against a scalar i32
; loaded from %__b and splatted to all lanes; the 4-bit result is widened to
; an i8 mask.
; Check lines below are autogenerated (see the NOTE at the top of the file):
; the VLX run expects an embedded-broadcast {1to4} memory operand; the NoVLX
; run expects a separate vpbroadcastd, a zmm-width compare, and
; kshiftlw/kshiftrw $12 to keep only the low 4 mask bits.
6074 define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
6075 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
6076 ; VLX: # %bb.0: # %entry
6077 ; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
6078 ; VLX-NEXT: kmovd %k0, %eax
6079 ; VLX-NEXT: # kill: def $al killed $al killed $eax
6082 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
6083 ; NoVLX: # %bb.0: # %entry
6084 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6085 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
6086 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6087 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6088 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6089 ; NoVLX-NEXT: kmovw %k0, %eax
6090 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
6091 ; NoVLX-NEXT: vzeroupper
6094 %0 = bitcast <2 x i64> %__a to <4 x i32>
6095 %load = load i32, i32* %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero index mask to copy it to every lane.
6096 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6097 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6098 %2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so the upper 4 result bits are zero.
6099 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6100 %4 = bitcast <8 x i1> %3 to i8
; Masked signed greater-than compare of 4 x i32 lanes against a scalar i32
; loaded from %__b and splatted to all lanes; the 4-bit result is widened to
; an i8 mask. Only the low 4 bits of %__u take part in the mask.
; Check lines below are autogenerated (see the NOTE at the top of the file):
; the VLX run expects a write-masked compare with an embedded-broadcast
; {1to4} operand; the NoVLX run expects vpbroadcastd, a write-masked zmm
; compare, and kshiftlw/kshiftrw $12 to keep only the low 4 mask bits.
6104 define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
6105 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
6106 ; VLX: # %bb.0: # %entry
6107 ; VLX-NEXT: kmovd %edi, %k1
6108 ; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
6109 ; VLX-NEXT: kmovd %k0, %eax
6110 ; VLX-NEXT: # kill: def $al killed $al killed $eax
6113 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
6114 ; NoVLX: # %bb.0: # %entry
6115 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6116 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
6117 ; NoVLX-NEXT: kmovw %edi, %k1
6118 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6119 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6120 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6121 ; NoVLX-NEXT: kmovw %k0, %eax
6122 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
6123 ; NoVLX-NEXT: vzeroupper
6126 %0 = bitcast <2 x i64> %__a to <4 x i32>
6127 %load = load i32, i32* %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero index mask to copy it to every lane.
6128 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6129 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6130 %2 = icmp sgt <4 x i32> %0, %1
; Expand %__u to 8 x i1, keep its low 4 lanes, and AND them into the compare result.
6131 %3 = bitcast i8 %__u to <8 x i1>
6132 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6133 %4 = and <4 x i1> %extract.i, %2
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so the upper 4 result bits are zero.
6134 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6135 %6 = bitcast <8 x i1> %5 to i8
; Unmasked signed greater-than compare of 4 x i32 lanes (register operands),
; with the 4-bit result widened to an i16 mask.
; Check lines below are autogenerated (see the NOTE at the top of the file):
; the VLX run expects an xmm vpcmpgtd into a k-register; the NoVLX run expects
; a zmm-width vpcmpgtd followed by kshiftlw/kshiftrw $12, which keeps only the
; low 4 bits of the 16-bit mask.
6140 define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6141 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask:
6142 ; VLX: # %bb.0: # %entry
6143 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
6144 ; VLX-NEXT: kmovd %k0, %eax
6145 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6148 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask:
6149 ; NoVLX: # %bb.0: # %entry
6150 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
6151 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6152 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6153 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6154 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6155 ; NoVLX-NEXT: kmovw %k0, %eax
6156 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6157 ; NoVLX-NEXT: vzeroupper
6160 %0 = bitcast <2 x i64> %__a to <4 x i32>
6161 %1 = bitcast <2 x i64> %__b to <4 x i32>
6162 %2 = icmp sgt <4 x i32> %0, %1
; Every shuffle index >= 4 selects a lane of the zeroinitializer operand, so result bits 4-15 are zero.
6163 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6164 %4 = bitcast <16 x i1> %3 to i16
; Unmasked signed greater-than compare of 4 x i32 lanes where the second
; operand is loaded from %__b; the 4-bit result is widened to an i16 mask.
; Check lines below are autogenerated (see the NOTE at the top of the file):
; the VLX run expects the load folded into vpcmpgtd; the NoVLX run expects a
; separate vmovdqa load, a zmm-width compare, and kshiftlw/kshiftrw $12 to
; keep only the low 4 mask bits.
6168 define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6169 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem:
6170 ; VLX: # %bb.0: # %entry
6171 ; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
6172 ; VLX-NEXT: kmovd %k0, %eax
6173 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6176 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem:
6177 ; NoVLX: # %bb.0: # %entry
6178 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6179 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
6180 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6181 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6182 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6183 ; NoVLX-NEXT: kmovw %k0, %eax
6184 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6185 ; NoVLX-NEXT: vzeroupper
6188 %0 = bitcast <2 x i64> %__a to <4 x i32>
6189 %load = load <2 x i64>, <2 x i64>* %__b
6190 %1 = bitcast <2 x i64> %load to <4 x i32>
6191 %2 = icmp sgt <4 x i32> %0, %1
; Every shuffle index >= 4 selects a lane of the zeroinitializer operand, so result bits 4-15 are zero.
6192 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6193 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than compare of 4 x i32 lanes (register operands),
; with the 4-bit result widened to an i16 mask. Only the low 4 bits of %__u
; take part in the mask.
; Check lines below are autogenerated (see the NOTE at the top of the file):
; both runs expect a write-masked vpcmpgtd; the NoVLX run uses zmm width and
; kshiftlw/kshiftrw $12 to keep only the low 4 mask bits.
6197 define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6198 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask:
6199 ; VLX: # %bb.0: # %entry
6200 ; VLX-NEXT: kmovd %edi, %k1
6201 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
6202 ; VLX-NEXT: kmovd %k0, %eax
6203 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6206 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask:
6207 ; NoVLX: # %bb.0: # %entry
6208 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
6209 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6210 ; NoVLX-NEXT: kmovw %edi, %k1
6211 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6212 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6213 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6214 ; NoVLX-NEXT: kmovw %k0, %eax
6215 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6216 ; NoVLX-NEXT: vzeroupper
6219 %0 = bitcast <2 x i64> %__a to <4 x i32>
6220 %1 = bitcast <2 x i64> %__b to <4 x i32>
6221 %2 = icmp sgt <4 x i32> %0, %1
; Expand %__u to 8 x i1, keep its low 4 lanes, and AND them into the compare result.
6222 %3 = bitcast i8 %__u to <8 x i1>
6223 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6224 %4 = and <4 x i1> %2, %extract.i
; Every shuffle index >= 4 selects a lane of the zeroinitializer operand, so result bits 4-15 are zero.
6225 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6226 %6 = bitcast <16 x i1> %5 to i16
; Masked signed greater-than compare of 4 x i32 lanes where the second
; operand is loaded from %__b; the 4-bit result is widened to an i16 mask.
; Only the low 4 bits of %__u take part in the mask.
; Check lines below are autogenerated (see the NOTE at the top of the file):
; the VLX run expects the load folded into a write-masked vpcmpgtd; the NoVLX
; run expects a separate vmovdqa load, a write-masked zmm compare, and
; kshiftlw/kshiftrw $12 to keep only the low 4 mask bits.
6230 define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6231 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem:
6232 ; VLX: # %bb.0: # %entry
6233 ; VLX-NEXT: kmovd %edi, %k1
6234 ; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
6235 ; VLX-NEXT: kmovd %k0, %eax
6236 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6239 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem:
6240 ; NoVLX: # %bb.0: # %entry
6241 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6242 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
6243 ; NoVLX-NEXT: kmovw %edi, %k1
6244 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6245 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6246 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6247 ; NoVLX-NEXT: kmovw %k0, %eax
6248 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6249 ; NoVLX-NEXT: vzeroupper
6252 %0 = bitcast <2 x i64> %__a to <4 x i32>
6253 %load = load <2 x i64>, <2 x i64>* %__b
6254 %1 = bitcast <2 x i64> %load to <4 x i32>
6255 %2 = icmp sgt <4 x i32> %0, %1
; Expand %__u to 8 x i1, keep its low 4 lanes, and AND them into the compare result.
6256 %3 = bitcast i8 %__u to <8 x i1>
6257 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6258 %4 = and <4 x i1> %2, %extract.i
; Every shuffle index >= 4 selects a lane of the zeroinitializer operand, so result bits 4-15 are zero.
6259 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6260 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed greater-than compare of 4 x i32 lanes against a scalar i32
; loaded from %__b and splatted to all lanes; the 4-bit result is widened to
; an i16 mask.
; Check lines below are autogenerated (see the NOTE at the top of the file):
; the VLX run expects an embedded-broadcast {1to4} memory operand; the NoVLX
; run expects a separate vpbroadcastd, a zmm-width compare, and
; kshiftlw/kshiftrw $12 to keep only the low 4 mask bits.
6265 define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
6266 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
6267 ; VLX: # %bb.0: # %entry
6268 ; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
6269 ; VLX-NEXT: kmovd %k0, %eax
6270 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6273 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
6274 ; NoVLX: # %bb.0: # %entry
6275 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6276 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
6277 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6278 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6279 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6280 ; NoVLX-NEXT: kmovw %k0, %eax
6281 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6282 ; NoVLX-NEXT: vzeroupper
6285 %0 = bitcast <2 x i64> %__a to <4 x i32>
6286 %load = load i32, i32* %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero index mask to copy it to every lane.
6287 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6288 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6289 %2 = icmp sgt <4 x i32> %0, %1
; Every shuffle index >= 4 selects a lane of the zeroinitializer operand, so result bits 4-15 are zero.
6290 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6291 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than compare of 4 x i32 lanes against a scalar i32
; loaded from %__b and splatted to all lanes; the 4-bit result is widened to
; an i16 mask. Only the low 4 bits of %__u take part in the mask.
; Check lines below are autogenerated (see the NOTE at the top of the file):
; the VLX run expects a write-masked compare with an embedded-broadcast
; {1to4} operand; the NoVLX run expects vpbroadcastd, a write-masked zmm
; compare, and kshiftlw/kshiftrw $12 to keep only the low 4 mask bits.
6295 define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
6296 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
6297 ; VLX: # %bb.0: # %entry
6298 ; VLX-NEXT: kmovd %edi, %k1
6299 ; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
6300 ; VLX-NEXT: kmovd %k0, %eax
6301 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6304 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
6305 ; NoVLX: # %bb.0: # %entry
6306 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6307 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
6308 ; NoVLX-NEXT: kmovw %edi, %k1
6309 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6310 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6311 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6312 ; NoVLX-NEXT: kmovw %k0, %eax
6313 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6314 ; NoVLX-NEXT: vzeroupper
6317 %0 = bitcast <2 x i64> %__a to <4 x i32>
6318 %load = load i32, i32* %__b
; Splat: insert the scalar into lane 0, then shuffle with an all-zero index mask to copy it to every lane.
6319 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6320 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6321 %2 = icmp sgt <4 x i32> %0, %1
; Expand %__u to 8 x i1, keep its low 4 lanes, and AND them into the compare result.
6322 %3 = bitcast i8 %__u to <8 x i1>
6323 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6324 %4 = and <4 x i1> %extract.i, %2
; Every shuffle index >= 4 selects a lane of the zeroinitializer operand, so result bits 4-15 are zero.
6325 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6326 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed greater-than compare of 4 x i32 lanes (register operands),
; with the 4-bit result widened to an i32 mask.
; Check lines below are autogenerated (see the NOTE at the top of the file):
; the VLX run expects an xmm vpcmpgtd into a k-register; the NoVLX run expects
; a zmm-width vpcmpgtd followed by kshiftlw/kshiftrw $12, which keeps only the
; low 4 bits of the 16-bit mask.
6331 define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6332 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask:
6333 ; VLX: # %bb.0: # %entry
6334 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
6335 ; VLX-NEXT: kmovd %k0, %eax
6338 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask:
6339 ; NoVLX: # %bb.0: # %entry
6340 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
6341 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6342 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6343 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6344 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6345 ; NoVLX-NEXT: kmovw %k0, %eax
6346 ; NoVLX-NEXT: vzeroupper
6349 %0 = bitcast <2 x i64> %__a to <4 x i32>
6350 %1 = bitcast <2 x i64> %__b to <4 x i32>
6351 %2 = icmp sgt <4 x i32> %0, %1
; Every shuffle index >= 4 selects a lane of the zeroinitializer operand, so result bits 4-31 are zero.
6352 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6353 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed greater-than compare of 4 x i32 lanes where the second
; operand is loaded from %__b; the 4-bit result is widened to an i32 mask.
; Check lines below are autogenerated (see the NOTE at the top of the file):
; the VLX run expects the load folded into vpcmpgtd; the NoVLX run expects a
; separate vmovdqa load, a zmm-width compare, and kshiftlw/kshiftrw $12 to
; keep only the low 4 mask bits.
6357 define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6358 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem:
6359 ; VLX: # %bb.0: # %entry
6360 ; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
6361 ; VLX-NEXT: kmovd %k0, %eax
6364 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem:
6365 ; NoVLX: # %bb.0: # %entry
6366 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6367 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
6368 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6369 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6370 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6371 ; NoVLX-NEXT: kmovw %k0, %eax
6372 ; NoVLX-NEXT: vzeroupper
6375 %0 = bitcast <2 x i64> %__a to <4 x i32>
6376 %load = load <2 x i64>, <2 x i64>* %__b
6377 %1 = bitcast <2 x i64> %load to <4 x i32>
6378 %2 = icmp sgt <4 x i32> %0, %1
; Every shuffle index >= 4 selects a lane of the zeroinitializer operand, so result bits 4-31 are zero.
6379 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6380 %4 = bitcast <32 x i1> %3 to i32
; Masked form: the <4 x i32> signed greater-than result is ANDed with the low
; 4 bits of %__u, zero-widened to <32 x i1> and bitcast to i32.
; Expected codegen: %edi is moved into %k1 and used as the write mask of
; vpcmpgtd (xmm with VLX, zmm without); the non-VLX path adds a
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
6384 define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6385 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask:
6386 ; VLX: # %bb.0: # %entry
6387 ; VLX-NEXT: kmovd %edi, %k1
6388 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
6389 ; VLX-NEXT: kmovd %k0, %eax
6392 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask:
6393 ; NoVLX: # %bb.0: # %entry
6394 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
6395 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6396 ; NoVLX-NEXT: kmovw %edi, %k1
6397 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6398 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6399 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6400 ; NoVLX-NEXT: kmovw %k0, %eax
6401 ; NoVLX-NEXT: vzeroupper
6404 %0 = bitcast <2 x i64> %__a to <4 x i32>
6405 %1 = bitcast <2 x i64> %__b to <4 x i32>
6406 %2 = icmp sgt <4 x i32> %0, %1
; Only lanes 0-3 of the 8-bit mask take part in the AND.
6407 %3 = bitcast i8 %__u to <8 x i1>
6408 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6409 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 pick lanes of the zeroinitializer operand, so result
; lanes 4 and above are always zero.
6410 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6411 %6 = bitcast <32 x i1> %5 to i32
; Masked form with a memory operand: the second <4 x i32> vector is loaded
; from %__b, the signed greater-than result is ANDed with the low 4 bits of
; %__u, zero-widened to <32 x i1> and bitcast to i32.
; Expected codegen: with VLX the load is folded into a %k1-masked xmm
; vpcmpgtd; without VLX a vmovdqa feeds a %k1-masked zmm compare followed by a
; kshiftlw/kshiftrw $12 pair.
6415 define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6416 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem:
6417 ; VLX: # %bb.0: # %entry
6418 ; VLX-NEXT: kmovd %edi, %k1
6419 ; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
6420 ; VLX-NEXT: kmovd %k0, %eax
6423 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem:
6424 ; NoVLX: # %bb.0: # %entry
6425 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6426 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
6427 ; NoVLX-NEXT: kmovw %edi, %k1
6428 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6429 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6430 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6431 ; NoVLX-NEXT: kmovw %k0, %eax
6432 ; NoVLX-NEXT: vzeroupper
6435 %0 = bitcast <2 x i64> %__a to <4 x i32>
6436 %load = load <2 x i64>, <2 x i64>* %__b
6437 %1 = bitcast <2 x i64> %load to <4 x i32>
6438 %2 = icmp sgt <4 x i32> %0, %1
; Only lanes 0-3 of the 8-bit mask take part in the AND.
6439 %3 = bitcast i8 %__u to <8 x i1>
6440 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6441 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 pick lanes of the zeroinitializer operand, so result
; lanes 4 and above are always zero.
6442 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6443 %6 = bitcast <32 x i1> %5 to i32
; Broadcast form: a scalar i32 loaded from %__b is splat to <4 x i32> and used
; as the right-hand side of the signed greater-than compare. The <4 x i1>
; result is zero-widened to <32 x i1> and bitcast to i32.
; Expected codegen: with VLX an embedded {1to4} broadcast operand on
; vpcmpgtd; without VLX a separate vpbroadcastd feeds a zmm compare followed
; by a kshiftlw/kshiftrw $12 pair.
6448 define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
6449 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
6450 ; VLX: # %bb.0: # %entry
6451 ; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
6452 ; VLX-NEXT: kmovd %k0, %eax
6455 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
6456 ; NoVLX: # %bb.0: # %entry
6457 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6458 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
6459 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6460 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6461 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6462 ; NoVLX-NEXT: kmovw %k0, %eax
6463 ; NoVLX-NEXT: vzeroupper
6466 %0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat the loaded scalar into every lane (all shuffle indices are 0).
6467 %load = load i32, i32* %__b
6468 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6469 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6470 %2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 4-7 pick lanes of the zeroinitializer operand, so result
; lanes 4 and above are always zero.
6471 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6472 %4 = bitcast <32 x i1> %3 to i32
; Masked broadcast form: a scalar i32 loaded from %__b is splat to <4 x i32>,
; compared (signed greater-than) against %__a, ANDed with the low 4 bits of
; %__u, zero-widened to <32 x i1> and bitcast to i32.
; Expected codegen: with VLX a %k1-masked vpcmpgtd with a {1to4} broadcast
; operand; without VLX a vpbroadcastd feeds a %k1-masked zmm compare followed
; by a kshiftlw/kshiftrw $12 pair.
6476 define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
6477 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
6478 ; VLX: # %bb.0: # %entry
6479 ; VLX-NEXT: kmovd %edi, %k1
6480 ; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
6481 ; VLX-NEXT: kmovd %k0, %eax
6484 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
6485 ; NoVLX: # %bb.0: # %entry
6486 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6487 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
6488 ; NoVLX-NEXT: kmovw %edi, %k1
6489 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6490 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6491 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6492 ; NoVLX-NEXT: kmovw %k0, %eax
6493 ; NoVLX-NEXT: vzeroupper
6496 %0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat the loaded scalar into every lane (all shuffle indices are 0).
6497 %load = load i32, i32* %__b
6498 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6499 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6500 %2 = icmp sgt <4 x i32> %0, %1
; Only lanes 0-3 of the 8-bit mask take part in the AND; note the mask is the
; left operand of the AND here.
6501 %3 = bitcast i8 %__u to <8 x i1>
6502 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6503 %4 = and <4 x i1> %extract.i, %2
; Shuffle indices 4-7 pick lanes of the zeroinitializer operand, so result
; lanes 4 and above are always zero.
6504 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6505 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-than compare of two <4 x i32> vectors (bitcast from the
; <2 x i64> arguments). The <4 x i1> result is widened to <64 x i1> with zero
; upper lanes and bitcast to i64.
; Expected codegen: with VLX an xmm vpcmpgtd into %k0 read out with kmovq;
; without VLX a zmm compare, a kshiftlw/kshiftrw $12 pair that keeps only the
; low 4 mask bits, then kmovw and movzwl %ax, %eax.
6510 define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6511 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask:
6512 ; VLX: # %bb.0: # %entry
6513 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
6514 ; VLX-NEXT: kmovq %k0, %rax
6517 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask:
6518 ; NoVLX: # %bb.0: # %entry
6519 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
6520 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6521 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6522 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6523 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6524 ; NoVLX-NEXT: kmovw %k0, %eax
6525 ; NoVLX-NEXT: movzwl %ax, %eax
6526 ; NoVLX-NEXT: vzeroupper
6529 %0 = bitcast <2 x i64> %__a to <4 x i32>
6530 %1 = bitcast <2 x i64> %__b to <4 x i32>
6531 %2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 4-7 pick lanes of the zeroinitializer operand, so result
; lanes 4 and above are always zero.
6532 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6533 %4 = bitcast <64 x i1> %3 to i64
; Memory-operand form: the second <4 x i32> vector is loaded from %__b. The
; <4 x i1> signed greater-than result is zero-widened to <64 x i1> and bitcast
; to i64.
; Expected codegen: with VLX the load is folded into the xmm vpcmpgtd and the
; mask is read with kmovq; without VLX a vmovdqa feeds a zmm compare, followed
; by a kshiftlw/kshiftrw $12 pair, kmovw and movzwl %ax, %eax.
6537 define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6538 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem:
6539 ; VLX: # %bb.0: # %entry
6540 ; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
6541 ; VLX-NEXT: kmovq %k0, %rax
6544 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem:
6545 ; NoVLX: # %bb.0: # %entry
6546 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6547 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
6548 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6549 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6550 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6551 ; NoVLX-NEXT: kmovw %k0, %eax
6552 ; NoVLX-NEXT: movzwl %ax, %eax
6553 ; NoVLX-NEXT: vzeroupper
6556 %0 = bitcast <2 x i64> %__a to <4 x i32>
6557 %load = load <2 x i64>, <2 x i64>* %__b
6558 %1 = bitcast <2 x i64> %load to <4 x i32>
6559 %2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 4-7 pick lanes of the zeroinitializer operand, so result
; lanes 4 and above are always zero.
6560 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6561 %4 = bitcast <64 x i1> %3 to i64
; Masked form: the <4 x i32> signed greater-than result is ANDed with the low
; 4 bits of %__u, zero-widened to <64 x i1> and bitcast to i64.
; Expected codegen: %edi is moved into %k1 and used as the write mask of
; vpcmpgtd; with VLX the mask is read with kmovq, without VLX the zmm compare
; is followed by a kshiftlw/kshiftrw $12 pair, kmovw and movzwl %ax, %eax.
6565 define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
6566 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask:
6567 ; VLX: # %bb.0: # %entry
6568 ; VLX-NEXT: kmovd %edi, %k1
6569 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
6570 ; VLX-NEXT: kmovq %k0, %rax
6573 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask:
6574 ; NoVLX: # %bb.0: # %entry
6575 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
6576 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6577 ; NoVLX-NEXT: kmovw %edi, %k1
6578 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6579 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6580 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6581 ; NoVLX-NEXT: kmovw %k0, %eax
6582 ; NoVLX-NEXT: movzwl %ax, %eax
6583 ; NoVLX-NEXT: vzeroupper
6586 %0 = bitcast <2 x i64> %__a to <4 x i32>
6587 %1 = bitcast <2 x i64> %__b to <4 x i32>
6588 %2 = icmp sgt <4 x i32> %0, %1
; Only lanes 0-3 of the 8-bit mask take part in the AND.
6589 %3 = bitcast i8 %__u to <8 x i1>
6590 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6591 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 pick lanes of the zeroinitializer operand, so result
; lanes 4 and above are always zero.
6592 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6593 %6 = bitcast <64 x i1> %5 to i64
; Masked form with a memory operand: the second <4 x i32> vector is loaded
; from %__b, the signed greater-than result is ANDed with the low 4 bits of
; %__u, zero-widened to <64 x i1> and bitcast to i64.
; Expected codegen: with VLX the load is folded into a %k1-masked xmm
; vpcmpgtd read out with kmovq; without VLX a vmovdqa feeds a %k1-masked zmm
; compare, then a kshiftlw/kshiftrw $12 pair, kmovw and movzwl %ax, %eax.
6597 define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
6598 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem:
6599 ; VLX: # %bb.0: # %entry
6600 ; VLX-NEXT: kmovd %edi, %k1
6601 ; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
6602 ; VLX-NEXT: kmovq %k0, %rax
6605 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem:
6606 ; NoVLX: # %bb.0: # %entry
6607 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6608 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
6609 ; NoVLX-NEXT: kmovw %edi, %k1
6610 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6611 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6612 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6613 ; NoVLX-NEXT: kmovw %k0, %eax
6614 ; NoVLX-NEXT: movzwl %ax, %eax
6615 ; NoVLX-NEXT: vzeroupper
6618 %0 = bitcast <2 x i64> %__a to <4 x i32>
6619 %load = load <2 x i64>, <2 x i64>* %__b
6620 %1 = bitcast <2 x i64> %load to <4 x i32>
6621 %2 = icmp sgt <4 x i32> %0, %1
; Only lanes 0-3 of the 8-bit mask take part in the AND.
6622 %3 = bitcast i8 %__u to <8 x i1>
6623 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6624 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 pick lanes of the zeroinitializer operand, so result
; lanes 4 and above are always zero.
6625 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6626 %6 = bitcast <64 x i1> %5 to i64
; Broadcast form: a scalar i32 loaded from %__b is splat to <4 x i32> and used
; as the right-hand side of the signed greater-than compare. The <4 x i1>
; result is zero-widened to <64 x i1> and bitcast to i64.
; Expected codegen: with VLX a {1to4} broadcast operand on vpcmpgtd, read out
; with kmovq; without VLX a vpbroadcastd feeds a zmm compare, then a
; kshiftlw/kshiftrw $12 pair, kmovw and movzwl %ax, %eax.
6631 define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
6632 ; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
6633 ; VLX: # %bb.0: # %entry
6634 ; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
6635 ; VLX-NEXT: kmovq %k0, %rax
6638 ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
6639 ; NoVLX: # %bb.0: # %entry
6640 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6641 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
6642 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6643 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6644 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6645 ; NoVLX-NEXT: kmovw %k0, %eax
6646 ; NoVLX-NEXT: movzwl %ax, %eax
6647 ; NoVLX-NEXT: vzeroupper
6650 %0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat the loaded scalar into every lane (all shuffle indices are 0).
6651 %load = load i32, i32* %__b
6652 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6653 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6654 %2 = icmp sgt <4 x i32> %0, %1
; Shuffle indices 4-7 pick lanes of the zeroinitializer operand, so result
; lanes 4 and above are always zero.
6655 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6656 %4 = bitcast <64 x i1> %3 to i64
; Masked broadcast form: a scalar i32 loaded from %__b is splat to <4 x i32>,
; compared (signed greater-than) against %__a, ANDed with the low 4 bits of
; %__u, zero-widened to <64 x i1> and bitcast to i64.
; Expected codegen: with VLX a %k1-masked vpcmpgtd with a {1to4} broadcast
; operand, read out with kmovq; without VLX a vpbroadcastd feeds a %k1-masked
; zmm compare, then a kshiftlw/kshiftrw $12 pair, kmovw and movzwl %ax, %eax.
6660 define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
6661 ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
6662 ; VLX: # %bb.0: # %entry
6663 ; VLX-NEXT: kmovd %edi, %k1
6664 ; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
6665 ; VLX-NEXT: kmovq %k0, %rax
6668 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
6669 ; NoVLX: # %bb.0: # %entry
6670 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6671 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
6672 ; NoVLX-NEXT: kmovw %edi, %k1
6673 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6674 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
6675 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
6676 ; NoVLX-NEXT: kmovw %k0, %eax
6677 ; NoVLX-NEXT: movzwl %ax, %eax
6678 ; NoVLX-NEXT: vzeroupper
6681 %0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat the loaded scalar into every lane (all shuffle indices are 0).
6682 %load = load i32, i32* %__b
6683 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
6684 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
6685 %2 = icmp sgt <4 x i32> %0, %1
; Only lanes 0-3 of the 8-bit mask take part in the AND; note the mask is the
; left operand of the AND here.
6686 %3 = bitcast i8 %__u to <8 x i1>
6687 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6688 %4 = and <4 x i1> %extract.i, %2
; Shuffle indices 4-7 pick lanes of the zeroinitializer operand, so result
; lanes 4 and above are always zero.
6689 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
6690 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of two <8 x i32> vectors (bitcast from the
; <4 x i64> arguments). The <8 x i1> result is widened to <16 x i1> with zero
; upper lanes and bitcast to i16.
; Expected codegen: with VLX a single ymm vpcmpgtd writes %k0; without VLX the
; compare runs on zmm registers and a kshiftlw/kshiftrw $8 pair keeps only the
; low 8 mask bits.
6695 define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6696 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask:
6697 ; VLX: # %bb.0: # %entry
6698 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
6699 ; VLX-NEXT: kmovd %k0, %eax
6700 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6701 ; VLX-NEXT: vzeroupper
6704 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask:
6705 ; NoVLX: # %bb.0: # %entry
6706 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
6707 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6708 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6709 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6710 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6711 ; NoVLX-NEXT: kmovw %k0, %eax
6712 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6713 ; NoVLX-NEXT: vzeroupper
6716 %0 = bitcast <4 x i64> %__a to <8 x i32>
6717 %1 = bitcast <4 x i64> %__b to <8 x i32>
6718 %2 = icmp sgt <8 x i32> %0, %1
; Shuffle indices 8-15 pick lanes of the zeroinitializer operand, so result
; lanes 8 and above are always zero.
6719 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6720 %4 = bitcast <16 x i1> %3 to i16
; Memory-operand form: the second <8 x i32> vector is loaded from %__b. The
; <8 x i1> signed greater-than result is zero-widened to <16 x i1> and bitcast
; to i16.
; Expected codegen: with VLX the load is folded into the ymm vpcmpgtd; without
; VLX a vmovdqa feeds a zmm compare followed by a kshiftlw/kshiftrw $8 pair
; that keeps only the low 8 mask bits.
6724 define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
6725 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem:
6726 ; VLX: # %bb.0: # %entry
6727 ; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
6728 ; VLX-NEXT: kmovd %k0, %eax
6729 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6730 ; VLX-NEXT: vzeroupper
6733 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem:
6734 ; NoVLX: # %bb.0: # %entry
6735 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6736 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
6737 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6738 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6739 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6740 ; NoVLX-NEXT: kmovw %k0, %eax
6741 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6742 ; NoVLX-NEXT: vzeroupper
6745 %0 = bitcast <4 x i64> %__a to <8 x i32>
6746 %load = load <4 x i64>, <4 x i64>* %__b
6747 %1 = bitcast <4 x i64> %load to <8 x i32>
6748 %2 = icmp sgt <8 x i32> %0, %1
; Shuffle indices 8-15 pick lanes of the zeroinitializer operand, so result
; lanes 8 and above are always zero.
6749 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6750 %4 = bitcast <16 x i1> %3 to i16
; Masked form: the <8 x i32> signed greater-than result is ANDed with the
; 8-bit mask %__u, zero-widened to <16 x i1> and bitcast to i16.
; Expected codegen: %edi is moved into %k1 and used as the write mask of
; vpcmpgtd (ymm with VLX, zmm without); the non-VLX path adds a
; kshiftlw/kshiftrw $8 pair that keeps only the low 8 mask bits.
6754 define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6755 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask:
6756 ; VLX: # %bb.0: # %entry
6757 ; VLX-NEXT: kmovd %edi, %k1
6758 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
6759 ; VLX-NEXT: kmovd %k0, %eax
6760 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6761 ; VLX-NEXT: vzeroupper
6764 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask:
6765 ; NoVLX: # %bb.0: # %entry
6766 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
6767 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6768 ; NoVLX-NEXT: kmovw %edi, %k1
6769 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6770 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6771 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6772 ; NoVLX-NEXT: kmovw %k0, %eax
6773 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6774 ; NoVLX-NEXT: vzeroupper
6777 %0 = bitcast <4 x i64> %__a to <8 x i32>
6778 %1 = bitcast <4 x i64> %__b to <8 x i32>
6779 %2 = icmp sgt <8 x i32> %0, %1
; All 8 bits of %__u are used as the per-lane mask.
6780 %3 = bitcast i8 %__u to <8 x i1>
6781 %4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 pick lanes of the zeroinitializer operand, so result
; lanes 8 and above are always zero.
6782 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6783 %6 = bitcast <16 x i1> %5 to i16
; Masked form with a memory operand: the second <8 x i32> vector is loaded
; from %__b, the signed greater-than result is ANDed with the 8-bit mask
; %__u, zero-widened to <16 x i1> and bitcast to i16.
; Expected codegen: with VLX the load is folded into a %k1-masked ymm
; vpcmpgtd; without VLX a vmovdqa feeds a %k1-masked zmm compare followed by a
; kshiftlw/kshiftrw $8 pair.
6787 define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
6788 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem:
6789 ; VLX: # %bb.0: # %entry
6790 ; VLX-NEXT: kmovd %edi, %k1
6791 ; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
6792 ; VLX-NEXT: kmovd %k0, %eax
6793 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6794 ; VLX-NEXT: vzeroupper
6797 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem:
6798 ; NoVLX: # %bb.0: # %entry
6799 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6800 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
6801 ; NoVLX-NEXT: kmovw %edi, %k1
6802 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6803 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6804 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6805 ; NoVLX-NEXT: kmovw %k0, %eax
6806 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6807 ; NoVLX-NEXT: vzeroupper
6810 %0 = bitcast <4 x i64> %__a to <8 x i32>
6811 %load = load <4 x i64>, <4 x i64>* %__b
6812 %1 = bitcast <4 x i64> %load to <8 x i32>
6813 %2 = icmp sgt <8 x i32> %0, %1
; All 8 bits of %__u are used as the per-lane mask.
6814 %3 = bitcast i8 %__u to <8 x i1>
6815 %4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 pick lanes of the zeroinitializer operand, so result
; lanes 8 and above are always zero.
6816 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6817 %6 = bitcast <16 x i1> %5 to i16
; Broadcast form: a scalar i32 loaded from %__b is splat to <8 x i32> and used
; as the right-hand side of the signed greater-than compare. The <8 x i1>
; result is zero-widened to <16 x i1> and bitcast to i16.
; Expected codegen: with VLX an embedded {1to8} broadcast operand on
; vpcmpgtd; without VLX a separate vpbroadcastd feeds a zmm compare followed
; by a kshiftlw/kshiftrw $8 pair.
6822 define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
6823 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
6824 ; VLX: # %bb.0: # %entry
6825 ; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
6826 ; VLX-NEXT: kmovd %k0, %eax
6827 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6828 ; VLX-NEXT: vzeroupper
6831 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
6832 ; NoVLX: # %bb.0: # %entry
6833 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6834 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
6835 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6836 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6837 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6838 ; NoVLX-NEXT: kmovw %k0, %eax
6839 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6840 ; NoVLX-NEXT: vzeroupper
6843 %0 = bitcast <4 x i64> %__a to <8 x i32>
; Splat the loaded scalar into every lane (all shuffle indices are 0).
6844 %load = load i32, i32* %__b
6845 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
6846 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
6847 %2 = icmp sgt <8 x i32> %0, %1
; Shuffle indices 8-15 pick lanes of the zeroinitializer operand, so result
; lanes 8 and above are always zero.
6848 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6849 %4 = bitcast <16 x i1> %3 to i16
; Masked broadcast form: a scalar i32 loaded from %__b is splat to <8 x i32>,
; compared (signed greater-than) against %__a, ANDed with the 8-bit mask
; %__u, zero-widened to <16 x i1> and bitcast to i16.
; Expected codegen: with VLX a %k1-masked vpcmpgtd with a {1to8} broadcast
; operand; without VLX a vpbroadcastd feeds a %k1-masked zmm compare followed
; by a kshiftlw/kshiftrw $8 pair.
6853 define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
6854 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
6855 ; VLX: # %bb.0: # %entry
6856 ; VLX-NEXT: kmovd %edi, %k1
6857 ; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
6858 ; VLX-NEXT: kmovd %k0, %eax
6859 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
6860 ; VLX-NEXT: vzeroupper
6863 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
6864 ; NoVLX: # %bb.0: # %entry
6865 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6866 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
6867 ; NoVLX-NEXT: kmovw %edi, %k1
6868 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6869 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6870 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6871 ; NoVLX-NEXT: kmovw %k0, %eax
6872 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
6873 ; NoVLX-NEXT: vzeroupper
6876 %0 = bitcast <4 x i64> %__a to <8 x i32>
; Splat the loaded scalar into every lane (all shuffle indices are 0).
6877 %load = load i32, i32* %__b
6878 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
6879 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
6880 %2 = icmp sgt <8 x i32> %0, %1
; All 8 bits of %__u are used as the per-lane mask; note the mask is the left
; operand of the AND here.
6881 %3 = bitcast i8 %__u to <8 x i1>
6882 %4 = and <8 x i1> %3, %2
; Shuffle indices 8-15 pick lanes of the zeroinitializer operand, so result
; lanes 8 and above are always zero.
6883 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6884 %6 = bitcast <16 x i1> %5 to i16
; Signed greater-than compare of two <8 x i32> vectors (bitcast from the
; <4 x i64> arguments). The <8 x i1> result is widened to <32 x i1> with zero
; upper lanes and bitcast to i32.
; Expected codegen: with VLX a single ymm vpcmpgtd writes %k0; without VLX the
; compare runs on zmm registers and a kshiftlw/kshiftrw $8 pair keeps only the
; low 8 mask bits.
6889 define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6890 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask:
6891 ; VLX: # %bb.0: # %entry
6892 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
6893 ; VLX-NEXT: kmovd %k0, %eax
6894 ; VLX-NEXT: vzeroupper
6897 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask:
6898 ; NoVLX: # %bb.0: # %entry
6899 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
6900 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6901 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6902 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6903 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6904 ; NoVLX-NEXT: kmovw %k0, %eax
6905 ; NoVLX-NEXT: vzeroupper
6908 %0 = bitcast <4 x i64> %__a to <8 x i32>
6909 %1 = bitcast <4 x i64> %__b to <8 x i32>
6910 %2 = icmp sgt <8 x i32> %0, %1
; Shuffle indices 8-15 pick lanes of the zeroinitializer operand, so result
; lanes 8 and above are always zero.
6911 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6912 %4 = bitcast <32 x i1> %3 to i32
; Memory-operand form: the second <8 x i32> vector is loaded from %__b. The
; <8 x i1> signed greater-than result is zero-widened to <32 x i1> and bitcast
; to i32.
; Expected codegen: with VLX the load is folded into the ymm vpcmpgtd; without
; VLX a vmovdqa feeds a zmm compare followed by a kshiftlw/kshiftrw $8 pair
; that keeps only the low 8 mask bits.
6916 define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
6917 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem:
6918 ; VLX: # %bb.0: # %entry
6919 ; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
6920 ; VLX-NEXT: kmovd %k0, %eax
6921 ; VLX-NEXT: vzeroupper
6924 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem:
6925 ; NoVLX: # %bb.0: # %entry
6926 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6927 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
6928 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
6929 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6930 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6931 ; NoVLX-NEXT: kmovw %k0, %eax
6932 ; NoVLX-NEXT: vzeroupper
6935 %0 = bitcast <4 x i64> %__a to <8 x i32>
6936 %load = load <4 x i64>, <4 x i64>* %__b
6937 %1 = bitcast <4 x i64> %load to <8 x i32>
6938 %2 = icmp sgt <8 x i32> %0, %1
; Shuffle indices 8-15 pick lanes of the zeroinitializer operand, so result
; lanes 8 and above are always zero.
6939 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6940 %4 = bitcast <32 x i1> %3 to i32
; Masked form: the <8 x i32> signed greater-than result is ANDed with the
; 8-bit mask %__u, zero-widened to <32 x i1> and bitcast to i32.
; Expected codegen: %edi is moved into %k1 and used as the write mask of
; vpcmpgtd (ymm with VLX, zmm without); the non-VLX path adds a
; kshiftlw/kshiftrw $8 pair that keeps only the low 8 mask bits.
6944 define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
6945 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask:
6946 ; VLX: # %bb.0: # %entry
6947 ; VLX-NEXT: kmovd %edi, %k1
6948 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
6949 ; VLX-NEXT: kmovd %k0, %eax
6950 ; VLX-NEXT: vzeroupper
6953 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask:
6954 ; NoVLX: # %bb.0: # %entry
6955 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
6956 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6957 ; NoVLX-NEXT: kmovw %edi, %k1
6958 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6959 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6960 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6961 ; NoVLX-NEXT: kmovw %k0, %eax
6962 ; NoVLX-NEXT: vzeroupper
6965 %0 = bitcast <4 x i64> %__a to <8 x i32>
6966 %1 = bitcast <4 x i64> %__b to <8 x i32>
6967 %2 = icmp sgt <8 x i32> %0, %1
; All 8 bits of %__u are used as the per-lane mask.
6968 %3 = bitcast i8 %__u to <8 x i1>
6969 %4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 pick lanes of the zeroinitializer operand, so result
; lanes 8 and above are always zero.
6970 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6971 %6 = bitcast <32 x i1> %5 to i32
; Masked form with a memory operand: the second <8 x i32> vector is loaded
; from %__b, the signed greater-than result is ANDed with the 8-bit mask
; %__u, zero-widened to <32 x i1> and bitcast to i32.
; Expected codegen: with VLX the load is folded into a %k1-masked ymm
; vpcmpgtd; without VLX a vmovdqa feeds a %k1-masked zmm compare followed by a
; kshiftlw/kshiftrw $8 pair.
6975 define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
6976 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem:
6977 ; VLX: # %bb.0: # %entry
6978 ; VLX-NEXT: kmovd %edi, %k1
6979 ; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
6980 ; VLX-NEXT: kmovd %k0, %eax
6981 ; VLX-NEXT: vzeroupper
6984 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem:
6985 ; NoVLX: # %bb.0: # %entry
6986 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
6987 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
6988 ; NoVLX-NEXT: kmovw %edi, %k1
6989 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
6990 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
6991 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
6992 ; NoVLX-NEXT: kmovw %k0, %eax
6993 ; NoVLX-NEXT: vzeroupper
6996 %0 = bitcast <4 x i64> %__a to <8 x i32>
6997 %load = load <4 x i64>, <4 x i64>* %__b
6998 %1 = bitcast <4 x i64> %load to <8 x i32>
6999 %2 = icmp sgt <8 x i32> %0, %1
; All 8 bits of %__u are used as the per-lane mask.
7000 %3 = bitcast i8 %__u to <8 x i1>
7001 %4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 pick lanes of the zeroinitializer operand, so result
; lanes 8 and above are always zero.
7002 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7003 %6 = bitcast <32 x i1> %5 to i32
; Broadcast form: a scalar i32 loaded from %__b is splat to <8 x i32> and used
; as the right-hand side of the signed greater-than compare. The <8 x i1>
; result is zero-widened to <32 x i1> and bitcast to i32.
; Expected codegen: with VLX an embedded {1to8} broadcast operand on
; vpcmpgtd; without VLX a separate vpbroadcastd feeds a zmm compare followed
; by a kshiftlw/kshiftrw $8 pair.
7008 define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
7009 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
7010 ; VLX: # %bb.0: # %entry
7011 ; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
7012 ; VLX-NEXT: kmovd %k0, %eax
7013 ; VLX-NEXT: vzeroupper
7016 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
7017 ; NoVLX: # %bb.0: # %entry
7018 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
7019 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
7020 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7021 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
7022 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
7023 ; NoVLX-NEXT: kmovw %k0, %eax
7024 ; NoVLX-NEXT: vzeroupper
7027 %0 = bitcast <4 x i64> %__a to <8 x i32>
; Splat the loaded scalar into every lane (all shuffle indices are 0).
7028 %load = load i32, i32* %__b
7029 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
7030 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7031 %2 = icmp sgt <8 x i32> %0, %1
; Shuffle indices 8-15 pick lanes of the zeroinitializer operand, so result
; lanes 8 and above are always zero.
7032 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7033 %4 = bitcast <32 x i1> %3 to i32
; Masked broadcast form: a scalar i32 loaded from %__b is splat to <8 x i32>,
; compared (signed greater-than) against %__a, ANDed with the 8-bit mask
; %__u, zero-widened to <32 x i1> and bitcast to i32.
; Expected codegen: with VLX a %k1-masked vpcmpgtd with a {1to8} broadcast
; operand; without VLX a vpbroadcastd feeds a %k1-masked zmm compare followed
; by a kshiftlw/kshiftrw $8 pair.
7037 define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
7038 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
7039 ; VLX: # %bb.0: # %entry
7040 ; VLX-NEXT: kmovd %edi, %k1
7041 ; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
7042 ; VLX-NEXT: kmovd %k0, %eax
7043 ; VLX-NEXT: vzeroupper
7046 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
7047 ; NoVLX: # %bb.0: # %entry
7048 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
7049 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
7050 ; NoVLX-NEXT: kmovw %edi, %k1
7051 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7052 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
7053 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
7054 ; NoVLX-NEXT: kmovw %k0, %eax
7055 ; NoVLX-NEXT: vzeroupper
7058 %0 = bitcast <4 x i64> %__a to <8 x i32>
; Splat the loaded scalar into every lane (all shuffle indices are 0).
7059 %load = load i32, i32* %__b
7060 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
7061 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7062 %2 = icmp sgt <8 x i32> %0, %1
; All 8 bits of %__u are used as the per-lane mask; note the mask is the left
; operand of the AND here.
7063 %3 = bitcast i8 %__u to <8 x i1>
7064 %4 = and <8 x i1> %3, %2
; Shuffle indices 8-15 pick lanes of the zeroinitializer operand, so result
; lanes 8 and above are always zero.
7065 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7066 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-than compare of two <8 x i32> vectors (bitcast from the
; <4 x i64> arguments). The <8 x i1> result is widened to <64 x i1> with zero
; upper lanes and bitcast to i64.
; Expected codegen: with VLX a ymm vpcmpgtd into %k0 read out with kmovq;
; without VLX a zmm compare, a kshiftlw/kshiftrw $8 pair that keeps only the
; low 8 mask bits, then kmovw and movzwl %ax, %eax.
7071 define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
7072 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask:
7073 ; VLX: # %bb.0: # %entry
7074 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
7075 ; VLX-NEXT: kmovq %k0, %rax
7076 ; VLX-NEXT: vzeroupper
7079 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask:
7080 ; NoVLX: # %bb.0: # %entry
7081 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
7082 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
7083 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7084 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
7085 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
7086 ; NoVLX-NEXT: kmovw %k0, %eax
7087 ; NoVLX-NEXT: movzwl %ax, %eax
7088 ; NoVLX-NEXT: vzeroupper
7091 %0 = bitcast <4 x i64> %__a to <8 x i32>
7092 %1 = bitcast <4 x i64> %__b to <8 x i32>
7093 %2 = icmp sgt <8 x i32> %0, %1
; Shuffle indices 8-15 pick lanes of the zeroinitializer operand, so result
; lanes 8 and above are always zero.
7094 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7095 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed greater-than compare of the eight i32 lanes of %__a against
; a full vector loaded from %__b. The <8 x i1> result is padded with lanes
; taken from zeroinitializer to <64 x i1> and bitcast to i64. The VLX / NoVLX
; lines are autogenerated FileCheck assertions for the two RUN configurations.
7099 define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
7100 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem:
7101 ; VLX: # %bb.0: # %entry
7102 ; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
7103 ; VLX-NEXT: kmovq %k0, %rax
7104 ; VLX-NEXT: vzeroupper
7107 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem:
7108 ; NoVLX: # %bb.0: # %entry
7109 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
7110 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
7111 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7112 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
7113 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
7114 ; NoVLX-NEXT: kmovw %k0, %eax
7115 ; NoVLX-NEXT: movzwl %ax, %eax
7116 ; NoVLX-NEXT: vzeroupper
7119 %0 = bitcast <4 x i64> %__a to <8 x i32>
7120 %load = load <4 x i64>, <4 x i64>* %__b
7121 %1 = bitcast <4 x i64> %load to <8 x i32>
7122 %2 = icmp sgt <8 x i32> %0, %1
7123 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7124 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of the eight i32 lanes of %__a and %__b
; (both register operands). The <8 x i1> result is ANDed with the bits of
; %__u, padded with lanes taken from zeroinitializer to <64 x i1>, and bitcast
; to i64. The VLX / NoVLX lines are autogenerated FileCheck assertions for the
; two RUN configurations.
7128 define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
7129 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask:
7130 ; VLX: # %bb.0: # %entry
7131 ; VLX-NEXT: kmovd %edi, %k1
7132 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
7133 ; VLX-NEXT: kmovq %k0, %rax
7134 ; VLX-NEXT: vzeroupper
7137 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask:
7138 ; NoVLX: # %bb.0: # %entry
7139 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
7140 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
7141 ; NoVLX-NEXT: kmovw %edi, %k1
7142 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7143 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
7144 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
7145 ; NoVLX-NEXT: kmovw %k0, %eax
7146 ; NoVLX-NEXT: movzwl %ax, %eax
7147 ; NoVLX-NEXT: vzeroupper
7150 %0 = bitcast <4 x i64> %__a to <8 x i32>
7151 %1 = bitcast <4 x i64> %__b to <8 x i32>
7152 %2 = icmp sgt <8 x i32> %0, %1
7153 %3 = bitcast i8 %__u to <8 x i1>
7154 %4 = and <8 x i1> %2, %3
7155 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7156 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-than compare of the eight i32 lanes of %__a against a
; full vector loaded from %__b. The <8 x i1> result is ANDed with the bits of
; %__u, padded with lanes taken from zeroinitializer to <64 x i1>, and bitcast
; to i64. The VLX / NoVLX lines are autogenerated FileCheck assertions for the
; two RUN configurations.
7160 define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
7161 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem:
7162 ; VLX: # %bb.0: # %entry
7163 ; VLX-NEXT: kmovd %edi, %k1
7164 ; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
7165 ; VLX-NEXT: kmovq %k0, %rax
7166 ; VLX-NEXT: vzeroupper
7169 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem:
7170 ; NoVLX: # %bb.0: # %entry
7171 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
7172 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
7173 ; NoVLX-NEXT: kmovw %edi, %k1
7174 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7175 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
7176 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
7177 ; NoVLX-NEXT: kmovw %k0, %eax
7178 ; NoVLX-NEXT: movzwl %ax, %eax
7179 ; NoVLX-NEXT: vzeroupper
7182 %0 = bitcast <4 x i64> %__a to <8 x i32>
7183 %load = load <4 x i64>, <4 x i64>* %__b
7184 %1 = bitcast <4 x i64> %load to <8 x i32>
7185 %2 = icmp sgt <8 x i32> %0, %1
7186 %3 = bitcast i8 %__u to <8 x i1>
7187 %4 = and <8 x i1> %2, %3
7188 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7189 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than compare of the eight i32 lanes of %__a against
; a scalar loaded from %__b and splatted to all lanes. The <8 x i1> result is
; padded with lanes taken from zeroinitializer to <64 x i1> and bitcast to
; i64. The VLX / NoVLX lines are autogenerated FileCheck assertions for the
; two RUN configurations.
7194 define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
7195 ; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
7196 ; VLX: # %bb.0: # %entry
7197 ; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
7198 ; VLX-NEXT: kmovq %k0, %rax
7199 ; VLX-NEXT: vzeroupper
7202 ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
7203 ; NoVLX: # %bb.0: # %entry
7204 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
7205 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
7206 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7207 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
7208 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
7209 ; NoVLX-NEXT: kmovw %k0, %eax
7210 ; NoVLX-NEXT: movzwl %ax, %eax
7211 ; NoVLX-NEXT: vzeroupper
7214 %0 = bitcast <4 x i64> %__a to <8 x i32>
7215 %load = load i32, i32* %__b
7216 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
7217 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7218 %2 = icmp sgt <8 x i32> %0, %1
7219 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7220 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of the eight i32 lanes of %__a against a
; scalar loaded from %__b and splatted to all lanes. The <8 x i1> result is
; ANDed with the bits of %__u, padded with lanes taken from zeroinitializer to
; <64 x i1>, and bitcast to i64. The VLX / NoVLX lines are autogenerated
; FileCheck assertions for the two RUN configurations.
7224 define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
7225 ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
7226 ; VLX: # %bb.0: # %entry
7227 ; VLX-NEXT: kmovd %edi, %k1
7228 ; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
7229 ; VLX-NEXT: kmovq %k0, %rax
7230 ; VLX-NEXT: vzeroupper
7233 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
7234 ; NoVLX: # %bb.0: # %entry
7235 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
7236 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
7237 ; NoVLX-NEXT: kmovw %edi, %k1
7238 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7239 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
7240 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
7241 ; NoVLX-NEXT: kmovw %k0, %eax
7242 ; NoVLX-NEXT: movzwl %ax, %eax
7243 ; NoVLX-NEXT: vzeroupper
7246 %0 = bitcast <4 x i64> %__a to <8 x i32>
7247 %load = load i32, i32* %__b
7248 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
7249 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7250 %2 = icmp sgt <8 x i32> %0, %1
7251 %3 = bitcast i8 %__u to <8 x i1>
7252 %4 = and <8 x i1> %3, %2
7253 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7254 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than compare of the sixteen i32 lanes of %__a and
; %__b (both register operands). The <16 x i1> result is padded with lanes
; taken from zeroinitializer to <32 x i1> and bitcast to i32. The VLX / NoVLX
; lines are autogenerated FileCheck assertions for the two RUN configurations.
7259 define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
7260 ; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask:
7261 ; VLX: # %bb.0: # %entry
7262 ; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7263 ; VLX-NEXT: kmovd %k0, %eax
7264 ; VLX-NEXT: vzeroupper
7267 ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask:
7268 ; NoVLX: # %bb.0: # %entry
7269 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7270 ; NoVLX-NEXT: kmovw %k0, %eax
7271 ; NoVLX-NEXT: vzeroupper
7274 %0 = bitcast <8 x i64> %__a to <16 x i32>
7275 %1 = bitcast <8 x i64> %__b to <16 x i32>
7276 %2 = icmp sgt <16 x i32> %0, %1
7277 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7278 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed greater-than compare of the sixteen i32 lanes of %__a
; against a full vector loaded from %__b. The <16 x i1> result is padded with
; lanes taken from zeroinitializer to <32 x i1> and bitcast to i32. The
; VLX / NoVLX lines are autogenerated FileCheck assertions for the two RUN
; configurations.
7282 define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
7283 ; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem:
7284 ; VLX: # %bb.0: # %entry
7285 ; VLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
7286 ; VLX-NEXT: kmovd %k0, %eax
7287 ; VLX-NEXT: vzeroupper
7290 ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem:
7291 ; NoVLX: # %bb.0: # %entry
7292 ; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
7293 ; NoVLX-NEXT: kmovw %k0, %eax
7294 ; NoVLX-NEXT: vzeroupper
7297 %0 = bitcast <8 x i64> %__a to <16 x i32>
7298 %load = load <8 x i64>, <8 x i64>* %__b
7299 %1 = bitcast <8 x i64> %load to <16 x i32>
7300 %2 = icmp sgt <16 x i32> %0, %1
7301 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7302 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of the sixteen i32 lanes of %__a and
; %__b (both register operands). The <16 x i1> result is ANDed with the bits
; of %__u, padded with lanes taken from zeroinitializer to <32 x i1>, and
; bitcast to i32. The VLX / NoVLX lines are autogenerated FileCheck assertions
; for the two RUN configurations.
7306 define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
7307 ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
7308 ; VLX: # %bb.0: # %entry
7309 ; VLX-NEXT: kmovd %edi, %k1
7310 ; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7311 ; VLX-NEXT: kmovd %k0, %eax
7312 ; VLX-NEXT: vzeroupper
7315 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
7316 ; NoVLX: # %bb.0: # %entry
7317 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7318 ; NoVLX-NEXT: kmovw %k0, %eax
7319 ; NoVLX-NEXT: andl %edi, %eax
7320 ; NoVLX-NEXT: vzeroupper
7323 %0 = bitcast <8 x i64> %__a to <16 x i32>
7324 %1 = bitcast <8 x i64> %__b to <16 x i32>
7325 %2 = icmp sgt <16 x i32> %0, %1
7326 %3 = bitcast i16 %__u to <16 x i1>
7327 %4 = and <16 x i1> %2, %3
7328 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7329 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-than compare of the sixteen i32 lanes of %__a against
; a full vector loaded from %__b. The <16 x i1> result is ANDed with the bits
; of %__u, padded with lanes taken from zeroinitializer to <32 x i1>, and
; bitcast to i32. The VLX / NoVLX lines are autogenerated FileCheck assertions
; for the two RUN configurations.
7333 define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
7334 ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
7335 ; VLX: # %bb.0: # %entry
7336 ; VLX-NEXT: kmovd %edi, %k1
7337 ; VLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
7338 ; VLX-NEXT: kmovd %k0, %eax
7339 ; VLX-NEXT: vzeroupper
7342 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
7343 ; NoVLX: # %bb.0: # %entry
7344 ; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0
7345 ; NoVLX-NEXT: kmovw %k0, %eax
7346 ; NoVLX-NEXT: andl %edi, %eax
7347 ; NoVLX-NEXT: vzeroupper
7350 %0 = bitcast <8 x i64> %__a to <16 x i32>
7351 %load = load <8 x i64>, <8 x i64>* %__b
7352 %1 = bitcast <8 x i64> %load to <16 x i32>
7353 %2 = icmp sgt <16 x i32> %0, %1
7354 %3 = bitcast i16 %__u to <16 x i1>
7355 %4 = and <16 x i1> %2, %3
7356 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7357 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed greater-than compare of the sixteen i32 lanes of %__a
; against a scalar loaded from %__b and splatted to all lanes. The <16 x i1>
; result is padded with lanes taken from zeroinitializer to <32 x i1> and
; bitcast to i32. The VLX / NoVLX lines are autogenerated FileCheck assertions
; for the two RUN configurations.
7362 define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
7363 ; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
7364 ; VLX: # %bb.0: # %entry
7365 ; VLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7366 ; VLX-NEXT: kmovd %k0, %eax
7367 ; VLX-NEXT: vzeroupper
7370 ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
7371 ; NoVLX: # %bb.0: # %entry
7372 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7373 ; NoVLX-NEXT: kmovw %k0, %eax
7374 ; NoVLX-NEXT: vzeroupper
7377 %0 = bitcast <8 x i64> %__a to <16 x i32>
7378 %load = load i32, i32* %__b
7379 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
7380 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7381 %2 = icmp sgt <16 x i32> %0, %1
7382 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7383 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of the sixteen i32 lanes of %__a against
; a scalar loaded from %__b and splatted to all lanes. The <16 x i1> result is
; ANDed with the bits of %__u, padded with lanes taken from zeroinitializer to
; <32 x i1>, and bitcast to i32. The VLX / NoVLX lines are autogenerated
; FileCheck assertions for the two RUN configurations.
7387 define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
7388 ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
7389 ; VLX: # %bb.0: # %entry
7390 ; VLX-NEXT: kmovd %edi, %k1
7391 ; VLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
7392 ; VLX-NEXT: kmovd %k0, %eax
7393 ; VLX-NEXT: vzeroupper
7396 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
7397 ; NoVLX: # %bb.0: # %entry
7398 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0
7399 ; NoVLX-NEXT: kmovw %k0, %eax
7400 ; NoVLX-NEXT: andl %edi, %eax
7401 ; NoVLX-NEXT: vzeroupper
7404 %0 = bitcast <8 x i64> %__a to <16 x i32>
7405 %load = load i32, i32* %__b
7406 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
7407 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7408 %2 = icmp sgt <16 x i32> %0, %1
7409 %3 = bitcast i16 %__u to <16 x i1>
7410 %4 = and <16 x i1> %3, %2
7411 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
7412 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed greater-than compare of the sixteen i32 lanes of %__a and
; %__b (both register operands). The <16 x i1> result is padded with lanes
; taken from zeroinitializer to <64 x i1> and bitcast to i64. The VLX / NoVLX
; lines are autogenerated FileCheck assertions for the two RUN configurations.
7417 define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
7418 ; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask:
7419 ; VLX: # %bb.0: # %entry
7420 ; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7421 ; VLX-NEXT: kmovq %k0, %rax
7422 ; VLX-NEXT: vzeroupper
7425 ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask:
7426 ; NoVLX: # %bb.0: # %entry
7427 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7428 ; NoVLX-NEXT: kmovw %k0, %eax
7429 ; NoVLX-NEXT: movzwl %ax, %eax
7430 ; NoVLX-NEXT: vzeroupper
7433 %0 = bitcast <8 x i64> %__a to <16 x i32>
7434 %1 = bitcast <8 x i64> %__b to <16 x i32>
7435 %2 = icmp sgt <16 x i32> %0, %1
7436 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7437 %4 = bitcast <64 x i1> %3 to i64
; Unmasked signed greater-than compare of the sixteen i32 lanes of %__a
; against a full vector loaded from %__b. The <16 x i1> result is padded with
; lanes taken from zeroinitializer to <64 x i1> and bitcast to i64. The
; VLX / NoVLX lines are autogenerated FileCheck assertions for the two RUN
; configurations.
7441 define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
7442 ; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem:
7443 ; VLX: # %bb.0: # %entry
7444 ; VLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
7445 ; VLX-NEXT: kmovq %k0, %rax
7446 ; VLX-NEXT: vzeroupper
7449 ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem:
7450 ; NoVLX: # %bb.0: # %entry
7451 ; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
7452 ; NoVLX-NEXT: kmovw %k0, %eax
7453 ; NoVLX-NEXT: movzwl %ax, %eax
7454 ; NoVLX-NEXT: vzeroupper
7457 %0 = bitcast <8 x i64> %__a to <16 x i32>
7458 %load = load <8 x i64>, <8 x i64>* %__b
7459 %1 = bitcast <8 x i64> %load to <16 x i32>
7460 %2 = icmp sgt <16 x i32> %0, %1
7461 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7462 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of the sixteen i32 lanes of %__a and
; %__b (both register operands). The <16 x i1> result is ANDed with the bits
; of %__u, padded with lanes taken from zeroinitializer to <64 x i1>, and
; bitcast to i64. The VLX / NoVLX lines are autogenerated FileCheck assertions
; for the two RUN configurations.
7466 define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
7467 ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
7468 ; VLX: # %bb.0: # %entry
7469 ; VLX-NEXT: kmovd %edi, %k1
7470 ; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
7471 ; VLX-NEXT: kmovq %k0, %rax
7472 ; VLX-NEXT: vzeroupper
7475 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
7476 ; NoVLX: # %bb.0: # %entry
7477 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
7478 ; NoVLX-NEXT: kmovw %k0, %eax
7479 ; NoVLX-NEXT: andl %edi, %eax
7480 ; NoVLX-NEXT: vzeroupper
7483 %0 = bitcast <8 x i64> %__a to <16 x i32>
7484 %1 = bitcast <8 x i64> %__b to <16 x i32>
7485 %2 = icmp sgt <16 x i32> %0, %1
7486 %3 = bitcast i16 %__u to <16 x i1>
7487 %4 = and <16 x i1> %2, %3
7488 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7489 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-than compare of the sixteen i32 lanes of %__a against
; a full vector loaded from %__b. The <16 x i1> result is ANDed with the bits
; of %__u, padded with lanes taken from zeroinitializer to <64 x i1>, and
; bitcast to i64. The VLX / NoVLX lines are autogenerated FileCheck assertions
; for the two RUN configurations.
7493 define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
7494 ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
7495 ; VLX: # %bb.0: # %entry
7496 ; VLX-NEXT: kmovd %edi, %k1
7497 ; VLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
7498 ; VLX-NEXT: kmovq %k0, %rax
7499 ; VLX-NEXT: vzeroupper
7502 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
7503 ; NoVLX: # %bb.0: # %entry
7504 ; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0
7505 ; NoVLX-NEXT: kmovw %k0, %eax
7506 ; NoVLX-NEXT: andl %edi, %eax
7507 ; NoVLX-NEXT: vzeroupper
7510 %0 = bitcast <8 x i64> %__a to <16 x i32>
7511 %load = load <8 x i64>, <8 x i64>* %__b
7512 %1 = bitcast <8 x i64> %load to <16 x i32>
7513 %2 = icmp sgt <16 x i32> %0, %1
7514 %3 = bitcast i16 %__u to <16 x i1>
7515 %4 = and <16 x i1> %2, %3
7516 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7517 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than compare of the sixteen i32 lanes of %__a
; against a scalar loaded from %__b and splatted to all lanes. The <16 x i1>
; result is padded with lanes taken from zeroinitializer to <64 x i1> and
; bitcast to i64. The VLX / NoVLX lines are autogenerated FileCheck assertions
; for the two RUN configurations.
7522 define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
7523 ; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
7524 ; VLX: # %bb.0: # %entry
7525 ; VLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7526 ; VLX-NEXT: kmovq %k0, %rax
7527 ; VLX-NEXT: vzeroupper
7530 ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
7531 ; NoVLX: # %bb.0: # %entry
7532 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
7533 ; NoVLX-NEXT: kmovw %k0, %eax
7534 ; NoVLX-NEXT: movzwl %ax, %eax
7535 ; NoVLX-NEXT: vzeroupper
7538 %0 = bitcast <8 x i64> %__a to <16 x i32>
7539 %load = load i32, i32* %__b
7540 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
7541 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7542 %2 = icmp sgt <16 x i32> %0, %1
7543 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7544 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of the sixteen i32 lanes of %__a against
; a scalar loaded from %__b and splatted to all lanes. The <16 x i1> result is
; ANDed with the bits of %__u, padded with lanes taken from zeroinitializer to
; <64 x i1>, and bitcast to i64. The VLX / NoVLX lines are autogenerated
; FileCheck assertions for the two RUN configurations.
7548 define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
7549 ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
7550 ; VLX: # %bb.0: # %entry
7551 ; VLX-NEXT: kmovd %edi, %k1
7552 ; VLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
7553 ; VLX-NEXT: kmovq %k0, %rax
7554 ; VLX-NEXT: vzeroupper
7557 ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
7558 ; NoVLX: # %bb.0: # %entry
7559 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0
7560 ; NoVLX-NEXT: kmovw %k0, %eax
7561 ; NoVLX-NEXT: andl %edi, %eax
7562 ; NoVLX-NEXT: vzeroupper
7565 %0 = bitcast <8 x i64> %__a to <16 x i32>
7566 %load = load i32, i32* %__b
7567 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
7568 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
7569 %2 = icmp sgt <16 x i32> %0, %1
7570 %3 = bitcast i16 %__u to <16 x i1>
7571 %4 = and <16 x i1> %3, %2
7572 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
7573 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed greater-than compare of the two i64 lanes of %__a and %__b
; (both register operands). The <2 x i1> result is padded with lanes taken
; from zeroinitializer to <4 x i1> and bitcast to i4. The VLX / NoVLX lines
; are autogenerated FileCheck assertions for the two RUN configurations.
7578 define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7579 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask:
7580 ; VLX: # %bb.0: # %entry
7581 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
7582 ; VLX-NEXT: kmovb %k0, %eax
7585 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask:
7586 ; NoVLX: # %bb.0: # %entry
7587 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
7588 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7589 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7590 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7591 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7592 ; NoVLX-NEXT: kmovw %k0, %eax
7593 ; NoVLX-NEXT: andl $3, %eax
7594 ; NoVLX-NEXT: vzeroupper
7597 %0 = bitcast <2 x i64> %__a to <2 x i64>
7598 %1 = bitcast <2 x i64> %__b to <2 x i64>
7599 %2 = icmp sgt <2 x i64> %0, %1
7600 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7601 %4 = bitcast <4 x i1> %3 to i4
; Unmasked signed greater-than compare of the two i64 lanes of %__a against a
; full vector loaded from %__b. The <2 x i1> result is padded with lanes taken
; from zeroinitializer to <4 x i1> and bitcast to i4. The VLX / NoVLX lines
; are autogenerated FileCheck assertions for the two RUN configurations.
7605 define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
7606 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem:
7607 ; VLX: # %bb.0: # %entry
7608 ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
7609 ; VLX-NEXT: kmovb %k0, %eax
7612 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem:
7613 ; NoVLX: # %bb.0: # %entry
7614 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7615 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
7616 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7617 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7618 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7619 ; NoVLX-NEXT: kmovw %k0, %eax
7620 ; NoVLX-NEXT: andl $3, %eax
7621 ; NoVLX-NEXT: vzeroupper
7624 %0 = bitcast <2 x i64> %__a to <2 x i64>
7625 %load = load <2 x i64>, <2 x i64>* %__b
7626 %1 = bitcast <2 x i64> %load to <2 x i64>
7627 %2 = icmp sgt <2 x i64> %0, %1
7628 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7629 %4 = bitcast <4 x i1> %3 to i4
; Masked signed greater-than compare of the two i64 lanes of %__a and %__b
; (both register operands). The <2 x i1> result is ANDed with the low two bits
; of %__u (extracted via %extract.i), padded with lanes taken from
; zeroinitializer to <4 x i1>, and bitcast to i4. The VLX / NoVLX lines are
; autogenerated FileCheck assertions for the two RUN configurations.
7633 define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7634 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask:
7635 ; VLX: # %bb.0: # %entry
7636 ; VLX-NEXT: kmovd %edi, %k1
7637 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
7638 ; VLX-NEXT: kmovb %k0, %eax
7641 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask:
7642 ; NoVLX: # %bb.0: # %entry
7643 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
7644 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7645 ; NoVLX-NEXT: kmovw %edi, %k1
7646 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7647 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7648 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7649 ; NoVLX-NEXT: kmovw %k0, %eax
7650 ; NoVLX-NEXT: andl $3, %eax
7651 ; NoVLX-NEXT: vzeroupper
7654 %0 = bitcast <2 x i64> %__a to <2 x i64>
7655 %1 = bitcast <2 x i64> %__b to <2 x i64>
7656 %2 = icmp sgt <2 x i64> %0, %1
7657 %3 = bitcast i8 %__u to <8 x i1>
7658 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7659 %4 = and <2 x i1> %2, %extract.i
7660 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7661 %6 = bitcast <4 x i1> %5 to i4
; Masked signed greater-than compare of the two i64 lanes of %__a against a
; full vector loaded from %__b. The <2 x i1> result is ANDed with the low two
; bits of %__u (extracted via %extract.i), padded with lanes taken from
; zeroinitializer to <4 x i1>, and bitcast to i4. The VLX / NoVLX lines are
; autogenerated FileCheck assertions for the two RUN configurations.
7665 define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
7666 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem:
7667 ; VLX: # %bb.0: # %entry
7668 ; VLX-NEXT: kmovd %edi, %k1
7669 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
7670 ; VLX-NEXT: kmovb %k0, %eax
7673 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem:
7674 ; NoVLX: # %bb.0: # %entry
7675 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7676 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
7677 ; NoVLX-NEXT: kmovw %edi, %k1
7678 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7679 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7680 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7681 ; NoVLX-NEXT: kmovw %k0, %eax
7682 ; NoVLX-NEXT: andl $3, %eax
7683 ; NoVLX-NEXT: vzeroupper
7686 %0 = bitcast <2 x i64> %__a to <2 x i64>
7687 %load = load <2 x i64>, <2 x i64>* %__b
7688 %1 = bitcast <2 x i64> %load to <2 x i64>
7689 %2 = icmp sgt <2 x i64> %0, %1
7690 %3 = bitcast i8 %__u to <8 x i1>
7691 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7692 %4 = and <2 x i1> %2, %extract.i
7693 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7694 %6 = bitcast <4 x i1> %5 to i4
; Unmasked signed greater-than compare of the two i64 lanes of %__a against a
; scalar loaded from %__b and splatted to both lanes. The <2 x i1> result is
; padded with lanes taken from zeroinitializer to <4 x i1> and bitcast to i4.
; The VLX / NoVLX lines are autogenerated FileCheck assertions for the two RUN
; configurations.
7699 define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
7700 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
7701 ; VLX: # %bb.0: # %entry
7702 ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
7703 ; VLX-NEXT: kmovb %k0, %eax
7706 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
7707 ; NoVLX: # %bb.0: # %entry
7708 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7709 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
7710 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7711 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7712 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7713 ; NoVLX-NEXT: kmovw %k0, %eax
7714 ; NoVLX-NEXT: andl $3, %eax
7715 ; NoVLX-NEXT: vzeroupper
7718 %0 = bitcast <2 x i64> %__a to <2 x i64>
7719 %load = load i64, i64* %__b
7720 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
7721 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
7722 %2 = icmp sgt <2 x i64> %0, %1
7723 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7724 %4 = bitcast <4 x i1> %3 to i4
; Masked signed greater-than compare of the two i64 lanes of %__a against a
; scalar loaded from %__b and splatted to both lanes. The <2 x i1> result is
; ANDed with the low two bits of %__u (extracted via %extract.i), padded with
; lanes taken from zeroinitializer to <4 x i1>, and bitcast to i4. The
; VLX / NoVLX lines are autogenerated FileCheck assertions for the two RUN
; configurations.
7728 define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
7729 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
7730 ; VLX: # %bb.0: # %entry
7731 ; VLX-NEXT: kmovd %edi, %k1
7732 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
7733 ; VLX-NEXT: kmovb %k0, %eax
7736 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
7737 ; NoVLX: # %bb.0: # %entry
7738 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7739 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
7740 ; NoVLX-NEXT: kmovw %edi, %k1
7741 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7742 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7743 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7744 ; NoVLX-NEXT: kmovw %k0, %eax
7745 ; NoVLX-NEXT: andl $3, %eax
7746 ; NoVLX-NEXT: vzeroupper
7749 %0 = bitcast <2 x i64> %__a to <2 x i64>
7750 %load = load i64, i64* %__b
7751 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
7752 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
7753 %2 = icmp sgt <2 x i64> %0, %1
7754 %3 = bitcast i8 %__u to <8 x i1>
7755 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7756 %4 = and <2 x i1> %extract.i, %2
7757 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7758 %6 = bitcast <4 x i1> %5 to i4
; Unmasked signed greater-than compare of the two i64 lanes of %__a and %__b
; (both register operands). The <2 x i1> result is padded with lanes taken
; from zeroinitializer to <8 x i1> and bitcast to i8. The VLX / NoVLX lines
; are autogenerated FileCheck assertions for the two RUN configurations.
7763 define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7764 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask:
7765 ; VLX: # %bb.0: # %entry
7766 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
7767 ; VLX-NEXT: kmovd %k0, %eax
7768 ; VLX-NEXT: # kill: def $al killed $al killed $eax
7771 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask:
7772 ; NoVLX: # %bb.0: # %entry
7773 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
7774 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7775 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7776 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7777 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7778 ; NoVLX-NEXT: kmovw %k0, %eax
7779 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
7780 ; NoVLX-NEXT: vzeroupper
7783 %0 = bitcast <2 x i64> %__a to <2 x i64>
7784 %1 = bitcast <2 x i64> %__b to <2 x i64>
7785 %2 = icmp sgt <2 x i64> %0, %1
7786 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7787 %4 = bitcast <8 x i1> %3 to i8
; Signed-greater-than compare of %__a with a <2 x i64> vector loaded from
; %__b. The <2 x i1> result is widened to <8 x i1> with zero lanes (shuffle
; indices >= 2 select from the zeroinitializer operand) and bitcast to i8.
; Expected codegen: with AVX512VL, the load is folded into vpcmpgtq as a
; memory operand; with only AVX512F, a separate vmovdqa load feeds a zmm
; vpcmpgtq whose mask is trimmed to its low two bits by kshiftlw/kshiftrw $14.
7791 define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
7792 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem:
7793 ; VLX: # %bb.0: # %entry
7794 ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
7795 ; VLX-NEXT: kmovd %k0, %eax
7796 ; VLX-NEXT: # kill: def $al killed $al killed $eax
7799 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem:
7800 ; NoVLX: # %bb.0: # %entry
7801 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7802 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
7803 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7804 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7805 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7806 ; NoVLX-NEXT: kmovw %k0, %eax
7807 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
7808 ; NoVLX-NEXT: vzeroupper
7811 %0 = bitcast <2 x i64> %__a to <2 x i64>
7812 %load = load <2 x i64>, <2 x i64>* %__b
7813 %1 = bitcast <2 x i64> %load to <2 x i64>
7814 %2 = icmp sgt <2 x i64> %0, %1
7815 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7816 %4 = bitcast <8 x i1> %3 to i8
; Masked signed-greater-than compare of %__a and %__b as <2 x i64>. The
; compare result is ANDed with lanes 0 and 1 of %__u (viewed as <8 x i1>),
; widened to <8 x i1> with zero lanes (shuffle indices >= 2 select from the
; zeroinitializer operand) and bitcast to i8.
; Expected codegen: %__u is moved into %k1 and used as the write-mask of
; vpcmpgtq (xmm with AVX512VL; zmm followed by kshiftlw/kshiftrw $14 with
; only AVX512F).
7820 define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7821 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask:
7822 ; VLX: # %bb.0: # %entry
7823 ; VLX-NEXT: kmovd %edi, %k1
7824 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
7825 ; VLX-NEXT: kmovd %k0, %eax
7826 ; VLX-NEXT: # kill: def $al killed $al killed $eax
7829 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask:
7830 ; NoVLX: # %bb.0: # %entry
7831 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
7832 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7833 ; NoVLX-NEXT: kmovw %edi, %k1
7834 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7835 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7836 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7837 ; NoVLX-NEXT: kmovw %k0, %eax
7838 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
7839 ; NoVLX-NEXT: vzeroupper
7842 %0 = bitcast <2 x i64> %__a to <2 x i64>
7843 %1 = bitcast <2 x i64> %__b to <2 x i64>
7844 %2 = icmp sgt <2 x i64> %0, %1
7845 %3 = bitcast i8 %__u to <8 x i1>
7846 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7847 %4 = and <2 x i1> %2, %extract.i
7848 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7849 %6 = bitcast <8 x i1> %5 to i8
; Masked signed-greater-than compare of %__a with a <2 x i64> vector loaded
; from %__b. The compare result is ANDed with lanes 0 and 1 of %__u (viewed
; as <8 x i1>), widened to <8 x i1> with zero lanes (shuffle indices >= 2
; select from the zeroinitializer operand) and bitcast to i8.
; Expected codegen: %__u is moved into %k1 and used as the write-mask; with
; AVX512VL the load is folded into vpcmpgtq, with only AVX512F a vmovdqa
; load feeds a zmm vpcmpgtq followed by kshiftlw/kshiftrw $14.
7853 define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
7854 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem:
7855 ; VLX: # %bb.0: # %entry
7856 ; VLX-NEXT: kmovd %edi, %k1
7857 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
7858 ; VLX-NEXT: kmovd %k0, %eax
7859 ; VLX-NEXT: # kill: def $al killed $al killed $eax
7862 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem:
7863 ; NoVLX: # %bb.0: # %entry
7864 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7865 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
7866 ; NoVLX-NEXT: kmovw %edi, %k1
7867 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7868 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7869 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7870 ; NoVLX-NEXT: kmovw %k0, %eax
7871 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
7872 ; NoVLX-NEXT: vzeroupper
7875 %0 = bitcast <2 x i64> %__a to <2 x i64>
7876 %load = load <2 x i64>, <2 x i64>* %__b
7877 %1 = bitcast <2 x i64> %load to <2 x i64>
7878 %2 = icmp sgt <2 x i64> %0, %1
7879 %3 = bitcast i8 %__u to <8 x i1>
7880 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7881 %4 = and <2 x i1> %2, %extract.i
7882 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7883 %6 = bitcast <8 x i1> %5 to i8
; Signed-greater-than compare of %__a against a scalar i64 loaded from %__b
; and splatted to both lanes (insertelement + shuffle <0, 0>). The <2 x i1>
; result is widened to <8 x i1> with zero lanes (shuffle indices >= 2 select
; from the zeroinitializer operand) and bitcast to i8.
; Expected codegen: with AVX512VL, vpcmpgtq with a {1to2} broadcast memory
; operand; with only AVX512F, vpbroadcastq feeding a zmm vpcmpgtq whose mask
; is trimmed to its low two bits by kshiftlw/kshiftrw $14.
7888 define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
7889 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
7890 ; VLX: # %bb.0: # %entry
7891 ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
7892 ; VLX-NEXT: kmovd %k0, %eax
7893 ; VLX-NEXT: # kill: def $al killed $al killed $eax
7896 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
7897 ; NoVLX: # %bb.0: # %entry
7898 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7899 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
7900 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7901 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7902 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7903 ; NoVLX-NEXT: kmovw %k0, %eax
7904 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
7905 ; NoVLX-NEXT: vzeroupper
7908 %0 = bitcast <2 x i64> %__a to <2 x i64>
7909 %load = load i64, i64* %__b
7910 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
7911 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
7912 %2 = icmp sgt <2 x i64> %0, %1
7913 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7914 %4 = bitcast <8 x i1> %3 to i8
; Masked signed-greater-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes. The result is ANDed with lanes 0 and 1 of
; %__u (viewed as <8 x i1>), widened to <8 x i1> with zero lanes (shuffle
; indices >= 2 select from the zeroinitializer operand) and bitcast to i8.
; Expected codegen: %__u is moved into %k1 and used as the write-mask; with
; AVX512VL vpcmpgtq takes a {1to2} broadcast memory operand, with only
; AVX512F vpbroadcastq feeds a zmm vpcmpgtq followed by kshiftlw/kshiftrw $14.
7918 define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
7919 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
7920 ; VLX: # %bb.0: # %entry
7921 ; VLX-NEXT: kmovd %edi, %k1
7922 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
7923 ; VLX-NEXT: kmovd %k0, %eax
7924 ; VLX-NEXT: # kill: def $al killed $al killed $eax
7927 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
7928 ; NoVLX: # %bb.0: # %entry
7929 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7930 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
7931 ; NoVLX-NEXT: kmovw %edi, %k1
7932 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
7933 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7934 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7935 ; NoVLX-NEXT: kmovw %k0, %eax
7936 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
7937 ; NoVLX-NEXT: vzeroupper
7940 %0 = bitcast <2 x i64> %__a to <2 x i64>
7941 %load = load i64, i64* %__b
7942 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
7943 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
7944 %2 = icmp sgt <2 x i64> %0, %1
7945 %3 = bitcast i8 %__u to <8 x i1>
7946 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
7947 %4 = and <2 x i1> %extract.i, %2
7948 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7949 %6 = bitcast <8 x i1> %5 to i8
; Signed-greater-than compare of %__a and %__b as <2 x i64>. The <2 x i1>
; result is widened to <16 x i1> with zero lanes (shuffle indices >= 2 select
; from the zeroinitializer operand) and bitcast to i16.
; Expected codegen: with AVX512VL, an xmm vpcmpgtq into %k0; with only
; AVX512F, a zmm vpcmpgtq whose mask is trimmed to its low two bits by the
; kshiftlw/kshiftrw $14 pair.
7954 define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
7955 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask:
7956 ; VLX: # %bb.0: # %entry
7957 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
7958 ; VLX-NEXT: kmovd %k0, %eax
7959 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
7962 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask:
7963 ; NoVLX: # %bb.0: # %entry
7964 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
7965 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7966 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7967 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7968 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7969 ; NoVLX-NEXT: kmovw %k0, %eax
7970 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
7971 ; NoVLX-NEXT: vzeroupper
7974 %0 = bitcast <2 x i64> %__a to <2 x i64>
7975 %1 = bitcast <2 x i64> %__b to <2 x i64>
7976 %2 = icmp sgt <2 x i64> %0, %1
7977 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
7978 %4 = bitcast <16 x i1> %3 to i16
; Signed-greater-than compare of %__a with a <2 x i64> vector loaded from
; %__b. The <2 x i1> result is widened to <16 x i1> with zero lanes (shuffle
; indices >= 2 select from the zeroinitializer operand) and bitcast to i16.
; Expected codegen: with AVX512VL, the load is folded into vpcmpgtq as a
; memory operand; with only AVX512F, a separate vmovdqa load feeds a zmm
; vpcmpgtq whose mask is trimmed to its low two bits by kshiftlw/kshiftrw $14.
7982 define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
7983 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem:
7984 ; VLX: # %bb.0: # %entry
7985 ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
7986 ; VLX-NEXT: kmovd %k0, %eax
7987 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
7990 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem:
7991 ; NoVLX: # %bb.0: # %entry
7992 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7993 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
7994 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
7995 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
7996 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
7997 ; NoVLX-NEXT: kmovw %k0, %eax
7998 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
7999 ; NoVLX-NEXT: vzeroupper
8002 %0 = bitcast <2 x i64> %__a to <2 x i64>
8003 %load = load <2 x i64>, <2 x i64>* %__b
8004 %1 = bitcast <2 x i64> %load to <2 x i64>
8005 %2 = icmp sgt <2 x i64> %0, %1
8006 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8007 %4 = bitcast <16 x i1> %3 to i16
; Masked signed-greater-than compare of %__a and %__b as <2 x i64>. The
; compare result is ANDed with lanes 0 and 1 of %__u (viewed as <8 x i1>),
; widened to <16 x i1> with zero lanes (shuffle indices >= 2 select from the
; zeroinitializer operand) and bitcast to i16.
; Expected codegen: %__u is moved into %k1 and used as the write-mask of
; vpcmpgtq (xmm with AVX512VL; zmm followed by kshiftlw/kshiftrw $14 with
; only AVX512F).
8011 define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
8012 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask:
8013 ; VLX: # %bb.0: # %entry
8014 ; VLX-NEXT: kmovd %edi, %k1
8015 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
8016 ; VLX-NEXT: kmovd %k0, %eax
8017 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8020 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask:
8021 ; NoVLX: # %bb.0: # %entry
8022 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
8023 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8024 ; NoVLX-NEXT: kmovw %edi, %k1
8025 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8026 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8027 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8028 ; NoVLX-NEXT: kmovw %k0, %eax
8029 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8030 ; NoVLX-NEXT: vzeroupper
8033 %0 = bitcast <2 x i64> %__a to <2 x i64>
8034 %1 = bitcast <2 x i64> %__b to <2 x i64>
8035 %2 = icmp sgt <2 x i64> %0, %1
8036 %3 = bitcast i8 %__u to <8 x i1>
8037 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8038 %4 = and <2 x i1> %2, %extract.i
8039 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8040 %6 = bitcast <16 x i1> %5 to i16
; Masked signed-greater-than compare of %__a with a <2 x i64> vector loaded
; from %__b. The compare result is ANDed with lanes 0 and 1 of %__u (viewed
; as <8 x i1>), widened to <16 x i1> with zero lanes (shuffle indices >= 2
; select from the zeroinitializer operand) and bitcast to i16.
; Expected codegen: %__u is moved into %k1 and used as the write-mask; with
; AVX512VL the load is folded into vpcmpgtq, with only AVX512F a vmovdqa
; load feeds a zmm vpcmpgtq followed by kshiftlw/kshiftrw $14.
8044 define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
8045 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem:
8046 ; VLX: # %bb.0: # %entry
8047 ; VLX-NEXT: kmovd %edi, %k1
8048 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
8049 ; VLX-NEXT: kmovd %k0, %eax
8050 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8053 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem:
8054 ; NoVLX: # %bb.0: # %entry
8055 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8056 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
8057 ; NoVLX-NEXT: kmovw %edi, %k1
8058 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8059 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8060 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8061 ; NoVLX-NEXT: kmovw %k0, %eax
8062 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8063 ; NoVLX-NEXT: vzeroupper
8066 %0 = bitcast <2 x i64> %__a to <2 x i64>
8067 %load = load <2 x i64>, <2 x i64>* %__b
8068 %1 = bitcast <2 x i64> %load to <2 x i64>
8069 %2 = icmp sgt <2 x i64> %0, %1
8070 %3 = bitcast i8 %__u to <8 x i1>
8071 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8072 %4 = and <2 x i1> %2, %extract.i
8073 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8074 %6 = bitcast <16 x i1> %5 to i16
; Signed-greater-than compare of %__a against a scalar i64 loaded from %__b
; and splatted to both lanes (insertelement + shuffle <0, 0>). The <2 x i1>
; result is widened to <16 x i1> with zero lanes (shuffle indices >= 2 select
; from the zeroinitializer operand) and bitcast to i16.
; Expected codegen: with AVX512VL, vpcmpgtq with a {1to2} broadcast memory
; operand; with only AVX512F, vpbroadcastq feeding a zmm vpcmpgtq whose mask
; is trimmed to its low two bits by kshiftlw/kshiftrw $14.
8079 define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
8080 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
8081 ; VLX: # %bb.0: # %entry
8082 ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
8083 ; VLX-NEXT: kmovd %k0, %eax
8084 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8087 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
8088 ; NoVLX: # %bb.0: # %entry
8089 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8090 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
8091 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8092 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8093 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8094 ; NoVLX-NEXT: kmovw %k0, %eax
8095 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8096 ; NoVLX-NEXT: vzeroupper
8099 %0 = bitcast <2 x i64> %__a to <2 x i64>
8100 %load = load i64, i64* %__b
8101 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8102 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8103 %2 = icmp sgt <2 x i64> %0, %1
8104 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8105 %4 = bitcast <16 x i1> %3 to i16
; Masked signed-greater-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes. The result is ANDed with lanes 0 and 1 of
; %__u (viewed as <8 x i1>), widened to <16 x i1> with zero lanes (shuffle
; indices >= 2 select from the zeroinitializer operand) and bitcast to i16.
; Expected codegen: %__u is moved into %k1 and used as the write-mask; with
; AVX512VL vpcmpgtq takes a {1to2} broadcast memory operand, with only
; AVX512F vpbroadcastq feeds a zmm vpcmpgtq followed by kshiftlw/kshiftrw $14.
8109 define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
8110 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
8111 ; VLX: # %bb.0: # %entry
8112 ; VLX-NEXT: kmovd %edi, %k1
8113 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
8114 ; VLX-NEXT: kmovd %k0, %eax
8115 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8118 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
8119 ; NoVLX: # %bb.0: # %entry
8120 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8121 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
8122 ; NoVLX-NEXT: kmovw %edi, %k1
8123 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8124 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8125 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8126 ; NoVLX-NEXT: kmovw %k0, %eax
8127 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8128 ; NoVLX-NEXT: vzeroupper
8131 %0 = bitcast <2 x i64> %__a to <2 x i64>
8132 %load = load i64, i64* %__b
8133 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8134 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8135 %2 = icmp sgt <2 x i64> %0, %1
8136 %3 = bitcast i8 %__u to <8 x i1>
8137 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8138 %4 = and <2 x i1> %extract.i, %2
8139 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8140 %6 = bitcast <16 x i1> %5 to i16
; Signed-greater-than compare of %__a and %__b as <2 x i64>. The <2 x i1>
; result is widened to <32 x i1> with zero lanes (shuffle indices >= 2 select
; from the zeroinitializer operand) and bitcast to i32.
; Expected codegen: with AVX512VL, an xmm vpcmpgtq into %k0; with only
; AVX512F, a zmm vpcmpgtq whose mask is trimmed to its low two bits by the
; kshiftlw/kshiftrw $14 pair.
8145 define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
8146 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
8147 ; VLX: # %bb.0: # %entry
8148 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
8149 ; VLX-NEXT: kmovd %k0, %eax
8152 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
8153 ; NoVLX: # %bb.0: # %entry
8154 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
8155 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8156 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8157 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8158 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8159 ; NoVLX-NEXT: kmovw %k0, %eax
8160 ; NoVLX-NEXT: vzeroupper
8163 %0 = bitcast <2 x i64> %__a to <2 x i64>
8164 %1 = bitcast <2 x i64> %__b to <2 x i64>
8165 %2 = icmp sgt <2 x i64> %0, %1
8166 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8167 %4 = bitcast <32 x i1> %3 to i32
; Signed-greater-than compare of %__a with a <2 x i64> vector loaded from
; %__b. The <2 x i1> result is widened to <32 x i1> with zero lanes (shuffle
; indices >= 2 select from the zeroinitializer operand) and bitcast to i32.
; Expected codegen: with AVX512VL, the load is folded into vpcmpgtq as a
; memory operand; with only AVX512F, a separate vmovdqa load feeds a zmm
; vpcmpgtq whose mask is trimmed to its low two bits by kshiftlw/kshiftrw $14.
8171 define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
8172 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
8173 ; VLX: # %bb.0: # %entry
8174 ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
8175 ; VLX-NEXT: kmovd %k0, %eax
8178 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
8179 ; NoVLX: # %bb.0: # %entry
8180 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8181 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
8182 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8183 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8184 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8185 ; NoVLX-NEXT: kmovw %k0, %eax
8186 ; NoVLX-NEXT: vzeroupper
8189 %0 = bitcast <2 x i64> %__a to <2 x i64>
8190 %load = load <2 x i64>, <2 x i64>* %__b
8191 %1 = bitcast <2 x i64> %load to <2 x i64>
8192 %2 = icmp sgt <2 x i64> %0, %1
8193 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8194 %4 = bitcast <32 x i1> %3 to i32
; Masked signed-greater-than compare of %__a and %__b as <2 x i64>. The
; compare result is ANDed with lanes 0 and 1 of %__u (viewed as <8 x i1>),
; widened to <32 x i1> with zero lanes (shuffle indices >= 2 select from the
; zeroinitializer operand) and bitcast to i32.
; Expected codegen: %__u is moved into %k1 and used as the write-mask of
; vpcmpgtq (xmm with AVX512VL; zmm followed by kshiftlw/kshiftrw $14 with
; only AVX512F).
8198 define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
8199 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
8200 ; VLX: # %bb.0: # %entry
8201 ; VLX-NEXT: kmovd %edi, %k1
8202 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
8203 ; VLX-NEXT: kmovd %k0, %eax
8206 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
8207 ; NoVLX: # %bb.0: # %entry
8208 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
8209 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8210 ; NoVLX-NEXT: kmovw %edi, %k1
8211 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8212 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8213 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8214 ; NoVLX-NEXT: kmovw %k0, %eax
8215 ; NoVLX-NEXT: vzeroupper
8218 %0 = bitcast <2 x i64> %__a to <2 x i64>
8219 %1 = bitcast <2 x i64> %__b to <2 x i64>
8220 %2 = icmp sgt <2 x i64> %0, %1
8221 %3 = bitcast i8 %__u to <8 x i1>
8222 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8223 %4 = and <2 x i1> %2, %extract.i
8224 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8225 %6 = bitcast <32 x i1> %5 to i32
; Masked signed-greater-than compare of %__a with a <2 x i64> vector loaded
; from %__b. The compare result is ANDed with lanes 0 and 1 of %__u (viewed
; as <8 x i1>), widened to <32 x i1> with zero lanes (shuffle indices >= 2
; select from the zeroinitializer operand) and bitcast to i32.
; Expected codegen: %__u is moved into %k1 and used as the write-mask; with
; AVX512VL the load is folded into vpcmpgtq, with only AVX512F a vmovdqa
; load feeds a zmm vpcmpgtq followed by kshiftlw/kshiftrw $14.
8229 define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
8230 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
8231 ; VLX: # %bb.0: # %entry
8232 ; VLX-NEXT: kmovd %edi, %k1
8233 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
8234 ; VLX-NEXT: kmovd %k0, %eax
8237 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
8238 ; NoVLX: # %bb.0: # %entry
8239 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8240 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
8241 ; NoVLX-NEXT: kmovw %edi, %k1
8242 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8243 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8244 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8245 ; NoVLX-NEXT: kmovw %k0, %eax
8246 ; NoVLX-NEXT: vzeroupper
8249 %0 = bitcast <2 x i64> %__a to <2 x i64>
8250 %load = load <2 x i64>, <2 x i64>* %__b
8251 %1 = bitcast <2 x i64> %load to <2 x i64>
8252 %2 = icmp sgt <2 x i64> %0, %1
8253 %3 = bitcast i8 %__u to <8 x i1>
8254 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8255 %4 = and <2 x i1> %2, %extract.i
8256 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8257 %6 = bitcast <32 x i1> %5 to i32
; Signed-greater-than compare of %__a against a scalar i64 loaded from %__b
; and splatted to both lanes (insertelement + shuffle <0, 0>). The <2 x i1>
; result is widened to <32 x i1> with zero lanes (shuffle indices >= 2 select
; from the zeroinitializer operand) and bitcast to i32.
; Expected codegen: with AVX512VL, vpcmpgtq with a {1to2} broadcast memory
; operand; with only AVX512F, vpbroadcastq feeding a zmm vpcmpgtq whose mask
; is trimmed to its low two bits by kshiftlw/kshiftrw $14.
8262 define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
8263 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
8264 ; VLX: # %bb.0: # %entry
8265 ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
8266 ; VLX-NEXT: kmovd %k0, %eax
8269 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
8270 ; NoVLX: # %bb.0: # %entry
8271 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8272 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
8273 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8274 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8275 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8276 ; NoVLX-NEXT: kmovw %k0, %eax
8277 ; NoVLX-NEXT: vzeroupper
8280 %0 = bitcast <2 x i64> %__a to <2 x i64>
8281 %load = load i64, i64* %__b
8282 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8283 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8284 %2 = icmp sgt <2 x i64> %0, %1
8285 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8286 %4 = bitcast <32 x i1> %3 to i32
; Masked signed-greater-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes. The result is ANDed with lanes 0 and 1 of
; %__u (viewed as <8 x i1>), widened to <32 x i1> with zero lanes (shuffle
; indices >= 2 select from the zeroinitializer operand) and bitcast to i32.
; Expected codegen: %__u is moved into %k1 and used as the write-mask; with
; AVX512VL vpcmpgtq takes a {1to2} broadcast memory operand, with only
; AVX512F vpbroadcastq feeds a zmm vpcmpgtq followed by kshiftlw/kshiftrw $14.
8290 define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
8291 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
8292 ; VLX: # %bb.0: # %entry
8293 ; VLX-NEXT: kmovd %edi, %k1
8294 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
8295 ; VLX-NEXT: kmovd %k0, %eax
8298 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
8299 ; NoVLX: # %bb.0: # %entry
8300 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8301 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
8302 ; NoVLX-NEXT: kmovw %edi, %k1
8303 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8304 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8305 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8306 ; NoVLX-NEXT: kmovw %k0, %eax
8307 ; NoVLX-NEXT: vzeroupper
8310 %0 = bitcast <2 x i64> %__a to <2 x i64>
8311 %load = load i64, i64* %__b
8312 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8313 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8314 %2 = icmp sgt <2 x i64> %0, %1
8315 %3 = bitcast i8 %__u to <8 x i1>
8316 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8317 %4 = and <2 x i1> %extract.i, %2
8318 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8319 %6 = bitcast <32 x i1> %5 to i32
8324 define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
8325 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
8326 ; VLX: # %bb.0: # %entry
8327 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
8328 ; VLX-NEXT: kmovq %k0, %rax
8331 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
8332 ; NoVLX: # %bb.0: # %entry
8333 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
8334 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8335 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8336 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8337 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8338 ; NoVLX-NEXT: kmovw %k0, %eax
8339 ; NoVLX-NEXT: movzwl %ax, %eax
8340 ; NoVLX-NEXT: vzeroupper
8343 %0 = bitcast <2 x i64> %__a to <2 x i64>
8344 %1 = bitcast <2 x i64> %__b to <2 x i64>
8345 %2 = icmp sgt <2 x i64> %0, %1
8346 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8347 %4 = bitcast <64 x i1> %3 to i64
8351 define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
8352 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem:
8353 ; VLX: # %bb.0: # %entry
8354 ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
8355 ; VLX-NEXT: kmovq %k0, %rax
8358 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem:
8359 ; NoVLX: # %bb.0: # %entry
8360 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8361 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
8362 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8363 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8364 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8365 ; NoVLX-NEXT: kmovw %k0, %eax
8366 ; NoVLX-NEXT: movzwl %ax, %eax
8367 ; NoVLX-NEXT: vzeroupper
8370 %0 = bitcast <2 x i64> %__a to <2 x i64>
8371 %load = load <2 x i64>, <2 x i64>* %__b
8372 %1 = bitcast <2 x i64> %load to <2 x i64>
8373 %2 = icmp sgt <2 x i64> %0, %1
8374 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8375 %4 = bitcast <64 x i1> %3 to i64
8379 define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
8380 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask:
8381 ; VLX: # %bb.0: # %entry
8382 ; VLX-NEXT: kmovd %edi, %k1
8383 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
8384 ; VLX-NEXT: kmovq %k0, %rax
8387 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask:
8388 ; NoVLX: # %bb.0: # %entry
8389 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
8390 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8391 ; NoVLX-NEXT: kmovw %edi, %k1
8392 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8393 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8394 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8395 ; NoVLX-NEXT: kmovw %k0, %eax
8396 ; NoVLX-NEXT: movzwl %ax, %eax
8397 ; NoVLX-NEXT: vzeroupper
8400 %0 = bitcast <2 x i64> %__a to <2 x i64>
8401 %1 = bitcast <2 x i64> %__b to <2 x i64>
8402 %2 = icmp sgt <2 x i64> %0, %1
8403 %3 = bitcast i8 %__u to <8 x i1>
8404 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8405 %4 = and <2 x i1> %2, %extract.i
8406 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8407 %6 = bitcast <64 x i1> %5 to i64
8411 define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
8412 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem:
8413 ; VLX: # %bb.0: # %entry
8414 ; VLX-NEXT: kmovd %edi, %k1
8415 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
8416 ; VLX-NEXT: kmovq %k0, %rax
8419 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem:
8420 ; NoVLX: # %bb.0: # %entry
8421 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8422 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
8423 ; NoVLX-NEXT: kmovw %edi, %k1
8424 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8425 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8426 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8427 ; NoVLX-NEXT: kmovw %k0, %eax
8428 ; NoVLX-NEXT: movzwl %ax, %eax
8429 ; NoVLX-NEXT: vzeroupper
8432 %0 = bitcast <2 x i64> %__a to <2 x i64>
8433 %load = load <2 x i64>, <2 x i64>* %__b
8434 %1 = bitcast <2 x i64> %load to <2 x i64>
8435 %2 = icmp sgt <2 x i64> %0, %1
8436 %3 = bitcast i8 %__u to <8 x i1>
8437 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8438 %4 = and <2 x i1> %2, %extract.i
8439 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8440 %6 = bitcast <64 x i1> %5 to i64
8445 define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
8446 ; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
8447 ; VLX: # %bb.0: # %entry
8448 ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
8449 ; VLX-NEXT: kmovq %k0, %rax
8452 ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
8453 ; NoVLX: # %bb.0: # %entry
8454 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8455 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
8456 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8457 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8458 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8459 ; NoVLX-NEXT: kmovw %k0, %eax
8460 ; NoVLX-NEXT: movzwl %ax, %eax
8461 ; NoVLX-NEXT: vzeroupper
8464 %0 = bitcast <2 x i64> %__a to <2 x i64>
8465 %load = load i64, i64* %__b
8466 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8467 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8468 %2 = icmp sgt <2 x i64> %0, %1
8469 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8470 %4 = bitcast <64 x i1> %3 to i64
8474 define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
8475 ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
8476 ; VLX: # %bb.0: # %entry
8477 ; VLX-NEXT: kmovd %edi, %k1
8478 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
8479 ; VLX-NEXT: kmovq %k0, %rax
8482 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
8483 ; NoVLX: # %bb.0: # %entry
8484 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8485 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
8486 ; NoVLX-NEXT: kmovw %edi, %k1
8487 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8488 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
8489 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
8490 ; NoVLX-NEXT: kmovw %k0, %eax
8491 ; NoVLX-NEXT: movzwl %ax, %eax
8492 ; NoVLX-NEXT: vzeroupper
8495 %0 = bitcast <2 x i64> %__a to <2 x i64>
8496 %load = load i64, i64* %__b
8497 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
8498 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
8499 %2 = icmp sgt <2 x i64> %0, %1
8500 %3 = bitcast i8 %__u to <8 x i1>
8501 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
8502 %4 = and <2 x i1> %extract.i, %2
8503 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
8504 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of the four i64 lanes of %__a and %__b, with the
; 4-bit result widened to an 8-bit mask. The VLX run expects a single ymm
; vpcmpgtq into %k0; the NoVLX run expects the operands to be treated as zmm
; and a kshiftlw/kshiftrw $12 pair that keeps only the low four bits of the
; 16-bit k-mask.
8509 define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8510 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask:
8511 ; VLX: # %bb.0: # %entry
8512 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
8513 ; VLX-NEXT: kmovd %k0, %eax
8514 ; VLX-NEXT: # kill: def $al killed $al killed $eax
8515 ; VLX-NEXT: vzeroupper
8518 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask:
8519 ; NoVLX: # %bb.0: # %entry
8520 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8521 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8522 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8523 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8524 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8525 ; NoVLX-NEXT: kmovw %k0, %eax
8526 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
8527 ; NoVLX-NEXT: vzeroupper
; The two bitcasts below have identical source and destination types.
8530 %0 = bitcast <4 x i64> %__a to <4 x i64>
8531 %1 = bitcast <4 x i64> %__b to <4 x i64>
8532 %2 = icmp sgt <4 x i64> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 address the
; zeroinitializer operand, so lanes 4-7 of the widened mask are zero.
8533 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8534 %4 = bitcast <8 x i1> %3 to i8
; Signed greater-than compare of the four i64 lanes of %__a against a
; <4 x i64> vector loaded from %__b, with the 4-bit result widened to an
; 8-bit mask. The VLX run expects the load folded into vpcmpgtq as a memory
; operand; the NoVLX run expects a separate vmovdqa, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair that keeps only the low four k-mask bits.
8538 define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
8539 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem:
8540 ; VLX: # %bb.0: # %entry
8541 ; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
8542 ; VLX-NEXT: kmovd %k0, %eax
8543 ; VLX-NEXT: # kill: def $al killed $al killed $eax
8544 ; VLX-NEXT: vzeroupper
8547 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem:
8548 ; NoVLX: # %bb.0: # %entry
8549 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8550 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
8551 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8552 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8553 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8554 ; NoVLX-NEXT: kmovw %k0, %eax
8555 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
8556 ; NoVLX-NEXT: vzeroupper
8559 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Right-hand operand comes from memory as a whole vector.
8560 %load = load <4 x i64>, <4 x i64>* %__b
8561 %1 = bitcast <4 x i64> %load to <4 x i64>
8562 %2 = icmp sgt <4 x i64> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 address the
; zeroinitializer operand, so lanes 4-7 of the widened mask are zero.
8563 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8564 %4 = bitcast <8 x i1> %3 to i8
; Masked signed greater-than compare of the four i64 lanes of %__a and %__b:
; the compare bits are ANDed with elements 0-3 of the <8 x i1> view of %__u
; and widened to an 8-bit mask. The VLX run expects a ymm vpcmpgtq write-masked
; by %k1; the NoVLX run expects a write-masked zmm compare followed by a
; kshiftlw/kshiftrw $12 pair that keeps only the low four k-mask bits.
8568 define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8569 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask:
8570 ; VLX: # %bb.0: # %entry
8571 ; VLX-NEXT: kmovd %edi, %k1
8572 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
8573 ; VLX-NEXT: kmovd %k0, %eax
8574 ; VLX-NEXT: # kill: def $al killed $al killed $eax
8575 ; VLX-NEXT: vzeroupper
8578 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask:
8579 ; NoVLX: # %bb.0: # %entry
8580 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8581 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8582 ; NoVLX-NEXT: kmovw %edi, %k1
8583 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8584 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8585 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8586 ; NoVLX-NEXT: kmovw %k0, %eax
8587 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
8588 ; NoVLX-NEXT: vzeroupper
8591 %0 = bitcast <4 x i64> %__a to <4 x i64>
8592 %1 = bitcast <4 x i64> %__b to <4 x i64>
8593 %2 = icmp sgt <4 x i64> %0, %1
; Elements 0-3 of the <8 x i1> view of %__u act as the write mask.
8594 %3 = bitcast i8 %__u to <8 x i1>
8595 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8596 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 select the masked compare bits; indices 4-7 address the
; zeroinitializer operand, so lanes 4-7 of the widened mask are zero.
8597 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8598 %6 = bitcast <8 x i1> %5 to i8
; Masked signed greater-than compare of the four i64 lanes of %__a against a
; <4 x i64> vector loaded from %__b; the compare bits are ANDed with elements
; 0-3 of the <8 x i1> view of %__u and widened to an 8-bit mask. The VLX run
; expects a write-masked vpcmpgtq with the load folded as a memory operand;
; the NoVLX run expects a separate vmovdqa, a write-masked zmm compare, and a
; kshiftlw/kshiftrw $12 pair that keeps only the low four k-mask bits.
8602 define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
8603 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem:
8604 ; VLX: # %bb.0: # %entry
8605 ; VLX-NEXT: kmovd %edi, %k1
8606 ; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
8607 ; VLX-NEXT: kmovd %k0, %eax
8608 ; VLX-NEXT: # kill: def $al killed $al killed $eax
8609 ; VLX-NEXT: vzeroupper
8612 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem:
8613 ; NoVLX: # %bb.0: # %entry
8614 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8615 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
8616 ; NoVLX-NEXT: kmovw %edi, %k1
8617 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8618 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8619 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8620 ; NoVLX-NEXT: kmovw %k0, %eax
8621 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
8622 ; NoVLX-NEXT: vzeroupper
8625 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Right-hand operand comes from memory as a whole vector.
8626 %load = load <4 x i64>, <4 x i64>* %__b
8627 %1 = bitcast <4 x i64> %load to <4 x i64>
8628 %2 = icmp sgt <4 x i64> %0, %1
; Elements 0-3 of the <8 x i1> view of %__u act as the write mask.
8629 %3 = bitcast i8 %__u to <8 x i1>
8630 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8631 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 select the masked compare bits; indices 4-7 address the
; zeroinitializer operand, so lanes 4-7 of the widened mask are zero.
8632 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8633 %6 = bitcast <8 x i1> %5 to i8
; Signed greater-than compare of the four i64 lanes of %__a against the i64
; loaded from %__b and splatted to all four lanes, with the 4-bit result
; widened to an 8-bit mask. The VLX run expects vpcmpgtq with a {1to4} memory
; broadcast; the NoVLX run expects a separate vpbroadcastq, a zmm compare, and
; a kshiftlw/kshiftrw $12 pair that keeps only the low four k-mask bits.
8638 define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
8639 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
8640 ; VLX: # %bb.0: # %entry
8641 ; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
8642 ; VLX-NEXT: kmovd %k0, %eax
8643 ; VLX-NEXT: # kill: def $al killed $al killed $eax
8644 ; VLX-NEXT: vzeroupper
8647 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
8648 ; NoVLX: # %bb.0: # %entry
8649 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8650 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
8651 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8652 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8653 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8654 ; NoVLX-NEXT: kmovw %k0, %eax
8655 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
8656 ; NoVLX-NEXT: vzeroupper
8659 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
8660 %load = load i64, i64* %__b
8661 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8662 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8663 %2 = icmp sgt <4 x i64> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 address the
; zeroinitializer operand, so lanes 4-7 of the widened mask are zero.
8664 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8665 %4 = bitcast <8 x i1> %3 to i8
; Masked signed greater-than compare of the four i64 lanes of %__a against the
; i64 loaded from %__b and splatted to all four lanes; the compare bits are
; ANDed with elements 0-3 of the <8 x i1> view of %__u and widened to an 8-bit
; mask. The VLX run expects a write-masked vpcmpgtq with a {1to4} memory
; broadcast; the NoVLX run expects a separate vpbroadcastq, a write-masked zmm
; compare, and a kshiftlw/kshiftrw $12 pair that keeps only the low four
; k-mask bits.
8669 define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
8670 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
8671 ; VLX: # %bb.0: # %entry
8672 ; VLX-NEXT: kmovd %edi, %k1
8673 ; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
8674 ; VLX-NEXT: kmovd %k0, %eax
8675 ; VLX-NEXT: # kill: def $al killed $al killed $eax
8676 ; VLX-NEXT: vzeroupper
8679 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
8680 ; NoVLX: # %bb.0: # %entry
8681 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8682 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
8683 ; NoVLX-NEXT: kmovw %edi, %k1
8684 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8685 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8686 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8687 ; NoVLX-NEXT: kmovw %k0, %eax
8688 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
8689 ; NoVLX-NEXT: vzeroupper
8692 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
8693 %load = load i64, i64* %__b
8694 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8695 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8696 %2 = icmp sgt <4 x i64> %0, %1
; Elements 0-3 of the <8 x i1> view of %__u act as the write mask.
8697 %3 = bitcast i8 %__u to <8 x i1>
8698 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8699 %4 = and <4 x i1> %extract.i, %2
; Indices 0-3 select the masked compare bits; indices 4-7 address the
; zeroinitializer operand, so lanes 4-7 of the widened mask are zero.
8700 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8701 %6 = bitcast <8 x i1> %5 to i8
; Signed greater-than compare of the four i64 lanes of %__a and %__b, with the
; 4-bit result widened to a 16-bit mask. The VLX run expects a single ymm
; vpcmpgtq into %k0; the NoVLX run expects the operands to be treated as zmm
; and a kshiftlw/kshiftrw $12 pair that keeps only the low four bits of the
; 16-bit k-mask.
8706 define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8707 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask:
8708 ; VLX: # %bb.0: # %entry
8709 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
8710 ; VLX-NEXT: kmovd %k0, %eax
8711 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8712 ; VLX-NEXT: vzeroupper
8715 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask:
8716 ; NoVLX: # %bb.0: # %entry
8717 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8718 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8719 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8720 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8721 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8722 ; NoVLX-NEXT: kmovw %k0, %eax
8723 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8724 ; NoVLX-NEXT: vzeroupper
; The two bitcasts below have identical source and destination types.
8727 %0 = bitcast <4 x i64> %__a to <4 x i64>
8728 %1 = bitcast <4 x i64> %__b to <4 x i64>
8729 %2 = icmp sgt <4 x i64> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 address the
; zeroinitializer operand, so lanes 4-15 of the widened mask are zero.
8730 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8731 %4 = bitcast <16 x i1> %3 to i16
; Signed greater-than compare of the four i64 lanes of %__a against a
; <4 x i64> vector loaded from %__b, with the 4-bit result widened to a
; 16-bit mask. The VLX run expects the load folded into vpcmpgtq as a memory
; operand; the NoVLX run expects a separate vmovdqa, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair that keeps only the low four k-mask bits.
8735 define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
8736 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem:
8737 ; VLX: # %bb.0: # %entry
8738 ; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
8739 ; VLX-NEXT: kmovd %k0, %eax
8740 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8741 ; VLX-NEXT: vzeroupper
8744 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem:
8745 ; NoVLX: # %bb.0: # %entry
8746 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8747 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
8748 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8749 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8750 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8751 ; NoVLX-NEXT: kmovw %k0, %eax
8752 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8753 ; NoVLX-NEXT: vzeroupper
8756 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Right-hand operand comes from memory as a whole vector.
8757 %load = load <4 x i64>, <4 x i64>* %__b
8758 %1 = bitcast <4 x i64> %load to <4 x i64>
8759 %2 = icmp sgt <4 x i64> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 address the
; zeroinitializer operand, so lanes 4-15 of the widened mask are zero.
8760 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8761 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than compare of the four i64 lanes of %__a and %__b:
; the compare bits are ANDed with elements 0-3 of the <8 x i1> view of %__u
; and widened to a 16-bit mask. The VLX run expects a ymm vpcmpgtq
; write-masked by %k1; the NoVLX run expects a write-masked zmm compare
; followed by a kshiftlw/kshiftrw $12 pair that keeps only the low four
; k-mask bits.
8765 define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8766 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask:
8767 ; VLX: # %bb.0: # %entry
8768 ; VLX-NEXT: kmovd %edi, %k1
8769 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
8770 ; VLX-NEXT: kmovd %k0, %eax
8771 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8772 ; VLX-NEXT: vzeroupper
8775 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask:
8776 ; NoVLX: # %bb.0: # %entry
8777 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8778 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8779 ; NoVLX-NEXT: kmovw %edi, %k1
8780 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8781 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8782 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8783 ; NoVLX-NEXT: kmovw %k0, %eax
8784 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8785 ; NoVLX-NEXT: vzeroupper
8788 %0 = bitcast <4 x i64> %__a to <4 x i64>
8789 %1 = bitcast <4 x i64> %__b to <4 x i64>
8790 %2 = icmp sgt <4 x i64> %0, %1
; Elements 0-3 of the <8 x i1> view of %__u act as the write mask.
8791 %3 = bitcast i8 %__u to <8 x i1>
8792 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8793 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 select the masked compare bits; indices 4-7 address the
; zeroinitializer operand, so lanes 4-15 of the widened mask are zero.
8794 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8795 %6 = bitcast <16 x i1> %5 to i16
; Masked signed greater-than compare of the four i64 lanes of %__a against a
; <4 x i64> vector loaded from %__b; the compare bits are ANDed with elements
; 0-3 of the <8 x i1> view of %__u and widened to a 16-bit mask. The VLX run
; expects a write-masked vpcmpgtq with the load folded as a memory operand;
; the NoVLX run expects a separate vmovdqa, a write-masked zmm compare, and a
; kshiftlw/kshiftrw $12 pair that keeps only the low four k-mask bits.
8799 define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
8800 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem:
8801 ; VLX: # %bb.0: # %entry
8802 ; VLX-NEXT: kmovd %edi, %k1
8803 ; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
8804 ; VLX-NEXT: kmovd %k0, %eax
8805 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8806 ; VLX-NEXT: vzeroupper
8809 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem:
8810 ; NoVLX: # %bb.0: # %entry
8811 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8812 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
8813 ; NoVLX-NEXT: kmovw %edi, %k1
8814 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8815 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8816 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8817 ; NoVLX-NEXT: kmovw %k0, %eax
8818 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8819 ; NoVLX-NEXT: vzeroupper
8822 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Right-hand operand comes from memory as a whole vector.
8823 %load = load <4 x i64>, <4 x i64>* %__b
8824 %1 = bitcast <4 x i64> %load to <4 x i64>
8825 %2 = icmp sgt <4 x i64> %0, %1
; Elements 0-3 of the <8 x i1> view of %__u act as the write mask.
8826 %3 = bitcast i8 %__u to <8 x i1>
8827 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8828 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 select the masked compare bits; indices 4-7 address the
; zeroinitializer operand, so lanes 4-15 of the widened mask are zero.
8829 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8830 %6 = bitcast <16 x i1> %5 to i16
; Signed greater-than compare of the four i64 lanes of %__a against the i64
; loaded from %__b and splatted to all four lanes, with the 4-bit result
; widened to a 16-bit mask. The VLX run expects vpcmpgtq with a {1to4} memory
; broadcast; the NoVLX run expects a separate vpbroadcastq, a zmm compare, and
; a kshiftlw/kshiftrw $12 pair that keeps only the low four k-mask bits.
8835 define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
8836 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
8837 ; VLX: # %bb.0: # %entry
8838 ; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
8839 ; VLX-NEXT: kmovd %k0, %eax
8840 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8841 ; VLX-NEXT: vzeroupper
8844 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
8845 ; NoVLX: # %bb.0: # %entry
8846 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8847 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
8848 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8849 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8850 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8851 ; NoVLX-NEXT: kmovw %k0, %eax
8852 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8853 ; NoVLX-NEXT: vzeroupper
8856 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
8857 %load = load i64, i64* %__b
8858 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8859 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8860 %2 = icmp sgt <4 x i64> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 address the
; zeroinitializer operand, so lanes 4-15 of the widened mask are zero.
8861 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8862 %4 = bitcast <16 x i1> %3 to i16
; Masked signed greater-than compare of the four i64 lanes of %__a against the
; i64 loaded from %__b and splatted to all four lanes; the compare bits are
; ANDed with elements 0-3 of the <8 x i1> view of %__u and widened to a 16-bit
; mask. The VLX run expects a write-masked vpcmpgtq with a {1to4} memory
; broadcast; the NoVLX run expects a separate vpbroadcastq, a write-masked zmm
; compare, and a kshiftlw/kshiftrw $12 pair that keeps only the low four
; k-mask bits.
8866 define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
8867 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
8868 ; VLX: # %bb.0: # %entry
8869 ; VLX-NEXT: kmovd %edi, %k1
8870 ; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
8871 ; VLX-NEXT: kmovd %k0, %eax
8872 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
8873 ; VLX-NEXT: vzeroupper
8876 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
8877 ; NoVLX: # %bb.0: # %entry
8878 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8879 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
8880 ; NoVLX-NEXT: kmovw %edi, %k1
8881 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8882 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8883 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8884 ; NoVLX-NEXT: kmovw %k0, %eax
8885 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
8886 ; NoVLX-NEXT: vzeroupper
8889 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
8890 %load = load i64, i64* %__b
8891 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
8892 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8893 %2 = icmp sgt <4 x i64> %0, %1
; Elements 0-3 of the <8 x i1> view of %__u act as the write mask.
8894 %3 = bitcast i8 %__u to <8 x i1>
8895 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8896 %4 = and <4 x i1> %extract.i, %2
; Indices 0-3 select the masked compare bits; indices 4-7 address the
; zeroinitializer operand, so lanes 4-15 of the widened mask are zero.
8897 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8898 %6 = bitcast <16 x i1> %5 to i16
; Signed greater-than compare of the four i64 lanes of %__a and %__b, with the
; 4-bit result widened to a 32-bit mask. The VLX run expects a single ymm
; vpcmpgtq into %k0; the NoVLX run expects the operands to be treated as zmm
; and a kshiftlw/kshiftrw $12 pair that keeps only the low four bits of the
; 16-bit k-mask.
8903 define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8904 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask:
8905 ; VLX: # %bb.0: # %entry
8906 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
8907 ; VLX-NEXT: kmovd %k0, %eax
8908 ; VLX-NEXT: vzeroupper
8911 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask:
8912 ; NoVLX: # %bb.0: # %entry
8913 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8914 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8915 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8916 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8917 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8918 ; NoVLX-NEXT: kmovw %k0, %eax
8919 ; NoVLX-NEXT: vzeroupper
; The two bitcasts below have identical source and destination types.
8922 %0 = bitcast <4 x i64> %__a to <4 x i64>
8923 %1 = bitcast <4 x i64> %__b to <4 x i64>
8924 %2 = icmp sgt <4 x i64> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 address the
; zeroinitializer operand, so lanes 4-31 of the widened mask are zero.
8925 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8926 %4 = bitcast <32 x i1> %3 to i32
; Signed greater-than compare of the four i64 lanes of %__a against a
; <4 x i64> vector loaded from %__b, with the 4-bit result widened to a
; 32-bit mask. The VLX run expects the load folded into vpcmpgtq as a memory
; operand; the NoVLX run expects a separate vmovdqa, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair that keeps only the low four k-mask bits.
8930 define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
8931 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem:
8932 ; VLX: # %bb.0: # %entry
8933 ; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
8934 ; VLX-NEXT: kmovd %k0, %eax
8935 ; VLX-NEXT: vzeroupper
8938 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem:
8939 ; NoVLX: # %bb.0: # %entry
8940 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8941 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
8942 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
8943 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8944 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8945 ; NoVLX-NEXT: kmovw %k0, %eax
8946 ; NoVLX-NEXT: vzeroupper
8949 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Right-hand operand comes from memory as a whole vector.
8950 %load = load <4 x i64>, <4 x i64>* %__b
8951 %1 = bitcast <4 x i64> %load to <4 x i64>
8952 %2 = icmp sgt <4 x i64> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 address the
; zeroinitializer operand, so lanes 4-31 of the widened mask are zero.
8953 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8954 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of the four i64 lanes of %__a and %__b:
; the compare bits are ANDed with elements 0-3 of the <8 x i1> view of %__u
; and widened to a 32-bit mask. The VLX run expects a ymm vpcmpgtq
; write-masked by %k1; the NoVLX run expects a write-masked zmm compare
; followed by a kshiftlw/kshiftrw $12 pair that keeps only the low four
; k-mask bits.
8958 define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
8959 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask:
8960 ; VLX: # %bb.0: # %entry
8961 ; VLX-NEXT: kmovd %edi, %k1
8962 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
8963 ; VLX-NEXT: kmovd %k0, %eax
8964 ; VLX-NEXT: vzeroupper
8967 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask:
8968 ; NoVLX: # %bb.0: # %entry
8969 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
8970 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
8971 ; NoVLX-NEXT: kmovw %edi, %k1
8972 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
8973 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
8974 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
8975 ; NoVLX-NEXT: kmovw %k0, %eax
8976 ; NoVLX-NEXT: vzeroupper
8979 %0 = bitcast <4 x i64> %__a to <4 x i64>
8980 %1 = bitcast <4 x i64> %__b to <4 x i64>
8981 %2 = icmp sgt <4 x i64> %0, %1
; Elements 0-3 of the <8 x i1> view of %__u act as the write mask.
8982 %3 = bitcast i8 %__u to <8 x i1>
8983 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8984 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 select the masked compare bits; indices 4-7 address the
; zeroinitializer operand, so lanes 4-31 of the widened mask are zero.
8985 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
8986 %6 = bitcast <32 x i1> %5 to i32
; Masked signed greater-than compare of the four i64 lanes of %__a against a
; <4 x i64> vector loaded from %__b; the compare bits are ANDed with elements
; 0-3 of the <8 x i1> view of %__u and widened to a 32-bit mask. The VLX run
; expects a write-masked vpcmpgtq with the load folded as a memory operand;
; the NoVLX run expects a separate vmovdqa, a write-masked zmm compare, and a
; kshiftlw/kshiftrw $12 pair that keeps only the low four k-mask bits.
8990 define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
8991 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem:
8992 ; VLX: # %bb.0: # %entry
8993 ; VLX-NEXT: kmovd %edi, %k1
8994 ; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
8995 ; VLX-NEXT: kmovd %k0, %eax
8996 ; VLX-NEXT: vzeroupper
8999 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem:
9000 ; NoVLX: # %bb.0: # %entry
9001 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
9002 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
9003 ; NoVLX-NEXT: kmovw %edi, %k1
9004 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9005 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
9006 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
9007 ; NoVLX-NEXT: kmovw %k0, %eax
9008 ; NoVLX-NEXT: vzeroupper
9011 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Right-hand operand comes from memory as a whole vector.
9012 %load = load <4 x i64>, <4 x i64>* %__b
9013 %1 = bitcast <4 x i64> %load to <4 x i64>
9014 %2 = icmp sgt <4 x i64> %0, %1
; Elements 0-3 of the <8 x i1> view of %__u act as the write mask.
9015 %3 = bitcast i8 %__u to <8 x i1>
9016 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9017 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 select the masked compare bits; indices 4-7 address the
; zeroinitializer operand, so lanes 4-31 of the widened mask are zero.
9018 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9019 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-than compare of the four i64 lanes of %__a against the i64
; loaded from %__b and splatted to all four lanes, with the 4-bit result
; widened to a 32-bit mask. The VLX run expects vpcmpgtq with a {1to4} memory
; broadcast; the NoVLX run expects a separate vpbroadcastq, a zmm compare, and
; a kshiftlw/kshiftrw $12 pair that keeps only the low four k-mask bits.
9024 define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
9025 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
9026 ; VLX: # %bb.0: # %entry
9027 ; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
9028 ; VLX-NEXT: kmovd %k0, %eax
9029 ; VLX-NEXT: vzeroupper
9032 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
9033 ; NoVLX: # %bb.0: # %entry
9034 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
9035 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
9036 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9037 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
9038 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
9039 ; NoVLX-NEXT: kmovw %k0, %eax
9040 ; NoVLX-NEXT: vzeroupper
9043 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
9044 %load = load i64, i64* %__b
9045 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
9046 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
9047 %2 = icmp sgt <4 x i64> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 address the
; zeroinitializer operand, so lanes 4-31 of the widened mask are zero.
9048 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9049 %4 = bitcast <32 x i1> %3 to i32
; Masked signed greater-than compare of the four i64 lanes of %__a against the
; i64 loaded from %__b and splatted to all four lanes; the compare bits are
; ANDed with elements 0-3 of the <8 x i1> view of %__u and widened to a 32-bit
; mask. The VLX run expects a write-masked vpcmpgtq with a {1to4} memory
; broadcast; the NoVLX run expects a separate vpbroadcastq, a write-masked zmm
; compare, and a kshiftlw/kshiftrw $12 pair that keeps only the low four
; k-mask bits.
9053 define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
9054 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
9055 ; VLX: # %bb.0: # %entry
9056 ; VLX-NEXT: kmovd %edi, %k1
9057 ; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
9058 ; VLX-NEXT: kmovd %k0, %eax
9059 ; VLX-NEXT: vzeroupper
9062 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
9063 ; NoVLX: # %bb.0: # %entry
9064 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
9065 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
9066 ; NoVLX-NEXT: kmovw %edi, %k1
9067 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9068 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
9069 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
9070 ; NoVLX-NEXT: kmovw %k0, %eax
9071 ; NoVLX-NEXT: vzeroupper
9074 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
9075 %load = load i64, i64* %__b
9076 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
9077 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
9078 %2 = icmp sgt <4 x i64> %0, %1
; Elements 0-3 of the <8 x i1> view of %__u act as the write mask.
9079 %3 = bitcast i8 %__u to <8 x i1>
9080 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9081 %4 = and <4 x i1> %extract.i, %2
; Indices 0-3 select the masked compare bits; indices 4-7 address the
; zeroinitializer operand, so lanes 4-31 of the widened mask are zero.
9082 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9083 %6 = bitcast <32 x i1> %5 to i32
; Signed greater-than compare of the four i64 lanes of %__a and %__b, with the
; 4-bit result widened to a 64-bit mask. The VLX run expects a single ymm
; vpcmpgtq into %k0 read back with kmovq; the NoVLX run expects the operands
; to be treated as zmm, a kshiftlw/kshiftrw $12 pair that keeps only the low
; four bits of the 16-bit k-mask, and a movzwl of the result.
9088 define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
9089 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask:
9090 ; VLX: # %bb.0: # %entry
9091 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
9092 ; VLX-NEXT: kmovq %k0, %rax
9093 ; VLX-NEXT: vzeroupper
9096 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask:
9097 ; NoVLX: # %bb.0: # %entry
9098 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
9099 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
9100 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9101 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
9102 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
9103 ; NoVLX-NEXT: kmovw %k0, %eax
9104 ; NoVLX-NEXT: movzwl %ax, %eax
9105 ; NoVLX-NEXT: vzeroupper
; The two bitcasts below have identical source and destination types.
9108 %0 = bitcast <4 x i64> %__a to <4 x i64>
9109 %1 = bitcast <4 x i64> %__b to <4 x i64>
9110 %2 = icmp sgt <4 x i64> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 address the
; zeroinitializer operand, so lanes 4-63 of the widened mask are zero.
9111 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9112 %4 = bitcast <64 x i1> %3 to i64
; Signed greater-than compare of the four i64 lanes of %__a against a
; <4 x i64> vector loaded from %__b, with the 4-bit result widened to a
; 64-bit mask. The VLX run expects the load folded into vpcmpgtq as a memory
; operand and the mask read back with kmovq; the NoVLX run expects a separate
; vmovdqa, a zmm compare, a kshiftlw/kshiftrw $12 pair that keeps only the
; low four k-mask bits, and a movzwl of the result.
9116 define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
9117 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem:
9118 ; VLX: # %bb.0: # %entry
9119 ; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
9120 ; VLX-NEXT: kmovq %k0, %rax
9121 ; VLX-NEXT: vzeroupper
9124 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem:
9125 ; NoVLX: # %bb.0: # %entry
9126 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
9127 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
9128 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9129 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
9130 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
9131 ; NoVLX-NEXT: kmovw %k0, %eax
9132 ; NoVLX-NEXT: movzwl %ax, %eax
9133 ; NoVLX-NEXT: vzeroupper
9136 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Right-hand operand comes from memory as a whole vector.
9137 %load = load <4 x i64>, <4 x i64>* %__b
9138 %1 = bitcast <4 x i64> %load to <4 x i64>
9139 %2 = icmp sgt <4 x i64> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 address the
; zeroinitializer operand, so lanes 4-63 of the widened mask are zero.
9140 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9141 %4 = bitcast <64 x i1> %3 to i64
; Masked signed greater-than compare of the four i64 lanes of %__a and %__b:
; the compare bits are ANDed with elements 0-3 of the <8 x i1> view of %__u
; and widened to a 64-bit mask. The VLX run expects a ymm vpcmpgtq
; write-masked by %k1 and read back with kmovq; the NoVLX run expects a
; write-masked zmm compare, a kshiftlw/kshiftrw $12 pair that keeps only the
; low four k-mask bits, and a movzwl of the result.
9145 define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
9146 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask:
9147 ; VLX: # %bb.0: # %entry
9148 ; VLX-NEXT: kmovd %edi, %k1
9149 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
9150 ; VLX-NEXT: kmovq %k0, %rax
9151 ; VLX-NEXT: vzeroupper
9154 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask:
9155 ; NoVLX: # %bb.0: # %entry
9156 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
9157 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
9158 ; NoVLX-NEXT: kmovw %edi, %k1
9159 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9160 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
9161 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
9162 ; NoVLX-NEXT: kmovw %k0, %eax
9163 ; NoVLX-NEXT: movzwl %ax, %eax
9164 ; NoVLX-NEXT: vzeroupper
9167 %0 = bitcast <4 x i64> %__a to <4 x i64>
9168 %1 = bitcast <4 x i64> %__b to <4 x i64>
9169 %2 = icmp sgt <4 x i64> %0, %1
; Elements 0-3 of the <8 x i1> view of %__u act as the write mask.
9170 %3 = bitcast i8 %__u to <8 x i1>
9171 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9172 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 select the masked compare bits; indices 4-7 address the
; zeroinitializer operand, so lanes 4-63 of the widened mask are zero.
9173 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9174 %6 = bitcast <64 x i1> %5 to i64
; Masked signed greater-than compare of the four i64 lanes of %__a against a
; <4 x i64> vector loaded from %__b; the compare bits are ANDed with elements
; 0-3 of the <8 x i1> view of %__u and widened to a 64-bit mask. The VLX run
; expects a write-masked vpcmpgtq with the load folded as a memory operand,
; read back with kmovq; the NoVLX run expects a separate vmovdqa, a
; write-masked zmm compare, a kshiftlw/kshiftrw $12 pair that keeps only the
; low four k-mask bits, and a movzwl of the result.
9178 define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
9179 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem:
9180 ; VLX: # %bb.0: # %entry
9181 ; VLX-NEXT: kmovd %edi, %k1
9182 ; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
9183 ; VLX-NEXT: kmovq %k0, %rax
9184 ; VLX-NEXT: vzeroupper
9187 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem:
9188 ; NoVLX: # %bb.0: # %entry
9189 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
9190 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
9191 ; NoVLX-NEXT: kmovw %edi, %k1
9192 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9193 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
9194 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
9195 ; NoVLX-NEXT: kmovw %k0, %eax
9196 ; NoVLX-NEXT: movzwl %ax, %eax
9197 ; NoVLX-NEXT: vzeroupper
9200 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Right-hand operand comes from memory as a whole vector.
9201 %load = load <4 x i64>, <4 x i64>* %__b
9202 %1 = bitcast <4 x i64> %load to <4 x i64>
9203 %2 = icmp sgt <4 x i64> %0, %1
; Elements 0-3 of the <8 x i1> view of %__u act as the write mask.
9204 %3 = bitcast i8 %__u to <8 x i1>
9205 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9206 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 select the masked compare bits; indices 4-7 address the
; zeroinitializer operand, so lanes 4-63 of the widened mask are zero.
9207 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9208 %6 = bitcast <64 x i1> %5 to i64
; Signed greater-than compare of the four i64 lanes of %__a against the i64
; loaded from %__b and splatted to all four lanes, with the 4-bit result
; widened to a 64-bit mask. The VLX run expects vpcmpgtq with a {1to4} memory
; broadcast, read back with kmovq; the NoVLX run expects a separate
; vpbroadcastq, a zmm compare, a kshiftlw/kshiftrw $12 pair that keeps only
; the low four k-mask bits, and a movzwl of the result.
9213 define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
9214 ; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
9215 ; VLX: # %bb.0: # %entry
9216 ; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
9217 ; VLX-NEXT: kmovq %k0, %rax
9218 ; VLX-NEXT: vzeroupper
9221 ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
9222 ; NoVLX: # %bb.0: # %entry
9223 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
9224 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
9225 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9226 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
9227 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
9228 ; NoVLX-NEXT: kmovw %k0, %eax
9229 ; NoVLX-NEXT: movzwl %ax, %eax
9230 ; NoVLX-NEXT: vzeroupper
9233 %0 = bitcast <4 x i64> %__a to <4 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
9234 %load = load i64, i64* %__b
9235 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
9236 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
9237 %2 = icmp sgt <4 x i64> %0, %1
; Indices 0-3 select the compare bits; indices 4-7 address the
; zeroinitializer operand, so lanes 4-63 of the widened mask are zero.
9238 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9239 %4 = bitcast <64 x i1> %3 to i64
; Masked variant of the <4 x i64> broadcast-load sgt compare: the low four
; bits of the i8 mask %__u (extracted via shufflevector from its <8 x i1>
; bitcast) are ANDed with the compare result before it is padded to <64 x i1>
; with zero lanes and bitcast to i64.
; Expected output: both runs fold the mask into the compare as a {%k1}
; write-mask; only the +avx512vl run folds the broadcast load ({1to4}).
9243 define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
9244 ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
9245 ; VLX: # %bb.0: # %entry
9246 ; VLX-NEXT: kmovd %edi, %k1
9247 ; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
9248 ; VLX-NEXT: kmovq %k0, %rax
9249 ; VLX-NEXT: vzeroupper
9252 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
9253 ; NoVLX: # %bb.0: # %entry
9254 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
9255 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
9256 ; NoVLX-NEXT: kmovw %edi, %k1
9257 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9258 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
9259 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
9260 ; NoVLX-NEXT: kmovw %k0, %eax
9261 ; NoVLX-NEXT: movzwl %ax, %eax
9262 ; NoVLX-NEXT: vzeroupper
9265 %0 = bitcast <4 x i64> %__a to <4 x i64>
9266 %load = load i64, i64* %__b
9267 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
9268 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
9269 %2 = icmp sgt <4 x i64> %0, %1
9270 %3 = bitcast i8 %__u to <8 x i1>
9271 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
9272 %4 = and <4 x i1> %extract.i, %2
9273 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
9274 %6 = bitcast <64 x i1> %5 to i64
; Unmasked icmp sgt of two <8 x i64> register operands. The <8 x i1> result
; is padded to <16 x i1> with lanes from zeroinitializer and bitcast to i16.
; Expected output: a single zmm vpcmpgtq into %k0 in both runs; they differ
; only in the mask-to-GPR move (kmovd vs. kmovw).
9279 define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9280 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask:
9281 ; VLX: # %bb.0: # %entry
9282 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9283 ; VLX-NEXT: kmovd %k0, %eax
9284 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
9285 ; VLX-NEXT: vzeroupper
9288 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask:
9289 ; NoVLX: # %bb.0: # %entry
9290 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9291 ; NoVLX-NEXT: kmovw %k0, %eax
9292 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
9293 ; NoVLX-NEXT: vzeroupper
9296 %0 = bitcast <8 x i64> %__a to <8 x i64>
9297 %1 = bitcast <8 x i64> %__b to <8 x i64>
9298 %2 = icmp sgt <8 x i64> %0, %1
9299 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9300 %4 = bitcast <16 x i1> %3 to i16
; Unmasked icmp sgt of <8 x i64> %__a against a full <8 x i64> vector loaded
; from %__b. The <8 x i1> result is padded to <16 x i1> with zero lanes and
; bitcast to i16.
; Expected output: the load is folded into vpcmpgtq as a memory operand in
; both runs.
9304 define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
9305 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem:
9306 ; VLX: # %bb.0: # %entry
9307 ; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
9308 ; VLX-NEXT: kmovd %k0, %eax
9309 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
9310 ; VLX-NEXT: vzeroupper
9313 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem:
9314 ; NoVLX: # %bb.0: # %entry
9315 ; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
9316 ; NoVLX-NEXT: kmovw %k0, %eax
9317 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
9318 ; NoVLX-NEXT: vzeroupper
9321 %0 = bitcast <8 x i64> %__a to <8 x i64>
9322 %load = load <8 x i64>, <8 x i64>* %__b
9323 %1 = bitcast <8 x i64> %load to <8 x i64>
9324 %2 = icmp sgt <8 x i64> %0, %1
9325 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9326 %4 = bitcast <16 x i1> %3 to i16
; Masked icmp sgt of two <8 x i64> register operands: the compare result is
; ANDed with the i8 mask %__u (bitcast to <8 x i1>), padded to <16 x i1> with
; zero lanes and bitcast to i16.
; Expected output: the AND is folded into vpcmpgtq as a {%k1} write-mask in
; both runs.
9330 define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9331 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask:
9332 ; VLX: # %bb.0: # %entry
9333 ; VLX-NEXT: kmovd %edi, %k1
9334 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9335 ; VLX-NEXT: kmovd %k0, %eax
9336 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
9337 ; VLX-NEXT: vzeroupper
9340 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask:
9341 ; NoVLX: # %bb.0: # %entry
9342 ; NoVLX-NEXT: kmovw %edi, %k1
9343 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9344 ; NoVLX-NEXT: kmovw %k0, %eax
9345 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
9346 ; NoVLX-NEXT: vzeroupper
9349 %0 = bitcast <8 x i64> %__a to <8 x i64>
9350 %1 = bitcast <8 x i64> %__b to <8 x i64>
9351 %2 = icmp sgt <8 x i64> %0, %1
9352 %3 = bitcast i8 %__u to <8 x i1>
9353 %4 = and <8 x i1> %2, %3
9354 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9355 %6 = bitcast <16 x i1> %5 to i16
; Masked icmp sgt of <8 x i64> %__a against a full <8 x i64> vector loaded
; from %__b. The result is ANDed with the i8 mask %__u (bitcast to <8 x i1>),
; padded to <16 x i1> with zero lanes and bitcast to i16.
; Expected output: both the load and the mask are folded into one vpcmpgtq.
9359 define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
9360 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem:
9361 ; VLX: # %bb.0: # %entry
9362 ; VLX-NEXT: kmovd %edi, %k1
9363 ; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9364 ; VLX-NEXT: kmovd %k0, %eax
9365 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
9366 ; VLX-NEXT: vzeroupper
9369 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem:
9370 ; NoVLX: # %bb.0: # %entry
9371 ; NoVLX-NEXT: kmovw %edi, %k1
9372 ; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9373 ; NoVLX-NEXT: kmovw %k0, %eax
9374 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
9375 ; NoVLX-NEXT: vzeroupper
9378 %0 = bitcast <8 x i64> %__a to <8 x i64>
9379 %load = load <8 x i64>, <8 x i64>* %__b
9380 %1 = bitcast <8 x i64> %load to <8 x i64>
9381 %2 = icmp sgt <8 x i64> %0, %1
9382 %3 = bitcast i8 %__u to <8 x i1>
9383 %4 = and <8 x i1> %2, %3
9384 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9385 %6 = bitcast <16 x i1> %5 to i16
; Unmasked icmp sgt of <8 x i64> %__a against an i64 loaded from %__b and
; splatted to all eight lanes. The <8 x i1> result is padded to <16 x i1>
; with zero lanes and bitcast to i16.
; Expected output: the scalar load is folded as a {1to8} embedded broadcast
; in both runs.
9390 define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
9391 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
9392 ; VLX: # %bb.0: # %entry
9393 ; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9394 ; VLX-NEXT: kmovd %k0, %eax
9395 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
9396 ; VLX-NEXT: vzeroupper
9399 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
9400 ; NoVLX: # %bb.0: # %entry
9401 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9402 ; NoVLX-NEXT: kmovw %k0, %eax
9403 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
9404 ; NoVLX-NEXT: vzeroupper
9407 %0 = bitcast <8 x i64> %__a to <8 x i64>
9408 %load = load i64, i64* %__b
9409 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9410 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9411 %2 = icmp sgt <8 x i64> %0, %1
9412 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9413 %4 = bitcast <16 x i1> %3 to i16
; Masked icmp sgt of <8 x i64> %__a against an i64 loaded from %__b and
; splatted to all eight lanes. The i8 mask %__u (bitcast to <8 x i1>) is ANDed
; with the result, which is padded to <16 x i1> with zero lanes and bitcast
; to i16.
; Expected output: one vpcmpgtq with a {1to8} broadcast operand and a {%k1}
; write-mask in both runs.
9417 define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
9418 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
9419 ; VLX: # %bb.0: # %entry
9420 ; VLX-NEXT: kmovd %edi, %k1
9421 ; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9422 ; VLX-NEXT: kmovd %k0, %eax
9423 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
9424 ; VLX-NEXT: vzeroupper
9427 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
9428 ; NoVLX: # %bb.0: # %entry
9429 ; NoVLX-NEXT: kmovw %edi, %k1
9430 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9431 ; NoVLX-NEXT: kmovw %k0, %eax
9432 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
9433 ; NoVLX-NEXT: vzeroupper
9436 %0 = bitcast <8 x i64> %__a to <8 x i64>
9437 %load = load i64, i64* %__b
9438 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9439 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9440 %2 = icmp sgt <8 x i64> %0, %1
9441 %3 = bitcast i8 %__u to <8 x i1>
9442 %4 = and <8 x i1> %3, %2
9443 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9444 %6 = bitcast <16 x i1> %5 to i16
; Unmasked icmp sgt of two <8 x i64> register operands. The <8 x i1> result
; is padded to <32 x i1> (every lane above 7 selects from zeroinitializer)
; and bitcast to i32.
; Expected output: a single zmm vpcmpgtq followed by a mask-to-GPR move.
9449 define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9450 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask:
9451 ; VLX: # %bb.0: # %entry
9452 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9453 ; VLX-NEXT: kmovd %k0, %eax
9454 ; VLX-NEXT: vzeroupper
9457 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask:
9458 ; NoVLX: # %bb.0: # %entry
9459 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9460 ; NoVLX-NEXT: kmovw %k0, %eax
9461 ; NoVLX-NEXT: vzeroupper
9464 %0 = bitcast <8 x i64> %__a to <8 x i64>
9465 %1 = bitcast <8 x i64> %__b to <8 x i64>
9466 %2 = icmp sgt <8 x i64> %0, %1
9467 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9468 %4 = bitcast <32 x i1> %3 to i32
; Unmasked icmp sgt of <8 x i64> %__a against a full <8 x i64> vector loaded
; from %__b. The <8 x i1> result is padded to <32 x i1> with zero lanes and
; bitcast to i32.
; Expected output: the load is folded into vpcmpgtq as a memory operand.
9472 define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
9473 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem:
9474 ; VLX: # %bb.0: # %entry
9475 ; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
9476 ; VLX-NEXT: kmovd %k0, %eax
9477 ; VLX-NEXT: vzeroupper
9480 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem:
9481 ; NoVLX: # %bb.0: # %entry
9482 ; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
9483 ; NoVLX-NEXT: kmovw %k0, %eax
9484 ; NoVLX-NEXT: vzeroupper
9487 %0 = bitcast <8 x i64> %__a to <8 x i64>
9488 %load = load <8 x i64>, <8 x i64>* %__b
9489 %1 = bitcast <8 x i64> %load to <8 x i64>
9490 %2 = icmp sgt <8 x i64> %0, %1
9491 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9492 %4 = bitcast <32 x i1> %3 to i32
; Masked icmp sgt of two <8 x i64> register operands: the result is ANDed
; with the i8 mask %__u (bitcast to <8 x i1>), padded to <32 x i1> with zero
; lanes and bitcast to i32.
; Expected output: the AND is folded into vpcmpgtq as a {%k1} write-mask.
9496 define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9497 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask:
9498 ; VLX: # %bb.0: # %entry
9499 ; VLX-NEXT: kmovd %edi, %k1
9500 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9501 ; VLX-NEXT: kmovd %k0, %eax
9502 ; VLX-NEXT: vzeroupper
9505 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask:
9506 ; NoVLX: # %bb.0: # %entry
9507 ; NoVLX-NEXT: kmovw %edi, %k1
9508 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9509 ; NoVLX-NEXT: kmovw %k0, %eax
9510 ; NoVLX-NEXT: vzeroupper
9513 %0 = bitcast <8 x i64> %__a to <8 x i64>
9514 %1 = bitcast <8 x i64> %__b to <8 x i64>
9515 %2 = icmp sgt <8 x i64> %0, %1
9516 %3 = bitcast i8 %__u to <8 x i1>
9517 %4 = and <8 x i1> %2, %3
9518 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9519 %6 = bitcast <32 x i1> %5 to i32
; Masked icmp sgt of <8 x i64> %__a against a full <8 x i64> vector loaded
; from %__b. The result is ANDed with the i8 mask %__u (bitcast to <8 x i1>),
; padded to <32 x i1> with zero lanes and bitcast to i32.
; Expected output: both the load and the mask are folded into one vpcmpgtq.
9523 define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
9524 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem:
9525 ; VLX: # %bb.0: # %entry
9526 ; VLX-NEXT: kmovd %edi, %k1
9527 ; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9528 ; VLX-NEXT: kmovd %k0, %eax
9529 ; VLX-NEXT: vzeroupper
9532 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem:
9533 ; NoVLX: # %bb.0: # %entry
9534 ; NoVLX-NEXT: kmovw %edi, %k1
9535 ; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9536 ; NoVLX-NEXT: kmovw %k0, %eax
9537 ; NoVLX-NEXT: vzeroupper
9540 %0 = bitcast <8 x i64> %__a to <8 x i64>
9541 %load = load <8 x i64>, <8 x i64>* %__b
9542 %1 = bitcast <8 x i64> %load to <8 x i64>
9543 %2 = icmp sgt <8 x i64> %0, %1
9544 %3 = bitcast i8 %__u to <8 x i1>
9545 %4 = and <8 x i1> %2, %3
9546 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9547 %6 = bitcast <32 x i1> %5 to i32
; Unmasked icmp sgt of <8 x i64> %__a against an i64 loaded from %__b and
; splatted to all eight lanes. The <8 x i1> result is padded to <32 x i1>
; with zero lanes and bitcast to i32.
; Expected output: the scalar load is folded as a {1to8} embedded broadcast.
9552 define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
9553 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
9554 ; VLX: # %bb.0: # %entry
9555 ; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9556 ; VLX-NEXT: kmovd %k0, %eax
9557 ; VLX-NEXT: vzeroupper
9560 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
9561 ; NoVLX: # %bb.0: # %entry
9562 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9563 ; NoVLX-NEXT: kmovw %k0, %eax
9564 ; NoVLX-NEXT: vzeroupper
9567 %0 = bitcast <8 x i64> %__a to <8 x i64>
9568 %load = load i64, i64* %__b
9569 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9570 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9571 %2 = icmp sgt <8 x i64> %0, %1
9572 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9573 %4 = bitcast <32 x i1> %3 to i32
; Masked icmp sgt of <8 x i64> %__a against an i64 loaded from %__b and
; splatted to all eight lanes. The i8 mask %__u (bitcast to <8 x i1>) is ANDed
; with the result, which is padded to <32 x i1> with zero lanes and bitcast
; to i32.
; Expected output: one vpcmpgtq with a {1to8} broadcast operand and a {%k1}
; write-mask.
9577 define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
9578 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
9579 ; VLX: # %bb.0: # %entry
9580 ; VLX-NEXT: kmovd %edi, %k1
9581 ; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9582 ; VLX-NEXT: kmovd %k0, %eax
9583 ; VLX-NEXT: vzeroupper
9586 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
9587 ; NoVLX: # %bb.0: # %entry
9588 ; NoVLX-NEXT: kmovw %edi, %k1
9589 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9590 ; NoVLX-NEXT: kmovw %k0, %eax
9591 ; NoVLX-NEXT: vzeroupper
9594 %0 = bitcast <8 x i64> %__a to <8 x i64>
9595 %load = load i64, i64* %__b
9596 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9597 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9598 %2 = icmp sgt <8 x i64> %0, %1
9599 %3 = bitcast i8 %__u to <8 x i1>
9600 %4 = and <8 x i1> %3, %2
9601 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9602 %6 = bitcast <32 x i1> %5 to i32
; Unmasked icmp sgt of two <8 x i64> register operands. The <8 x i1> result
; is padded to <64 x i1> (every lane above 7 selects from zeroinitializer)
; and bitcast to i64.
; Expected output: the +avx512vl run moves the mask out with kmovq; the
; +avx512f-only run uses kmovw followed by movzwl.
9607 define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9608 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask:
9609 ; VLX: # %bb.0: # %entry
9610 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9611 ; VLX-NEXT: kmovq %k0, %rax
9612 ; VLX-NEXT: vzeroupper
9615 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask:
9616 ; NoVLX: # %bb.0: # %entry
9617 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
9618 ; NoVLX-NEXT: kmovw %k0, %eax
9619 ; NoVLX-NEXT: movzwl %ax, %eax
9620 ; NoVLX-NEXT: vzeroupper
9623 %0 = bitcast <8 x i64> %__a to <8 x i64>
9624 %1 = bitcast <8 x i64> %__b to <8 x i64>
9625 %2 = icmp sgt <8 x i64> %0, %1
9626 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9627 %4 = bitcast <64 x i1> %3 to i64
; Unmasked icmp sgt of <8 x i64> %__a against a full <8 x i64> vector loaded
; from %__b. The <8 x i1> result is padded to <64 x i1> with zero lanes and
; bitcast to i64.
; Expected output: the load is folded into vpcmpgtq as a memory operand.
9631 define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
9632 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem:
9633 ; VLX: # %bb.0: # %entry
9634 ; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
9635 ; VLX-NEXT: kmovq %k0, %rax
9636 ; VLX-NEXT: vzeroupper
9639 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem:
9640 ; NoVLX: # %bb.0: # %entry
9641 ; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
9642 ; NoVLX-NEXT: kmovw %k0, %eax
9643 ; NoVLX-NEXT: movzwl %ax, %eax
9644 ; NoVLX-NEXT: vzeroupper
9647 %0 = bitcast <8 x i64> %__a to <8 x i64>
9648 %load = load <8 x i64>, <8 x i64>* %__b
9649 %1 = bitcast <8 x i64> %load to <8 x i64>
9650 %2 = icmp sgt <8 x i64> %0, %1
9651 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9652 %4 = bitcast <64 x i1> %3 to i64
; Masked icmp sgt of two <8 x i64> register operands: the result is ANDed
; with the i8 mask %__u (bitcast to <8 x i1>), padded to <64 x i1> with zero
; lanes and bitcast to i64.
; Expected output: the AND is folded into vpcmpgtq as a {%k1} write-mask.
9656 define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
9657 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask:
9658 ; VLX: # %bb.0: # %entry
9659 ; VLX-NEXT: kmovd %edi, %k1
9660 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9661 ; VLX-NEXT: kmovq %k0, %rax
9662 ; VLX-NEXT: vzeroupper
9665 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask:
9666 ; NoVLX: # %bb.0: # %entry
9667 ; NoVLX-NEXT: kmovw %edi, %k1
9668 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
9669 ; NoVLX-NEXT: kmovw %k0, %eax
9670 ; NoVLX-NEXT: movzwl %ax, %eax
9671 ; NoVLX-NEXT: vzeroupper
9674 %0 = bitcast <8 x i64> %__a to <8 x i64>
9675 %1 = bitcast <8 x i64> %__b to <8 x i64>
9676 %2 = icmp sgt <8 x i64> %0, %1
9677 %3 = bitcast i8 %__u to <8 x i1>
9678 %4 = and <8 x i1> %2, %3
9679 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9680 %6 = bitcast <64 x i1> %5 to i64
; Masked icmp sgt of <8 x i64> %__a against a full <8 x i64> vector loaded
; from %__b. The result is ANDed with the i8 mask %__u (bitcast to <8 x i1>),
; padded to <64 x i1> with zero lanes and bitcast to i64.
; Expected output: both the load and the mask are folded into one vpcmpgtq.
9684 define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
9685 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem:
9686 ; VLX: # %bb.0: # %entry
9687 ; VLX-NEXT: kmovd %edi, %k1
9688 ; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9689 ; VLX-NEXT: kmovq %k0, %rax
9690 ; VLX-NEXT: vzeroupper
9693 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem:
9694 ; NoVLX: # %bb.0: # %entry
9695 ; NoVLX-NEXT: kmovw %edi, %k1
9696 ; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
9697 ; NoVLX-NEXT: kmovw %k0, %eax
9698 ; NoVLX-NEXT: movzwl %ax, %eax
9699 ; NoVLX-NEXT: vzeroupper
9702 %0 = bitcast <8 x i64> %__a to <8 x i64>
9703 %load = load <8 x i64>, <8 x i64>* %__b
9704 %1 = bitcast <8 x i64> %load to <8 x i64>
9705 %2 = icmp sgt <8 x i64> %0, %1
9706 %3 = bitcast i8 %__u to <8 x i1>
9707 %4 = and <8 x i1> %2, %3
9708 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9709 %6 = bitcast <64 x i1> %5 to i64
; Unmasked icmp sgt of <8 x i64> %__a against an i64 loaded from %__b and
; splatted to all eight lanes. The <8 x i1> result is padded to <64 x i1>
; with zero lanes and bitcast to i64.
; Expected output: the scalar load is folded as a {1to8} embedded broadcast.
9714 define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
9715 ; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
9716 ; VLX: # %bb.0: # %entry
9717 ; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9718 ; VLX-NEXT: kmovq %k0, %rax
9719 ; VLX-NEXT: vzeroupper
9722 ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
9723 ; NoVLX: # %bb.0: # %entry
9724 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
9725 ; NoVLX-NEXT: kmovw %k0, %eax
9726 ; NoVLX-NEXT: movzwl %ax, %eax
9727 ; NoVLX-NEXT: vzeroupper
9730 %0 = bitcast <8 x i64> %__a to <8 x i64>
9731 %load = load i64, i64* %__b
9732 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9733 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9734 %2 = icmp sgt <8 x i64> %0, %1
9735 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9736 %4 = bitcast <64 x i1> %3 to i64
; Masked icmp sgt of <8 x i64> %__a against an i64 loaded from %__b and
; splatted to all eight lanes. The i8 mask %__u (bitcast to <8 x i1>) is ANDed
; with the result, which is padded to <64 x i1> with zero lanes and bitcast
; to i64.
; Expected output: one vpcmpgtq with a {1to8} broadcast operand and a {%k1}
; write-mask.
9740 define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
9741 ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
9742 ; VLX: # %bb.0: # %entry
9743 ; VLX-NEXT: kmovd %edi, %k1
9744 ; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9745 ; VLX-NEXT: kmovq %k0, %rax
9746 ; VLX-NEXT: vzeroupper
9749 ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
9750 ; NoVLX: # %bb.0: # %entry
9751 ; NoVLX-NEXT: kmovw %edi, %k1
9752 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
9753 ; NoVLX-NEXT: kmovw %k0, %eax
9754 ; NoVLX-NEXT: movzwl %ax, %eax
9755 ; NoVLX-NEXT: vzeroupper
9758 %0 = bitcast <8 x i64> %__a to <8 x i64>
9759 %load = load i64, i64* %__b
9760 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
9761 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
9762 %2 = icmp sgt <8 x i64> %0, %1
9763 %3 = bitcast i8 %__u to <8 x i1>
9764 %4 = and <8 x i1> %3, %2
9765 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9766 %6 = bitcast <64 x i1> %5 to i64
; Unmasked icmp sge of two <16 x i8> vectors (each bitcast from a <2 x i64>
; register argument). The <16 x i1> result is padded to <32 x i1> with lanes
; 16-31 taken from zeroinitializer and bitcast to i32.
; Expected output: the +avx512vl/+avx512bw run emits a single vpcmpnltb into
; a mask register; the +avx512f-only run has no byte mask compare and emits
; vpcmpgtb with swapped operands, vpternlogq $15, then vpmovsxbd + vptestmd
; to build the mask.
9771 define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
9772 ; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
9773 ; VLX: # %bb.0: # %entry
9774 ; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0
9775 ; VLX-NEXT: kmovd %k0, %eax
9778 ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
9779 ; NoVLX: # %bb.0: # %entry
9780 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9781 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9782 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9783 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9784 ; NoVLX-NEXT: kmovw %k0, %eax
9785 ; NoVLX-NEXT: vzeroupper
9788 %0 = bitcast <2 x i64> %__a to <16 x i8>
9789 %1 = bitcast <2 x i64> %__b to <16 x i8>
9790 %2 = icmp sge <16 x i8> %0, %1
9791 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9792 %4 = bitcast <32 x i1> %3 to i32
; Unmasked icmp sge of <16 x i8> %__a (bitcast from <2 x i64>) against a
; <2 x i64> vector loaded from %__b and bitcast to <16 x i8>. The <16 x i1>
; result is padded to <32 x i1> with zero lanes and bitcast to i32.
; Expected output: the +avx512vl/+avx512bw run folds the load into
; vpcmpnltb; the +avx512f-only run loads with vmovdqa and uses the
; vpcmpgtb / vpternlogq $15 / vpmovsxbd / vptestmd sequence.
9796 define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
9797 ; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
9798 ; VLX: # %bb.0: # %entry
9799 ; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0
9800 ; VLX-NEXT: kmovd %k0, %eax
9803 ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
9804 ; NoVLX: # %bb.0: # %entry
9805 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
9806 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9807 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9808 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9809 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9810 ; NoVLX-NEXT: kmovw %k0, %eax
9811 ; NoVLX-NEXT: vzeroupper
9814 %0 = bitcast <2 x i64> %__a to <16 x i8>
9815 %load = load <2 x i64>, <2 x i64>* %__b
9816 %1 = bitcast <2 x i64> %load to <16 x i8>
9817 %2 = icmp sge <16 x i8> %0, %1
9818 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9819 %4 = bitcast <32 x i1> %3 to i32
; Masked icmp sge of two <16 x i8> vectors (bitcast from <2 x i64> register
; arguments): the result is ANDed with the i16 mask %__u (bitcast to
; <16 x i1>), padded to <32 x i1> with zero lanes and bitcast to i32.
; Expected output: the +avx512vl/+avx512bw run applies the mask as a {%k1}
; write-mask on vpcmpnltb; the +avx512f-only run applies it afterwards with a
; scalar andl on the extracted mask.
9823 define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
9824 ; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
9825 ; VLX: # %bb.0: # %entry
9826 ; VLX-NEXT: kmovd %edi, %k1
9827 ; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0 {%k1}
9828 ; VLX-NEXT: kmovd %k0, %eax
9831 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
9832 ; NoVLX: # %bb.0: # %entry
9833 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9834 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9835 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9836 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9837 ; NoVLX-NEXT: kmovw %k0, %eax
9838 ; NoVLX-NEXT: andl %edi, %eax
9839 ; NoVLX-NEXT: vzeroupper
9842 %0 = bitcast <2 x i64> %__a to <16 x i8>
9843 %1 = bitcast <2 x i64> %__b to <16 x i8>
9844 %2 = icmp sge <16 x i8> %0, %1
9845 %3 = bitcast i16 %__u to <16 x i1>
9846 %4 = and <16 x i1> %2, %3
9847 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9848 %6 = bitcast <32 x i1> %5 to i32
; Masked icmp sge of <16 x i8> %__a (bitcast from <2 x i64>) against a
; <2 x i64> vector loaded from %__b and bitcast to <16 x i8>. The result is
; ANDed with the i16 mask %__u (bitcast to <16 x i1>), padded to <32 x i1>
; with zero lanes and bitcast to i32.
; Expected output: the +avx512vl/+avx512bw run folds both load and mask into
; vpcmpnltb; the +avx512f-only run loads with vmovdqa and applies the mask
; with a scalar andl at the end.
9852 define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
9853 ; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
9854 ; VLX: # %bb.0: # %entry
9855 ; VLX-NEXT: kmovd %edi, %k1
9856 ; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
9857 ; VLX-NEXT: kmovd %k0, %eax
9860 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
9861 ; NoVLX: # %bb.0: # %entry
9862 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
9863 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9864 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9865 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9866 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9867 ; NoVLX-NEXT: kmovw %k0, %eax
9868 ; NoVLX-NEXT: andl %edi, %eax
9869 ; NoVLX-NEXT: vzeroupper
9872 %0 = bitcast <2 x i64> %__a to <16 x i8>
9873 %load = load <2 x i64>, <2 x i64>* %__b
9874 %1 = bitcast <2 x i64> %load to <16 x i8>
9875 %2 = icmp sge <16 x i8> %0, %1
9876 %3 = bitcast i16 %__u to <16 x i1>
9877 %4 = and <16 x i1> %2, %3
9878 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9879 %6 = bitcast <32 x i1> %5 to i32
; Unmasked icmp sge of two <16 x i8> vectors (bitcast from <2 x i64> register
; arguments). The <16 x i1> result is padded to <64 x i1> (every lane above
; 15 selects from zeroinitializer) and bitcast to i64.
; Expected output: the +avx512vl/+avx512bw run emits vpcmpnltb + kmovq; the
; +avx512f-only run uses the vpcmpgtb / vpternlogq $15 / vpmovsxbd /
; vptestmd sequence, then kmovw and movzwl.
9884 define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
9885 ; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
9886 ; VLX: # %bb.0: # %entry
9887 ; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0
9888 ; VLX-NEXT: kmovq %k0, %rax
9891 ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
9892 ; NoVLX: # %bb.0: # %entry
9893 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9894 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9895 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9896 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9897 ; NoVLX-NEXT: kmovw %k0, %eax
9898 ; NoVLX-NEXT: movzwl %ax, %eax
9899 ; NoVLX-NEXT: vzeroupper
9902 %0 = bitcast <2 x i64> %__a to <16 x i8>
9903 %1 = bitcast <2 x i64> %__b to <16 x i8>
9904 %2 = icmp sge <16 x i8> %0, %1
9905 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9906 %4 = bitcast <64 x i1> %3 to i64
; Unmasked icmp sge of <16 x i8> %__a (bitcast from <2 x i64>) against a
; <2 x i64> vector loaded from %__b and bitcast to <16 x i8>. The <16 x i1>
; result is padded to <64 x i1> with zero lanes and bitcast to i64.
; Expected output: the +avx512vl/+avx512bw run folds the load into
; vpcmpnltb; the +avx512f-only run loads with vmovdqa and uses the
; vpcmpgtb / vpternlogq $15 / vpmovsxbd / vptestmd sequence.
9910 define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
9911 ; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
9912 ; VLX: # %bb.0: # %entry
9913 ; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0
9914 ; VLX-NEXT: kmovq %k0, %rax
9917 ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
9918 ; NoVLX: # %bb.0: # %entry
9919 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
9920 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9921 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9922 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9923 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9924 ; NoVLX-NEXT: kmovw %k0, %eax
9925 ; NoVLX-NEXT: movzwl %ax, %eax
9926 ; NoVLX-NEXT: vzeroupper
9929 %0 = bitcast <2 x i64> %__a to <16 x i8>
9930 %load = load <2 x i64>, <2 x i64>* %__b
9931 %1 = bitcast <2 x i64> %load to <16 x i8>
9932 %2 = icmp sge <16 x i8> %0, %1
9933 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9934 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of two <16 x i8> vectors: the <16 x i1> compare result
; is ANDed with %__u (i16 bitcast to <16 x i1>), widened to <64 x i1> with
; zeroinitializer lanes (mask indices 16-31), and then bitcast to i64.
9938 define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
9939 ; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
9940 ; VLX: # %bb.0: # %entry
9941 ; VLX-NEXT: kmovd %edi, %k1
9942 ; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0 {%k1}
9943 ; VLX-NEXT: kmovq %k0, %rax
9946 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
9947 ; NoVLX: # %bb.0: # %entry
9948 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9949 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9950 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9951 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9952 ; NoVLX-NEXT: kmovw %k0, %eax
9953 ; NoVLX-NEXT: andl %edi, %eax
9954 ; NoVLX-NEXT: vzeroupper
9957 %0 = bitcast <2 x i64> %__a to <16 x i8>
9958 %1 = bitcast <2 x i64> %__b to <16 x i8>
9959 %2 = icmp sge <16 x i8> %0, %1
9960 %3 = bitcast i16 %__u to <16 x i1>
9961 %4 = and <16 x i1> %2, %3
9962 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9963 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare of <16 x i8> vectors with the right-hand operand loaded
; through %__b. The compare result is ANDed with %__u (i16 bitcast to <16 x i1>),
; widened to <64 x i1> with zeroinitializer lanes, and then bitcast to i64.
9967 define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
9968 ; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
9969 ; VLX: # %bb.0: # %entry
9970 ; VLX-NEXT: kmovd %edi, %k1
9971 ; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
9972 ; VLX-NEXT: kmovq %k0, %rax
9975 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
9976 ; NoVLX: # %bb.0: # %entry
9977 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
9978 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
9979 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
9980 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9981 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
9982 ; NoVLX-NEXT: kmovw %k0, %eax
9983 ; NoVLX-NEXT: andl %edi, %eax
9984 ; NoVLX-NEXT: vzeroupper
9987 %0 = bitcast <2 x i64> %__a to <16 x i8>
9988 %load = load <2 x i64>, <2 x i64>* %__b
9989 %1 = bitcast <2 x i64> %load to <16 x i8>
9990 %2 = icmp sge <16 x i8> %0, %1
9991 %3 = bitcast i16 %__u to <16 x i1>
9992 %4 = and <16 x i1> %2, %3
9993 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
9994 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of two <32 x i8> vectors (bitcast from the <4 x i64> arguments).
; The <32 x i1> result is widened to <64 x i1> by a shufflevector whose second operand
; is zeroinitializer (mask indices 32-63 select those zero lanes), then bitcast to i64.
10000 define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10001 ; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
10002 ; VLX: # %bb.0: # %entry
10003 ; VLX-NEXT: vpcmpnltb %ymm1, %ymm0, %k0
10004 ; VLX-NEXT: kmovq %k0, %rax
10005 ; VLX-NEXT: vzeroupper
10008 ; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
10009 ; NoVLX: # %bb.0: # %entry
10010 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
10011 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10012 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
10013 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
10014 ; NoVLX-NEXT: kmovw %k0, %ecx
10015 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
10016 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
10017 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10018 ; NoVLX-NEXT: kmovw %k0, %eax
10019 ; NoVLX-NEXT: shll $16, %eax
10020 ; NoVLX-NEXT: orl %ecx, %eax
10021 ; NoVLX-NEXT: vzeroupper
10024 %0 = bitcast <4 x i64> %__a to <32 x i8>
10025 %1 = bitcast <4 x i64> %__b to <32 x i8>
10026 %2 = icmp sge <32 x i8> %0, %1
10027 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10028 %4 = bitcast <64 x i1> %3 to i64
; Signed >= compare of <32 x i8> vectors where the right-hand operand is loaded
; through the %__b pointer. The <32 x i1> result is widened to <64 x i1> with
; zeroinitializer lanes (mask indices 32-63) and then bitcast to i64.
10031 define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
10032 ; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem:
10033 ; VLX: # %bb.0: # %entry
10034 ; VLX-NEXT: vpcmpnltb (%rdi), %ymm0, %k0
10035 ; VLX-NEXT: kmovq %k0, %rax
10036 ; VLX-NEXT: vzeroupper
10039 ; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem:
10040 ; NoVLX: # %bb.0: # %entry
10041 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
10042 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
10043 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10044 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
10045 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
10046 ; NoVLX-NEXT: kmovw %k0, %ecx
10047 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
10048 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
10049 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10050 ; NoVLX-NEXT: kmovw %k0, %eax
10051 ; NoVLX-NEXT: shll $16, %eax
10052 ; NoVLX-NEXT: orl %ecx, %eax
10053 ; NoVLX-NEXT: vzeroupper
10056 %0 = bitcast <4 x i64> %__a to <32 x i8>
10057 %load = load <4 x i64>, <4 x i64>* %__b
10058 %1 = bitcast <4 x i64> %load to <32 x i8>
10059 %2 = icmp sge <32 x i8> %0, %1
10060 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10061 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of two <32 x i8> vectors: the <32 x i1> compare result
; is ANDed with %__u (i32 bitcast to <32 x i1>), widened to <64 x i1> with
; zeroinitializer lanes (mask indices 32-63), and then bitcast to i64.
10065 define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10066 ; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
10067 ; VLX: # %bb.0: # %entry
10068 ; VLX-NEXT: kmovd %edi, %k1
10069 ; VLX-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 {%k1}
10070 ; VLX-NEXT: kmovq %k0, %rax
10071 ; VLX-NEXT: vzeroupper
10074 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
10075 ; NoVLX: # %bb.0: # %entry
10076 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
10077 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10078 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
10079 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
10080 ; NoVLX-NEXT: kmovw %k0, %eax
10081 ; NoVLX-NEXT: andl %edi, %eax
10082 ; NoVLX-NEXT: shrl $16, %edi
10083 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
10084 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
10085 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10086 ; NoVLX-NEXT: kmovw %k0, %ecx
10087 ; NoVLX-NEXT: andl %edi, %ecx
10088 ; NoVLX-NEXT: shll $16, %ecx
10089 ; NoVLX-NEXT: movzwl %ax, %eax
10090 ; NoVLX-NEXT: orl %ecx, %eax
10091 ; NoVLX-NEXT: vzeroupper
10094 %0 = bitcast <4 x i64> %__a to <32 x i8>
10095 %1 = bitcast <4 x i64> %__b to <32 x i8>
10096 %2 = icmp sge <32 x i8> %0, %1
10097 %3 = bitcast i32 %__u to <32 x i1>
10098 %4 = and <32 x i1> %2, %3
10099 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10100 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare of <32 x i8> vectors with the right-hand operand loaded
; through %__b. The compare result is ANDed with %__u (i32 bitcast to <32 x i1>),
; widened to <64 x i1> with zeroinitializer lanes, and then bitcast to i64.
10104 define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
10105 ; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
10106 ; VLX: # %bb.0: # %entry
10107 ; VLX-NEXT: kmovd %edi, %k1
10108 ; VLX-NEXT: vpcmpnltb (%rsi), %ymm0, %k0 {%k1}
10109 ; VLX-NEXT: kmovq %k0, %rax
10110 ; VLX-NEXT: vzeroupper
10113 ; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
10114 ; NoVLX: # %bb.0: # %entry
10115 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
10116 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
10117 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10118 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
10119 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
10120 ; NoVLX-NEXT: kmovw %k0, %eax
10121 ; NoVLX-NEXT: andl %edi, %eax
10122 ; NoVLX-NEXT: shrl $16, %edi
10123 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
10124 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
10125 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10126 ; NoVLX-NEXT: kmovw %k0, %ecx
10127 ; NoVLX-NEXT: andl %edi, %ecx
10128 ; NoVLX-NEXT: shll $16, %ecx
10129 ; NoVLX-NEXT: movzwl %ax, %eax
10130 ; NoVLX-NEXT: orl %ecx, %eax
10131 ; NoVLX-NEXT: vzeroupper
10134 %0 = bitcast <4 x i64> %__a to <32 x i8>
10135 %load = load <4 x i64>, <4 x i64>* %__b
10136 %1 = bitcast <4 x i64> %load to <32 x i8>
10137 %2 = icmp sge <32 x i8> %0, %1
10138 %3 = bitcast i32 %__u to <32 x i1>
10139 %4 = and <32 x i1> %2, %3
10140 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10141 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of two <8 x i16> vectors (bitcast from the <2 x i64> arguments).
; The <8 x i1> result is widened to <16 x i1> by a shufflevector whose second operand
; is zeroinitializer (mask indices 8-15 select those zero lanes), then bitcast to i16.
10146 define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10147 ; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask:
10148 ; VLX: # %bb.0: # %entry
10149 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0
10150 ; VLX-NEXT: kmovd %k0, %eax
10151 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
10154 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask:
10155 ; NoVLX: # %bb.0: # %entry
10156 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10157 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10158 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10159 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
10160 ; NoVLX-NEXT: kmovw %k0, %eax
10161 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
10162 ; NoVLX-NEXT: vzeroupper
10165 %0 = bitcast <2 x i64> %__a to <8 x i16>
10166 %1 = bitcast <2 x i64> %__b to <8 x i16>
10167 %2 = icmp sge <8 x i16> %0, %1
10168 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10169 %4 = bitcast <16 x i1> %3 to i16
; Signed >= compare of <8 x i16> vectors where the right-hand operand is loaded
; through the %__b pointer. The <8 x i1> result is widened to <16 x i1> with
; zeroinitializer lanes (mask indices 8-15) and then bitcast to i16.
10173 define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10174 ; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem:
10175 ; VLX: # %bb.0: # %entry
10176 ; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
10177 ; VLX-NEXT: kmovd %k0, %eax
10178 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
10181 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem:
10182 ; NoVLX: # %bb.0: # %entry
10183 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
10184 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10185 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10186 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10187 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
10188 ; NoVLX-NEXT: kmovw %k0, %eax
10189 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
10190 ; NoVLX-NEXT: vzeroupper
10193 %0 = bitcast <2 x i64> %__a to <8 x i16>
10194 %load = load <2 x i64>, <2 x i64>* %__b
10195 %1 = bitcast <2 x i64> %load to <8 x i16>
10196 %2 = icmp sge <8 x i16> %0, %1
10197 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10198 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of two <8 x i16> vectors: the <8 x i1> compare result
; is ANDed with %__u (i8 bitcast to <8 x i1>), widened to <16 x i1> with
; zeroinitializer lanes (mask indices 8-15), and then bitcast to i16.
10202 define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10203 ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask:
10204 ; VLX: # %bb.0: # %entry
10205 ; VLX-NEXT: kmovd %edi, %k1
10206 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0 {%k1}
10207 ; VLX-NEXT: kmovd %k0, %eax
10208 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
10211 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask:
10212 ; NoVLX: # %bb.0: # %entry
10213 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10214 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10215 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10216 ; NoVLX-NEXT: kmovw %edi, %k1
10217 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
10218 ; NoVLX-NEXT: kmovw %k0, %eax
10219 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
10220 ; NoVLX-NEXT: vzeroupper
10223 %0 = bitcast <2 x i64> %__a to <8 x i16>
10224 %1 = bitcast <2 x i64> %__b to <8 x i16>
10225 %2 = icmp sge <8 x i16> %0, %1
10226 %3 = bitcast i8 %__u to <8 x i1>
10227 %4 = and <8 x i1> %2, %3
10228 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10229 %6 = bitcast <16 x i1> %5 to i16
; Masked signed >= compare of <8 x i16> vectors with the right-hand operand loaded
; through %__b. The compare result is ANDed with %__u (i8 bitcast to <8 x i1>),
; widened to <16 x i1> with zeroinitializer lanes, and then bitcast to i16.
10233 define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10234 ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem:
10235 ; VLX: # %bb.0: # %entry
10236 ; VLX-NEXT: kmovd %edi, %k1
10237 ; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
10238 ; VLX-NEXT: kmovd %k0, %eax
10239 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
10242 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem:
10243 ; NoVLX: # %bb.0: # %entry
10244 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
10245 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10246 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10247 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10248 ; NoVLX-NEXT: kmovw %edi, %k1
10249 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
10250 ; NoVLX-NEXT: kmovw %k0, %eax
10251 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
10252 ; NoVLX-NEXT: vzeroupper
10255 %0 = bitcast <2 x i64> %__a to <8 x i16>
10256 %load = load <2 x i64>, <2 x i64>* %__b
10257 %1 = bitcast <2 x i64> %load to <8 x i16>
10258 %2 = icmp sge <8 x i16> %0, %1
10259 %3 = bitcast i8 %__u to <8 x i1>
10260 %4 = and <8 x i1> %2, %3
10261 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10262 %6 = bitcast <16 x i1> %5 to i16
; Signed >= compare of two <8 x i16> vectors (bitcast from the <2 x i64> arguments).
; The <8 x i1> result is widened to <32 x i1> by a shufflevector whose second operand
; is zeroinitializer (every mask index >= 8 selects a zero lane), then bitcast to i32.
10267 define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10268 ; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
10269 ; VLX: # %bb.0: # %entry
10270 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0
10271 ; VLX-NEXT: kmovd %k0, %eax
10274 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
10275 ; NoVLX: # %bb.0: # %entry
10276 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10277 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10278 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10279 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
10280 ; NoVLX-NEXT: kmovw %k0, %eax
10281 ; NoVLX-NEXT: vzeroupper
10284 %0 = bitcast <2 x i64> %__a to <8 x i16>
10285 %1 = bitcast <2 x i64> %__b to <8 x i16>
10286 %2 = icmp sge <8 x i16> %0, %1
10287 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10288 %4 = bitcast <32 x i1> %3 to i32
; Signed >= compare of <8 x i16> vectors where the right-hand operand is loaded
; through the %__b pointer. The <8 x i1> result is widened to <32 x i1> with
; zeroinitializer lanes (every mask index >= 8) and then bitcast to i32.
10292 define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10293 ; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
10294 ; VLX: # %bb.0: # %entry
10295 ; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
10296 ; VLX-NEXT: kmovd %k0, %eax
10299 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
10300 ; NoVLX: # %bb.0: # %entry
10301 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
10302 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10303 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10304 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10305 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
10306 ; NoVLX-NEXT: kmovw %k0, %eax
10307 ; NoVLX-NEXT: vzeroupper
10310 %0 = bitcast <2 x i64> %__a to <8 x i16>
10311 %load = load <2 x i64>, <2 x i64>* %__b
10312 %1 = bitcast <2 x i64> %load to <8 x i16>
10313 %2 = icmp sge <8 x i16> %0, %1
10314 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10315 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of two <8 x i16> vectors: the <8 x i1> compare result
; is ANDed with %__u (i8 bitcast to <8 x i1>), widened to <32 x i1> with
; zeroinitializer lanes (every mask index >= 8), and then bitcast to i32.
10319 define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10320 ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
10321 ; VLX: # %bb.0: # %entry
10322 ; VLX-NEXT: kmovd %edi, %k1
10323 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0 {%k1}
10324 ; VLX-NEXT: kmovd %k0, %eax
10327 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
10328 ; NoVLX: # %bb.0: # %entry
10329 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10330 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10331 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10332 ; NoVLX-NEXT: kmovw %edi, %k1
10333 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
10334 ; NoVLX-NEXT: kmovw %k0, %eax
10335 ; NoVLX-NEXT: vzeroupper
10338 %0 = bitcast <2 x i64> %__a to <8 x i16>
10339 %1 = bitcast <2 x i64> %__b to <8 x i16>
10340 %2 = icmp sge <8 x i16> %0, %1
10341 %3 = bitcast i8 %__u to <8 x i1>
10342 %4 = and <8 x i1> %2, %3
10343 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10344 %6 = bitcast <32 x i1> %5 to i32
; Masked signed >= compare of <8 x i16> vectors with the right-hand operand loaded
; through %__b. The compare result is ANDed with %__u (i8 bitcast to <8 x i1>),
; widened to <32 x i1> with zeroinitializer lanes, and then bitcast to i32.
10348 define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10349 ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
10350 ; VLX: # %bb.0: # %entry
10351 ; VLX-NEXT: kmovd %edi, %k1
10352 ; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
10353 ; VLX-NEXT: kmovd %k0, %eax
10356 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
10357 ; NoVLX: # %bb.0: # %entry
10358 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
10359 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10360 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10361 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10362 ; NoVLX-NEXT: kmovw %edi, %k1
10363 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
10364 ; NoVLX-NEXT: kmovw %k0, %eax
10365 ; NoVLX-NEXT: vzeroupper
10368 %0 = bitcast <2 x i64> %__a to <8 x i16>
10369 %load = load <2 x i64>, <2 x i64>* %__b
10370 %1 = bitcast <2 x i64> %load to <8 x i16>
10371 %2 = icmp sge <8 x i16> %0, %1
10372 %3 = bitcast i8 %__u to <8 x i1>
10373 %4 = and <8 x i1> %2, %3
10374 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10375 %6 = bitcast <32 x i1> %5 to i32
; Signed >= compare of two <8 x i16> vectors (bitcast from the <2 x i64> arguments).
; The <8 x i1> result is widened to <64 x i1> by a shufflevector whose second operand
; is zeroinitializer (every mask index >= 8 selects a zero lane), then bitcast to i64.
10380 define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10381 ; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
10382 ; VLX: # %bb.0: # %entry
10383 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0
10384 ; VLX-NEXT: kmovq %k0, %rax
10387 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
10388 ; NoVLX: # %bb.0: # %entry
10389 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10390 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10391 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10392 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
10393 ; NoVLX-NEXT: kmovw %k0, %eax
10394 ; NoVLX-NEXT: movzwl %ax, %eax
10395 ; NoVLX-NEXT: vzeroupper
10398 %0 = bitcast <2 x i64> %__a to <8 x i16>
10399 %1 = bitcast <2 x i64> %__b to <8 x i16>
10400 %2 = icmp sge <8 x i16> %0, %1
10401 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10402 %4 = bitcast <64 x i1> %3 to i64
; Signed >= compare of <8 x i16> vectors where the right-hand operand is loaded
; through the %__b pointer. The <8 x i1> result is widened to <64 x i1> with
; zeroinitializer lanes (every mask index >= 8) and then bitcast to i64.
10406 define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10407 ; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
10408 ; VLX: # %bb.0: # %entry
10409 ; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
10410 ; VLX-NEXT: kmovq %k0, %rax
10413 ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
10414 ; NoVLX: # %bb.0: # %entry
10415 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
10416 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10417 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10418 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10419 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
10420 ; NoVLX-NEXT: kmovw %k0, %eax
10421 ; NoVLX-NEXT: movzwl %ax, %eax
10422 ; NoVLX-NEXT: vzeroupper
10425 %0 = bitcast <2 x i64> %__a to <8 x i16>
10426 %load = load <2 x i64>, <2 x i64>* %__b
10427 %1 = bitcast <2 x i64> %load to <8 x i16>
10428 %2 = icmp sge <8 x i16> %0, %1
10429 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10430 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of two <8 x i16> vectors: the <8 x i1> compare result
; is ANDed with %__u (i8 bitcast to <8 x i1>), widened to <64 x i1> with
; zeroinitializer lanes (every mask index >= 8), and then bitcast to i64.
10434 define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10435 ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
10436 ; VLX: # %bb.0: # %entry
10437 ; VLX-NEXT: kmovd %edi, %k1
10438 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0 {%k1}
10439 ; VLX-NEXT: kmovq %k0, %rax
10442 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
10443 ; NoVLX: # %bb.0: # %entry
10444 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10445 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10446 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10447 ; NoVLX-NEXT: kmovw %edi, %k1
10448 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
10449 ; NoVLX-NEXT: kmovw %k0, %eax
10450 ; NoVLX-NEXT: movzwl %ax, %eax
10451 ; NoVLX-NEXT: vzeroupper
10454 %0 = bitcast <2 x i64> %__a to <8 x i16>
10455 %1 = bitcast <2 x i64> %__b to <8 x i16>
10456 %2 = icmp sge <8 x i16> %0, %1
10457 %3 = bitcast i8 %__u to <8 x i1>
10458 %4 = and <8 x i1> %2, %3
10459 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10460 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare of <8 x i16> vectors with the right-hand operand loaded
; through %__b. The compare result is ANDed with %__u (i8 bitcast to <8 x i1>),
; widened to <64 x i1> with zeroinitializer lanes, and then bitcast to i64.
10464 define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10465 ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
10466 ; VLX: # %bb.0: # %entry
10467 ; VLX-NEXT: kmovd %edi, %k1
10468 ; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
10469 ; VLX-NEXT: kmovq %k0, %rax
10472 ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
10473 ; NoVLX: # %bb.0: # %entry
10474 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
10475 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
10476 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10477 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
10478 ; NoVLX-NEXT: kmovw %edi, %k1
10479 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
10480 ; NoVLX-NEXT: kmovw %k0, %eax
10481 ; NoVLX-NEXT: movzwl %ax, %eax
10482 ; NoVLX-NEXT: vzeroupper
10485 %0 = bitcast <2 x i64> %__a to <8 x i16>
10486 %load = load <2 x i64>, <2 x i64>* %__b
10487 %1 = bitcast <2 x i64> %load to <8 x i16>
10488 %2 = icmp sge <8 x i16> %0, %1
10489 %3 = bitcast i8 %__u to <8 x i1>
10490 %4 = and <8 x i1> %2, %3
10491 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10492 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of two <16 x i16> vectors (bitcast from the <4 x i64> arguments).
; The <16 x i1> result is widened to <32 x i1> by a shufflevector whose second operand
; is zeroinitializer (mask indices 16-31 select those zero lanes), then bitcast to i32.
10497 define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10498 ; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
10499 ; VLX: # %bb.0: # %entry
10500 ; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0
10501 ; VLX-NEXT: kmovd %k0, %eax
10502 ; VLX-NEXT: vzeroupper
10505 ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
10506 ; NoVLX: # %bb.0: # %entry
10507 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10508 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10509 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10510 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10511 ; NoVLX-NEXT: kmovw %k0, %eax
10512 ; NoVLX-NEXT: vzeroupper
10515 %0 = bitcast <4 x i64> %__a to <16 x i16>
10516 %1 = bitcast <4 x i64> %__b to <16 x i16>
10517 %2 = icmp sge <16 x i16> %0, %1
10518 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10519 %4 = bitcast <32 x i1> %3 to i32
; Signed >= compare of <16 x i16> vectors where the right-hand operand is loaded
; through the %__b pointer. The <16 x i1> result is widened to <32 x i1> with
; zeroinitializer lanes (mask indices 16-31) and then bitcast to i32.
10523 define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
10524 ; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem:
10525 ; VLX: # %bb.0: # %entry
10526 ; VLX-NEXT: vpcmpnltw (%rdi), %ymm0, %k0
10527 ; VLX-NEXT: kmovd %k0, %eax
10528 ; VLX-NEXT: vzeroupper
10531 ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem:
10532 ; NoVLX: # %bb.0: # %entry
10533 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
10534 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10535 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10536 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10537 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10538 ; NoVLX-NEXT: kmovw %k0, %eax
10539 ; NoVLX-NEXT: vzeroupper
10542 %0 = bitcast <4 x i64> %__a to <16 x i16>
10543 %load = load <4 x i64>, <4 x i64>* %__b
10544 %1 = bitcast <4 x i64> %load to <16 x i16>
10545 %2 = icmp sge <16 x i16> %0, %1
10546 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10547 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of two <16 x i16> vectors: the <16 x i1> compare result
; is ANDed with %__u (i16 bitcast to <16 x i1>), widened to <32 x i1> with
; zeroinitializer lanes (mask indices 16-31), and then bitcast to i32.
10551 define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10552 ; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
10553 ; VLX: # %bb.0: # %entry
10554 ; VLX-NEXT: kmovd %edi, %k1
10555 ; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0 {%k1}
10556 ; VLX-NEXT: kmovd %k0, %eax
10557 ; VLX-NEXT: vzeroupper
10560 ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
10561 ; NoVLX: # %bb.0: # %entry
10562 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10563 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10564 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10565 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10566 ; NoVLX-NEXT: kmovw %k0, %eax
10567 ; NoVLX-NEXT: andl %edi, %eax
10568 ; NoVLX-NEXT: vzeroupper
10571 %0 = bitcast <4 x i64> %__a to <16 x i16>
10572 %1 = bitcast <4 x i64> %__b to <16 x i16>
10573 %2 = icmp sge <16 x i16> %0, %1
10574 %3 = bitcast i16 %__u to <16 x i1>
10575 %4 = and <16 x i1> %2, %3
10576 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10577 %6 = bitcast <32 x i1> %5 to i32
; Masked signed >= compare of <16 x i16> vectors with the right-hand operand loaded
; through %__b. The compare result is ANDed with %__u (i16 bitcast to <16 x i1>),
; widened to <32 x i1> with zeroinitializer lanes, and then bitcast to i32.
10581 define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
10582 ; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem:
10583 ; VLX: # %bb.0: # %entry
10584 ; VLX-NEXT: kmovd %edi, %k1
10585 ; VLX-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1}
10586 ; VLX-NEXT: kmovd %k0, %eax
10587 ; VLX-NEXT: vzeroupper
10590 ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem:
10591 ; NoVLX: # %bb.0: # %entry
10592 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
10593 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10594 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10595 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10596 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10597 ; NoVLX-NEXT: kmovw %k0, %eax
10598 ; NoVLX-NEXT: andl %edi, %eax
10599 ; NoVLX-NEXT: vzeroupper
10602 %0 = bitcast <4 x i64> %__a to <16 x i16>
10603 %load = load <4 x i64>, <4 x i64>* %__b
10604 %1 = bitcast <4 x i64> %load to <16 x i16>
10605 %2 = icmp sge <16 x i16> %0, %1
10606 %3 = bitcast i16 %__u to <16 x i1>
10607 %4 = and <16 x i1> %2, %3
10608 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10609 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed >= compare of sixteen i16 lanes, widened to a 64-bit mask.
; In the shufflevector, indices 0-15 select the compare result and every
; index in 16-31 (the range is repeated to fill 64 elements) selects from the
; zeroinitializer operand, so only the low 16 elements carry compare bits.
; Expected assembly: with AVX512VL/BW enabled, vpcmpnltw + kmovq; with AVX512F
; only, vpcmpgtw (operands swapped) + vpternlogq $15, vpmovsxwd/vptestmd, and
; a movzwl of the 16-bit mask.
10614 define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10615 ; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
10616 ; VLX: # %bb.0: # %entry
10617 ; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0
10618 ; VLX-NEXT: kmovq %k0, %rax
10619 ; VLX-NEXT: vzeroupper
10622 ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
10623 ; NoVLX: # %bb.0: # %entry
10624 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10625 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10626 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10627 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10628 ; NoVLX-NEXT: kmovw %k0, %eax
10629 ; NoVLX-NEXT: movzwl %ax, %eax
10630 ; NoVLX-NEXT: vzeroupper
10633 %0 = bitcast <4 x i64> %__a to <16 x i16>
10634 %1 = bitcast <4 x i64> %__b to <16 x i16>
10635 %2 = icmp sge <16 x i16> %0, %1
10636 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10637 %4 = bitcast <64 x i1> %3 to i64
; Memory-operand variant of the unmasked 16 x i16 signed >= compare widened
; to a 64-bit mask; the right-hand side is loaded through %__b. Shuffle
; indices 16-31 (repeated) all select from the zeroinitializer operand.
; Expected assembly: with AVX512VL/BW enabled the load folds into vpcmpnltw as
; (%rdi); with AVX512F only it is loaded with vmovdqa and followed by the
; vpcmpgtw + vpternlogq $15 + vpmovsxwd/vptestmd sequence and movzwl.
10641 define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
10642 ; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
10643 ; VLX: # %bb.0: # %entry
10644 ; VLX-NEXT: vpcmpnltw (%rdi), %ymm0, %k0
10645 ; VLX-NEXT: kmovq %k0, %rax
10646 ; VLX-NEXT: vzeroupper
10649 ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
10650 ; NoVLX: # %bb.0: # %entry
10651 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
10652 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10653 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10654 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10655 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10656 ; NoVLX-NEXT: kmovw %k0, %eax
10657 ; NoVLX-NEXT: movzwl %ax, %eax
10658 ; NoVLX-NEXT: vzeroupper
10661 %0 = bitcast <4 x i64> %__a to <16 x i16>
10662 %load = load <4 x i64>, <4 x i64>* %__b
10663 %1 = bitcast <4 x i64> %load to <16 x i16>
10664 %2 = icmp sge <16 x i16> %0, %1
10665 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10666 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of sixteen i16 lanes, widened to a 64-bit mask.
; The compare result is ANDed with %__u (bitcast to <16 x i1>); shuffle
; indices 16-31 (repeated) all select from the zeroinitializer operand.
; Expected assembly: with AVX512VL/BW enabled, a write-masked vpcmpnltw and
; kmovq; with AVX512F only, vpcmpgtw (operands swapped) + vpternlogq $15,
; vpmovsxwd/vptestmd, and andl with %edi.
10670 define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
10671 ; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
10672 ; VLX: # %bb.0: # %entry
10673 ; VLX-NEXT: kmovd %edi, %k1
10674 ; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0 {%k1}
10675 ; VLX-NEXT: kmovq %k0, %rax
10676 ; VLX-NEXT: vzeroupper
10679 ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
10680 ; NoVLX: # %bb.0: # %entry
10681 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10682 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10683 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10684 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10685 ; NoVLX-NEXT: kmovw %k0, %eax
10686 ; NoVLX-NEXT: andl %edi, %eax
10687 ; NoVLX-NEXT: vzeroupper
10690 %0 = bitcast <4 x i64> %__a to <16 x i16>
10691 %1 = bitcast <4 x i64> %__b to <16 x i16>
10692 %2 = icmp sge <16 x i16> %0, %1
10693 %3 = bitcast i16 %__u to <16 x i1>
10694 %4 = and <16 x i1> %2, %3
10695 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10696 %6 = bitcast <64 x i1> %5 to i64
; Memory-operand variant of the masked 16 x i16 signed >= compare widened to
; a 64-bit mask; the right-hand side is loaded through %__b and the result is
; ANDed with %__u before being padded from the zeroinitializer operand.
; Expected assembly: with AVX512VL/BW enabled the load folds into the masked
; vpcmpnltw as (%rsi); with AVX512F only, vmovdqa followed by vpcmpgtw +
; vpternlogq $15 + vpmovsxwd/vptestmd and andl with %edi.
10700 define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
10701 ; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
10702 ; VLX: # %bb.0: # %entry
10703 ; VLX-NEXT: kmovd %edi, %k1
10704 ; VLX-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1}
10705 ; VLX-NEXT: kmovq %k0, %rax
10706 ; VLX-NEXT: vzeroupper
10709 ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
10710 ; NoVLX: # %bb.0: # %entry
10711 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
10712 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10713 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10714 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10715 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10716 ; NoVLX-NEXT: kmovw %k0, %eax
10717 ; NoVLX-NEXT: andl %edi, %eax
10718 ; NoVLX-NEXT: vzeroupper
10721 %0 = bitcast <4 x i64> %__a to <16 x i16>
10722 %load = load <4 x i64>, <4 x i64>* %__b
10723 %1 = bitcast <4 x i64> %load to <16 x i16>
10724 %2 = icmp sge <16 x i16> %0, %1
10725 %3 = bitcast i16 %__u to <16 x i1>
10726 %4 = and <16 x i1> %2, %3
10727 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
10728 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed >= compare of thirty-two i16 lanes, widened to a 64-bit
; mask. Shuffle indices 0-31 select the compare result and 32-63 select from
; the zeroinitializer operand.
; Expected assembly: with AVX512BW enabled, a single 512-bit vpcmpnltw; with
; AVX512F only, the inputs are split into 256-bit halves (vextracti64x4), each
; half goes through vpcmpgtw + vpternlogq $15 + vpmovsxwd/vptestmd, and the
; two 16-bit masks are merged with shll $16 / orl.
10733 define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
10734 ; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
10735 ; VLX: # %bb.0: # %entry
10736 ; VLX-NEXT: vpcmpnltw %zmm1, %zmm0, %k0
10737 ; VLX-NEXT: kmovq %k0, %rax
10738 ; VLX-NEXT: vzeroupper
10741 ; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
10742 ; NoVLX: # %bb.0: # %entry
10743 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
10744 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3
10745 ; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
10746 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10747 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10748 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10749 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10750 ; NoVLX-NEXT: kmovw %k0, %ecx
10751 ; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
10752 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm0
10753 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10754 ; NoVLX-NEXT: kmovw %k0, %eax
10755 ; NoVLX-NEXT: shll $16, %eax
10756 ; NoVLX-NEXT: orl %ecx, %eax
10757 ; NoVLX-NEXT: vzeroupper
10760 %0 = bitcast <8 x i64> %__a to <32 x i16>
10761 %1 = bitcast <8 x i64> %__b to <32 x i16>
10762 %2 = icmp sge <32 x i16> %0, %1
10763 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10764 %4 = bitcast <64 x i1> %3 to i64
; Memory-operand variant of the unmasked 32 x i16 signed >= compare widened
; to a 64-bit mask; the right-hand side is loaded through %__b.
; Expected assembly: with AVX512BW enabled the load folds into a 512-bit
; vpcmpnltw as (%rdi); with AVX512F only the operand is read as two 256-bit
; loads ((%rdi) and 32(%rdi)), each half is compared and converted to a 16-bit
; mask, and the halves are merged with shll $16 / orl.
10768 define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
10769 ; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
10770 ; VLX: # %bb.0: # %entry
10771 ; VLX-NEXT: vpcmpnltw (%rdi), %zmm0, %k0
10772 ; VLX-NEXT: kmovq %k0, %rax
10773 ; VLX-NEXT: vzeroupper
10776 ; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
10777 ; NoVLX: # %bb.0: # %entry
10778 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1
10779 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm2
10780 ; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm3
10781 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
10782 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
10783 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10784 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10785 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10786 ; NoVLX-NEXT: kmovw %k0, %ecx
10787 ; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
10788 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm0
10789 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10790 ; NoVLX-NEXT: kmovw %k0, %eax
10791 ; NoVLX-NEXT: shll $16, %eax
10792 ; NoVLX-NEXT: orl %ecx, %eax
10793 ; NoVLX-NEXT: vzeroupper
10796 %0 = bitcast <8 x i64> %__a to <32 x i16>
10797 %load = load <8 x i64>, <8 x i64>* %__b
10798 %1 = bitcast <8 x i64> %load to <32 x i16>
10799 %2 = icmp sge <32 x i16> %0, %1
10800 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10801 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of thirty-two i16 lanes, widened to a 64-bit mask.
; The compare result is ANDed with %__u (bitcast to <32 x i1>); shuffle
; indices 32-63 select from the zeroinitializer operand.
; Expected assembly: with AVX512BW enabled, one write-masked 512-bit
; vpcmpnltw; with AVX512F only, the low and high 256-bit halves are compared
; separately, the low mask is ANDed with %edi, %edi is shifted right by 16 and
; ANDed with the high mask, and the halves are merged with shll/movzwl/orl.
10805 define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
10806 ; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
10807 ; VLX: # %bb.0: # %entry
10808 ; VLX-NEXT: kmovd %edi, %k1
10809 ; VLX-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1}
10810 ; VLX-NEXT: kmovq %k0, %rax
10811 ; VLX-NEXT: vzeroupper
10814 ; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
10815 ; NoVLX: # %bb.0: # %entry
10816 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm2
10817 ; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
10818 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
10819 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
10820 ; NoVLX-NEXT: kmovw %k0, %eax
10821 ; NoVLX-NEXT: andl %edi, %eax
10822 ; NoVLX-NEXT: shrl $16, %edi
10823 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
10824 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
10825 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10826 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10827 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10828 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10829 ; NoVLX-NEXT: kmovw %k0, %ecx
10830 ; NoVLX-NEXT: andl %edi, %ecx
10831 ; NoVLX-NEXT: shll $16, %ecx
10832 ; NoVLX-NEXT: movzwl %ax, %eax
10833 ; NoVLX-NEXT: orl %ecx, %eax
10834 ; NoVLX-NEXT: vzeroupper
10837 %0 = bitcast <8 x i64> %__a to <32 x i16>
10838 %1 = bitcast <8 x i64> %__b to <32 x i16>
10839 %2 = icmp sge <32 x i16> %0, %1
10840 %3 = bitcast i32 %__u to <32 x i1>
10841 %4 = and <32 x i1> %2, %3
10842 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10843 %6 = bitcast <64 x i1> %5 to i64
; Memory-operand variant of the masked 32 x i16 signed >= compare widened to
; a 64-bit mask; the right-hand side is loaded through %__b and the result is
; ANDed with %__u (bitcast to <32 x i1>).
; Expected assembly: with AVX512BW enabled the load folds into the masked
; 512-bit vpcmpnltw as (%rsi); with AVX512F only the operand is read as two
; 256-bit loads ((%rsi) and 32(%rsi)), each half is compared and ANDed with
; the matching 16 bits of %edi, and the halves are merged with shll/movzwl/orl.
10847 define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
10848 ; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
10849 ; VLX: # %bb.0: # %entry
10850 ; VLX-NEXT: kmovd %edi, %k1
10851 ; VLX-NEXT: vpcmpnltw (%rsi), %zmm0, %k0 {%k1}
10852 ; VLX-NEXT: kmovq %k0, %rax
10853 ; VLX-NEXT: vzeroupper
10856 ; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
10857 ; NoVLX: # %bb.0: # %entry
10858 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
10859 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm1
10860 ; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
10861 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
10862 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
10863 ; NoVLX-NEXT: kmovw %k0, %eax
10864 ; NoVLX-NEXT: andl %edi, %eax
10865 ; NoVLX-NEXT: shrl $16, %edi
10866 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
10867 ; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm1
10868 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
10869 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10870 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
10871 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
10872 ; NoVLX-NEXT: kmovw %k0, %ecx
10873 ; NoVLX-NEXT: andl %edi, %ecx
10874 ; NoVLX-NEXT: shll $16, %ecx
10875 ; NoVLX-NEXT: movzwl %ax, %eax
10876 ; NoVLX-NEXT: orl %ecx, %eax
10877 ; NoVLX-NEXT: vzeroupper
10880 %0 = bitcast <8 x i64> %__a to <32 x i16>
10881 %load = load <8 x i64>, <8 x i64>* %__b
10882 %1 = bitcast <8 x i64> %load to <32 x i16>
10883 %2 = icmp sge <32 x i16> %0, %1
10884 %3 = bitcast i32 %__u to <32 x i1>
10885 %4 = and <32 x i1> %2, %3
10886 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
10887 %6 = bitcast <64 x i1> %5 to i64
; Unmasked signed >= compare of four i32 lanes, widened to an 8-bit mask.
; Shuffle indices 0-3 select the compare result and 4-7 select from the
; zeroinitializer operand.
; Expected assembly: with AVX512VL enabled, a 128-bit vpcmpnltd into a
; k-register; with AVX512F only, the xmm inputs are treated as zmm, compared
; with a 512-bit vpcmpnltd, and the kshiftlw/kshiftrw $12 pair clears mask
; bits 4-15.
10892 define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10893 ; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask:
10894 ; VLX: # %bb.0: # %entry
10895 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0
10896 ; VLX-NEXT: kmovd %k0, %eax
10897 ; VLX-NEXT: # kill: def $al killed $al killed $eax
10900 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask:
10901 ; NoVLX: # %bb.0: # %entry
10902 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
10903 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10904 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
10905 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10906 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10907 ; NoVLX-NEXT: kmovw %k0, %eax
10908 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
10909 ; NoVLX-NEXT: vzeroupper
10912 %0 = bitcast <2 x i64> %__a to <4 x i32>
10913 %1 = bitcast <2 x i64> %__b to <4 x i32>
10914 %2 = icmp sge <4 x i32> %0, %1
10915 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10916 %4 = bitcast <8 x i1> %3 to i8
; Memory-operand variant of the unmasked 4 x i32 signed >= compare widened to
; an 8-bit mask; the right-hand side is loaded through %__b.
; Expected assembly: with AVX512VL enabled the load folds into vpcmpnltd as
; (%rdi); with AVX512F only it is loaded with vmovdqa, compared at 512-bit
; width, and the kshiftlw/kshiftrw $12 pair clears mask bits 4-15.
10920 define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10921 ; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem:
10922 ; VLX: # %bb.0: # %entry
10923 ; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
10924 ; VLX-NEXT: kmovd %k0, %eax
10925 ; VLX-NEXT: # kill: def $al killed $al killed $eax
10928 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem:
10929 ; NoVLX: # %bb.0: # %entry
10930 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10931 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
10932 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
10933 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10934 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10935 ; NoVLX-NEXT: kmovw %k0, %eax
10936 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
10937 ; NoVLX-NEXT: vzeroupper
10940 %0 = bitcast <2 x i64> %__a to <4 x i32>
10941 %load = load <2 x i64>, <2 x i64>* %__b
10942 %1 = bitcast <2 x i64> %load to <4 x i32>
10943 %2 = icmp sge <4 x i32> %0, %1
10944 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10945 %4 = bitcast <8 x i1> %3 to i8
; Masked signed >= compare of four i32 lanes, widened to an 8-bit mask.
; %__u is bitcast to <8 x i1>, its low four elements are extracted
; (%extract.i) and ANDed with the compare result; shuffle indices 4-7 select
; from the zeroinitializer operand.
; Expected assembly: with AVX512VL enabled, a write-masked 128-bit vpcmpnltd;
; with AVX512F only, kmovw of %edi into %k1, a write-masked 512-bit vpcmpnltd,
; and the kshiftlw/kshiftrw $12 pair clearing mask bits 4-15.
10949 define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
10950 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
10951 ; VLX: # %bb.0: # %entry
10952 ; VLX-NEXT: kmovd %edi, %k1
10953 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
10954 ; VLX-NEXT: kmovd %k0, %eax
10955 ; VLX-NEXT: # kill: def $al killed $al killed $eax
10958 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
10959 ; NoVLX: # %bb.0: # %entry
10960 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
10961 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10962 ; NoVLX-NEXT: kmovw %edi, %k1
10963 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
10964 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10965 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10966 ; NoVLX-NEXT: kmovw %k0, %eax
10967 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
10968 ; NoVLX-NEXT: vzeroupper
10971 %0 = bitcast <2 x i64> %__a to <4 x i32>
10972 %1 = bitcast <2 x i64> %__b to <4 x i32>
10973 %2 = icmp sge <4 x i32> %0, %1
10974 %3 = bitcast i8 %__u to <8 x i1>
10975 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
10976 %4 = and <4 x i1> %2, %extract.i
10977 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10978 %6 = bitcast <8 x i1> %5 to i8
; Memory-operand variant of the masked 4 x i32 signed >= compare widened to
; an 8-bit mask; the right-hand side is loaded through %__b and the result is
; ANDed with the low four mask elements taken from %__u.
; Expected assembly: with AVX512VL enabled the load folds into the masked
; vpcmpnltd as (%rsi); with AVX512F only, vmovdqa + kmovw, a write-masked
; 512-bit vpcmpnltd, and the kshiftlw/kshiftrw $12 pair.
10982 define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
10983 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
10984 ; VLX: # %bb.0: # %entry
10985 ; VLX-NEXT: kmovd %edi, %k1
10986 ; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
10987 ; VLX-NEXT: kmovd %k0, %eax
10988 ; VLX-NEXT: # kill: def $al killed $al killed $eax
10991 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
10992 ; NoVLX: # %bb.0: # %entry
10993 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10994 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
10995 ; NoVLX-NEXT: kmovw %edi, %k1
10996 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
10997 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
10998 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
10999 ; NoVLX-NEXT: kmovw %k0, %eax
11000 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
11001 ; NoVLX-NEXT: vzeroupper
11004 %0 = bitcast <2 x i64> %__a to <4 x i32>
11005 %load = load <2 x i64>, <2 x i64>* %__b
11006 %1 = bitcast <2 x i64> %load to <4 x i32>
11007 %2 = icmp sge <4 x i32> %0, %1
11008 %3 = bitcast i8 %__u to <8 x i1>
11009 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11010 %4 = and <4 x i1> %2, %extract.i
11011 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
11012 %6 = bitcast <8 x i1> %5 to i8
; Broadcast-memory variant of the unmasked 4 x i32 signed >= compare widened
; to an 8-bit mask: a scalar i32 is loaded through %__b and splatted to all
; four lanes (insertelement + all-zero shuffle mask) to form the right-hand
; side.
; Expected assembly: with AVX512VL enabled the splat folds into vpcmpnltd as
; an embedded broadcast, (%rdi){1to4}; with AVX512F only, an explicit
; vpbroadcastd, a 512-bit vpcmpnltd, and the kshiftlw/kshiftrw $12 pair.
11017 define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
11018 ; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b:
11019 ; VLX: # %bb.0: # %entry
11020 ; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0
11021 ; VLX-NEXT: kmovd %k0, %eax
11022 ; VLX-NEXT: # kill: def $al killed $al killed $eax
11025 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b:
11026 ; NoVLX: # %bb.0: # %entry
11027 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11028 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
11029 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11030 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11031 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11032 ; NoVLX-NEXT: kmovw %k0, %eax
11033 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
11034 ; NoVLX-NEXT: vzeroupper
11037 %0 = bitcast <2 x i64> %__a to <4 x i32>
11038 %load = load i32, i32* %__b
11039 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11040 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11041 %2 = icmp sge <4 x i32> %0, %1
11042 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
11043 %4 = bitcast <8 x i1> %3 to i8
; Masked broadcast-memory variant of the 4 x i32 signed >= compare widened to
; an 8-bit mask: the right-hand side is a scalar i32 loaded through %__b and
; splatted to four lanes, and the result is ANDed with the low four mask
; elements taken from %__u (here with the mask as the first AND operand).
; Expected assembly: with AVX512VL enabled, a write-masked vpcmpnltd with an
; embedded broadcast, (%rsi){1to4}; with AVX512F only, vpbroadcastd + kmovw, a
; write-masked 512-bit vpcmpnltd, and the kshiftlw/kshiftrw $12 pair.
11047 define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
11048 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
11049 ; VLX: # %bb.0: # %entry
11050 ; VLX-NEXT: kmovd %edi, %k1
11051 ; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
11052 ; VLX-NEXT: kmovd %k0, %eax
11053 ; VLX-NEXT: # kill: def $al killed $al killed $eax
11056 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
11057 ; NoVLX: # %bb.0: # %entry
11058 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11059 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
11060 ; NoVLX-NEXT: kmovw %edi, %k1
11061 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11062 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11063 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11064 ; NoVLX-NEXT: kmovw %k0, %eax
11065 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
11066 ; NoVLX-NEXT: vzeroupper
11069 %0 = bitcast <2 x i64> %__a to <4 x i32>
11070 %load = load i32, i32* %__b
11071 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11072 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11073 %2 = icmp sge <4 x i32> %0, %1
11074 %3 = bitcast i8 %__u to <8 x i1>
11075 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11076 %4 = and <4 x i1> %extract.i, %2
11077 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
11078 %6 = bitcast <8 x i1> %5 to i8
; Unmasked signed >= compare of four i32 lanes, widened to a 16-bit mask.
; Shuffle indices 0-3 select the compare result; every index in 4-7 (the
; range is repeated to fill 16 elements) selects from the zeroinitializer
; operand.
; Expected assembly: with AVX512VL enabled, a 128-bit vpcmpnltd; with AVX512F
; only, a 512-bit vpcmpnltd followed by the kshiftlw/kshiftrw $12 pair that
; clears mask bits 4-15.
11083 define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11084 ; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask:
11085 ; VLX: # %bb.0: # %entry
11086 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0
11087 ; VLX-NEXT: kmovd %k0, %eax
11088 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11091 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask:
11092 ; NoVLX: # %bb.0: # %entry
11093 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
11094 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11095 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11096 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11097 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11098 ; NoVLX-NEXT: kmovw %k0, %eax
11099 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11100 ; NoVLX-NEXT: vzeroupper
11103 %0 = bitcast <2 x i64> %__a to <4 x i32>
11104 %1 = bitcast <2 x i64> %__b to <4 x i32>
11105 %2 = icmp sge <4 x i32> %0, %1
11106 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11107 %4 = bitcast <16 x i1> %3 to i16
; Memory-operand variant of the unmasked 4 x i32 signed >= compare widened to
; a 16-bit mask; the right-hand side is loaded through %__b.
; Expected assembly: with AVX512VL enabled the load folds into vpcmpnltd as
; (%rdi); with AVX512F only, vmovdqa, a 512-bit vpcmpnltd, and the
; kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
11111 define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
11112 ; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem:
11113 ; VLX: # %bb.0: # %entry
11114 ; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
11115 ; VLX-NEXT: kmovd %k0, %eax
11116 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11119 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem:
11120 ; NoVLX: # %bb.0: # %entry
11121 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11122 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
11123 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11124 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11125 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11126 ; NoVLX-NEXT: kmovw %k0, %eax
11127 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11128 ; NoVLX-NEXT: vzeroupper
11131 %0 = bitcast <2 x i64> %__a to <4 x i32>
11132 %load = load <2 x i64>, <2 x i64>* %__b
11133 %1 = bitcast <2 x i64> %load to <4 x i32>
11134 %2 = icmp sge <4 x i32> %0, %1
11135 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11136 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of four i32 lanes, widened to a 16-bit mask.
; The low four elements of %__u (bitcast to <8 x i1>) are ANDed with the
; compare result; shuffle indices 4-7 (repeated) select from the
; zeroinitializer operand.
; Expected assembly: with AVX512VL enabled, a write-masked 128-bit vpcmpnltd;
; with AVX512F only, kmovw of %edi into %k1, a write-masked 512-bit vpcmpnltd,
; and the kshiftlw/kshiftrw $12 pair.
11140 define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11141 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
11142 ; VLX: # %bb.0: # %entry
11143 ; VLX-NEXT: kmovd %edi, %k1
11144 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
11145 ; VLX-NEXT: kmovd %k0, %eax
11146 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11149 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
11150 ; NoVLX: # %bb.0: # %entry
11151 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
11152 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11153 ; NoVLX-NEXT: kmovw %edi, %k1
11154 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11155 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11156 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11157 ; NoVLX-NEXT: kmovw %k0, %eax
11158 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11159 ; NoVLX-NEXT: vzeroupper
11162 %0 = bitcast <2 x i64> %__a to <4 x i32>
11163 %1 = bitcast <2 x i64> %__b to <4 x i32>
11164 %2 = icmp sge <4 x i32> %0, %1
11165 %3 = bitcast i8 %__u to <8 x i1>
11166 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11167 %4 = and <4 x i1> %2, %extract.i
11168 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11169 %6 = bitcast <16 x i1> %5 to i16
; Memory-operand variant of the masked 4 x i32 signed >= compare widened to a
; 16-bit mask; the right-hand side is loaded through %__b and the result is
; ANDed with the low four mask elements taken from %__u.
; Expected assembly: with AVX512VL enabled the load folds into the masked
; vpcmpnltd as (%rsi); with AVX512F only, vmovdqa + kmovw, a write-masked
; 512-bit vpcmpnltd, and the kshiftlw/kshiftrw $12 pair.
11173 define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
11174 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
11175 ; VLX: # %bb.0: # %entry
11176 ; VLX-NEXT: kmovd %edi, %k1
11177 ; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
11178 ; VLX-NEXT: kmovd %k0, %eax
11179 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11182 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
11183 ; NoVLX: # %bb.0: # %entry
11184 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11185 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
11186 ; NoVLX-NEXT: kmovw %edi, %k1
11187 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11188 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11189 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11190 ; NoVLX-NEXT: kmovw %k0, %eax
11191 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11192 ; NoVLX-NEXT: vzeroupper
11195 %0 = bitcast <2 x i64> %__a to <4 x i32>
11196 %load = load <2 x i64>, <2 x i64>* %__b
11197 %1 = bitcast <2 x i64> %load to <4 x i32>
11198 %2 = icmp sge <4 x i32> %0, %1
11199 %3 = bitcast i8 %__u to <8 x i1>
11200 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11201 %4 = and <4 x i1> %2, %extract.i
11202 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11203 %6 = bitcast <16 x i1> %5 to i16
; Broadcast-memory variant of the unmasked 4 x i32 signed >= compare widened
; to a 16-bit mask: a scalar i32 loaded through %__b is splatted to all four
; lanes to form the right-hand side.
; Expected assembly: with AVX512VL enabled the splat folds into vpcmpnltd as
; an embedded broadcast, (%rdi){1to4}; with AVX512F only, an explicit
; vpbroadcastd, a 512-bit vpcmpnltd, and the kshiftlw/kshiftrw $12 pair.
11208 define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
11209 ; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b:
11210 ; VLX: # %bb.0: # %entry
11211 ; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0
11212 ; VLX-NEXT: kmovd %k0, %eax
11213 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11216 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b:
11217 ; NoVLX: # %bb.0: # %entry
11218 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11219 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
11220 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11221 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11222 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11223 ; NoVLX-NEXT: kmovw %k0, %eax
11224 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11225 ; NoVLX-NEXT: vzeroupper
11228 %0 = bitcast <2 x i64> %__a to <4 x i32>
11229 %load = load i32, i32* %__b
11230 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11231 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11232 %2 = icmp sge <4 x i32> %0, %1
11233 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11234 %4 = bitcast <16 x i1> %3 to i16
; Masked broadcast-memory variant of the 4 x i32 signed >= compare widened to
; a 16-bit mask: the right-hand side is a scalar i32 loaded through %__b and
; splatted to four lanes, and the result is ANDed with the low four mask
; elements taken from %__u (mask as the first AND operand).
; Expected assembly: with AVX512VL enabled, a write-masked vpcmpnltd with an
; embedded broadcast, (%rsi){1to4}; with AVX512F only, vpbroadcastd + kmovw, a
; write-masked 512-bit vpcmpnltd, and the kshiftlw/kshiftrw $12 pair.
11238 define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
11239 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
11240 ; VLX: # %bb.0: # %entry
11241 ; VLX-NEXT: kmovd %edi, %k1
11242 ; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
11243 ; VLX-NEXT: kmovd %k0, %eax
11244 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11247 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
11248 ; NoVLX: # %bb.0: # %entry
11249 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11250 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
11251 ; NoVLX-NEXT: kmovw %edi, %k1
11252 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11253 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11254 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11255 ; NoVLX-NEXT: kmovw %k0, %eax
11256 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11257 ; NoVLX-NEXT: vzeroupper
11260 %0 = bitcast <2 x i64> %__a to <4 x i32>
11261 %load = load i32, i32* %__b
11262 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11263 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11264 %2 = icmp sge <4 x i32> %0, %1
11265 %3 = bitcast i8 %__u to <8 x i1>
11266 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11267 %4 = and <4 x i1> %extract.i, %2
11268 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11269 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed >= compare of four i32 lanes, widened to a 32-bit mask.
; Shuffle indices 0-3 select the compare result; every index in 4-7 (the
; range is repeated to fill 32 elements) selects from the zeroinitializer
; operand.
; Expected assembly: with AVX512VL enabled, a 128-bit vpcmpnltd + kmovd; with
; AVX512F only, a 512-bit vpcmpnltd followed by the kshiftlw/kshiftrw $12 pair
; that clears mask bits 4-15, then kmovw.
11274 define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11275 ; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
11276 ; VLX: # %bb.0: # %entry
11277 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0
11278 ; VLX-NEXT: kmovd %k0, %eax
11281 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
11282 ; NoVLX: # %bb.0: # %entry
11283 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
11284 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11285 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11286 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11287 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11288 ; NoVLX-NEXT: kmovw %k0, %eax
11289 ; NoVLX-NEXT: vzeroupper
11292 %0 = bitcast <2 x i64> %__a to <4 x i32>
11293 %1 = bitcast <2 x i64> %__b to <4 x i32>
11294 %2 = icmp sge <4 x i32> %0, %1
11295 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11296 %4 = bitcast <32 x i1> %3 to i32
; Signed >= compare (icmp sge) of a register <4 x i32> against a full vector
; loaded from %__b. The <4 x i1> result is widened to <32 x i1> with zero
; lanes (shuffle indices 4-7 pick from zeroinitializer) and bitcast to i32.
; The VLX run expects the load folded into vpcmpnltd; the NoVLX run expects a
; separate vmovdqa load, a zmm compare and a kshiftlw/kshiftrw $12 pair.
11300 define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
11301 ; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
11302 ; VLX: # %bb.0: # %entry
11303 ; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
11304 ; VLX-NEXT: kmovd %k0, %eax
11307 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
11308 ; NoVLX: # %bb.0: # %entry
11309 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11310 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
11311 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11312 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11313 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11314 ; NoVLX-NEXT: kmovw %k0, %eax
11315 ; NoVLX-NEXT: vzeroupper
11318 %0 = bitcast <2 x i64> %__a to <4 x i32>
11319 %load = load <2 x i64>, <2 x i64>* %__b
11320 %1 = bitcast <2 x i64> %load to <4 x i32>
11321 %2 = icmp sge <4 x i32> %0, %1
11322 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11323 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare (icmp sge) of two register <4 x i32> operands.
; %__u is bitcast to <8 x i1>, its lanes 0-3 are extracted and ANDed with the
; compare result; the <4 x i1> value is then widened to <32 x i1> with zero
; lanes and bitcast to i32. Both runs expect the mask to be moved into %k1 and
; applied as a write-mask on vpcmpnltd.
11327 define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11328 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
11329 ; VLX: # %bb.0: # %entry
11330 ; VLX-NEXT: kmovd %edi, %k1
11331 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
11332 ; VLX-NEXT: kmovd %k0, %eax
11335 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
11336 ; NoVLX: # %bb.0: # %entry
11337 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
11338 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11339 ; NoVLX-NEXT: kmovw %edi, %k1
11340 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11341 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11342 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11343 ; NoVLX-NEXT: kmovw %k0, %eax
11344 ; NoVLX-NEXT: vzeroupper
11347 %0 = bitcast <2 x i64> %__a to <4 x i32>
11348 %1 = bitcast <2 x i64> %__b to <4 x i32>
11349 %2 = icmp sge <4 x i32> %0, %1
11350 %3 = bitcast i8 %__u to <8 x i1>
11351 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11352 %4 = and <4 x i1> %2, %extract.i
11353 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11354 %6 = bitcast <32 x i1> %5 to i32
; Masked signed >= compare (icmp sge) of a register <4 x i32> against a full
; vector loaded from %__b. Lanes 0-3 of %__u (bitcast to <8 x i1>) are ANDed
; with the compare result, which is then widened to <32 x i1> with zero lanes
; and bitcast to i32. The VLX run expects the load folded into the masked
; vpcmpnltd; the NoVLX run expects a separate vmovdqa load before the zmm compare.
11358 define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
11359 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
11360 ; VLX: # %bb.0: # %entry
11361 ; VLX-NEXT: kmovd %edi, %k1
11362 ; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
11363 ; VLX-NEXT: kmovd %k0, %eax
11366 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
11367 ; NoVLX: # %bb.0: # %entry
11368 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11369 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
11370 ; NoVLX-NEXT: kmovw %edi, %k1
11371 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11372 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11373 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11374 ; NoVLX-NEXT: kmovw %k0, %eax
11375 ; NoVLX-NEXT: vzeroupper
11378 %0 = bitcast <2 x i64> %__a to <4 x i32>
11379 %load = load <2 x i64>, <2 x i64>* %__b
11380 %1 = bitcast <2 x i64> %load to <4 x i32>
11381 %2 = icmp sge <4 x i32> %0, %1
11382 %3 = bitcast i8 %__u to <8 x i1>
11383 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11384 %4 = and <4 x i1> %2, %extract.i
11385 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11386 %6 = bitcast <32 x i1> %5 to i32
; Signed >= compare (icmp sge) of a register <4 x i32> against a scalar i32
; loaded from %__b and splatted to all four lanes (insertelement + all-zero
; shuffle mask). The <4 x i1> result is widened to <32 x i1> with zero lanes
; and bitcast to i32. The VLX run expects an embedded {1to4} broadcast operand;
; the NoVLX run expects an explicit vpbroadcastd before the zmm compare.
11391 define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
11392 ; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
11393 ; VLX: # %bb.0: # %entry
11394 ; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0
11395 ; VLX-NEXT: kmovd %k0, %eax
11398 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
11399 ; NoVLX: # %bb.0: # %entry
11400 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11401 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
11402 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11403 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11404 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11405 ; NoVLX-NEXT: kmovw %k0, %eax
11406 ; NoVLX-NEXT: vzeroupper
11409 %0 = bitcast <2 x i64> %__a to <4 x i32>
11410 %load = load i32, i32* %__b
11411 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11412 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11413 %2 = icmp sge <4 x i32> %0, %1
11414 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11415 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare (icmp sge) of a register <4 x i32> against a scalar
; i32 loaded from %__b and splatted to all four lanes. Lanes 0-3 of %__u
; (bitcast to <8 x i1>) are ANDed with the compare result, which is widened to
; <32 x i1> with zero lanes and bitcast to i32. The VLX run expects a masked
; compare with an embedded {1to4} broadcast; the NoVLX run expects vpbroadcastd
; followed by a masked zmm compare.
11419 define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
11420 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
11421 ; VLX: # %bb.0: # %entry
11422 ; VLX-NEXT: kmovd %edi, %k1
11423 ; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
11424 ; VLX-NEXT: kmovd %k0, %eax
11427 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
11428 ; NoVLX: # %bb.0: # %entry
11429 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11430 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
11431 ; NoVLX-NEXT: kmovw %edi, %k1
11432 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11433 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11434 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11435 ; NoVLX-NEXT: kmovw %k0, %eax
11436 ; NoVLX-NEXT: vzeroupper
11439 %0 = bitcast <2 x i64> %__a to <4 x i32>
11440 %load = load i32, i32* %__b
11441 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11442 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11443 %2 = icmp sge <4 x i32> %0, %1
11444 %3 = bitcast i8 %__u to <8 x i1>
11445 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11446 %4 = and <4 x i1> %extract.i, %2
11447 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11448 %6 = bitcast <32 x i1> %5 to i32
; Signed >= compare (icmp sge) of two register <4 x i32> operands. The
; <4 x i1> result is widened to <64 x i1> by a shufflevector against
; zeroinitializer (indices 0-3 pick the compare bits, indices 4-7 pick zero
; lanes) and bitcast to i64. The VLX run expects the mask read out with kmovq;
; the NoVLX run expects a zmm compare, a kshiftlw/kshiftrw $12 pair, then
; kmovw + movzwl.
11453 define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11454 ; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask:
11455 ; VLX: # %bb.0: # %entry
11456 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0
11457 ; VLX-NEXT: kmovq %k0, %rax
11460 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask:
11461 ; NoVLX: # %bb.0: # %entry
11462 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
11463 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11464 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11465 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11466 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11467 ; NoVLX-NEXT: kmovw %k0, %eax
11468 ; NoVLX-NEXT: movzwl %ax, %eax
11469 ; NoVLX-NEXT: vzeroupper
11472 %0 = bitcast <2 x i64> %__a to <4 x i32>
11473 %1 = bitcast <2 x i64> %__b to <4 x i32>
11474 %2 = icmp sge <4 x i32> %0, %1
11475 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11476 %4 = bitcast <64 x i1> %3 to i64
; Signed >= compare (icmp sge) of a register <4 x i32> against a full vector
; loaded from %__b. The <4 x i1> result is widened to <64 x i1> with zero
; lanes (shuffle indices 4-7 pick from zeroinitializer) and bitcast to i64.
; The VLX run expects the load folded into vpcmpnltd and a kmovq readout; the
; NoVLX run expects a vmovdqa load, a zmm compare and a kshiftlw/kshiftrw $12 pair.
11480 define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
11481 ; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem:
11482 ; VLX: # %bb.0: # %entry
11483 ; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
11484 ; VLX-NEXT: kmovq %k0, %rax
11487 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem:
11488 ; NoVLX: # %bb.0: # %entry
11489 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11490 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
11491 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11492 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11493 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11494 ; NoVLX-NEXT: kmovw %k0, %eax
11495 ; NoVLX-NEXT: movzwl %ax, %eax
11496 ; NoVLX-NEXT: vzeroupper
11499 %0 = bitcast <2 x i64> %__a to <4 x i32>
11500 %load = load <2 x i64>, <2 x i64>* %__b
11501 %1 = bitcast <2 x i64> %load to <4 x i32>
11502 %2 = icmp sge <4 x i32> %0, %1
11503 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11504 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare (icmp sge) of two register <4 x i32> operands.
; %__u is bitcast to <8 x i1>, its lanes 0-3 are extracted and ANDed with the
; compare result; the <4 x i1> value is then widened to <64 x i1> with zero
; lanes and bitcast to i64. Both runs expect the mask to be moved into %k1 and
; applied as a write-mask on vpcmpnltd.
11508 define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
11509 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask:
11510 ; VLX: # %bb.0: # %entry
11511 ; VLX-NEXT: kmovd %edi, %k1
11512 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
11513 ; VLX-NEXT: kmovq %k0, %rax
11516 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask:
11517 ; NoVLX: # %bb.0: # %entry
11518 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
11519 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11520 ; NoVLX-NEXT: kmovw %edi, %k1
11521 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11522 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11523 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11524 ; NoVLX-NEXT: kmovw %k0, %eax
11525 ; NoVLX-NEXT: movzwl %ax, %eax
11526 ; NoVLX-NEXT: vzeroupper
11529 %0 = bitcast <2 x i64> %__a to <4 x i32>
11530 %1 = bitcast <2 x i64> %__b to <4 x i32>
11531 %2 = icmp sge <4 x i32> %0, %1
11532 %3 = bitcast i8 %__u to <8 x i1>
11533 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11534 %4 = and <4 x i1> %2, %extract.i
11535 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11536 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare (icmp sge) of a register <4 x i32> against a full
; vector loaded from %__b. Lanes 0-3 of %__u (bitcast to <8 x i1>) are ANDed
; with the compare result, which is then widened to <64 x i1> with zero lanes
; and bitcast to i64. The VLX run expects the load folded into the masked
; vpcmpnltd; the NoVLX run expects a separate vmovdqa load before the zmm compare.
11540 define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
11541 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem:
11542 ; VLX: # %bb.0: # %entry
11543 ; VLX-NEXT: kmovd %edi, %k1
11544 ; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
11545 ; VLX-NEXT: kmovq %k0, %rax
11548 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem:
11549 ; NoVLX: # %bb.0: # %entry
11550 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11551 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
11552 ; NoVLX-NEXT: kmovw %edi, %k1
11553 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11554 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11555 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11556 ; NoVLX-NEXT: kmovw %k0, %eax
11557 ; NoVLX-NEXT: movzwl %ax, %eax
11558 ; NoVLX-NEXT: vzeroupper
11561 %0 = bitcast <2 x i64> %__a to <4 x i32>
11562 %load = load <2 x i64>, <2 x i64>* %__b
11563 %1 = bitcast <2 x i64> %load to <4 x i32>
11564 %2 = icmp sge <4 x i32> %0, %1
11565 %3 = bitcast i8 %__u to <8 x i1>
11566 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11567 %4 = and <4 x i1> %2, %extract.i
11568 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11569 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare (icmp sge) of a register <4 x i32> against a scalar i32
; loaded from %__b and splatted to all four lanes (insertelement + all-zero
; shuffle mask). The <4 x i1> result is widened to <64 x i1> with zero lanes
; and bitcast to i64. The VLX run expects an embedded {1to4} broadcast operand;
; the NoVLX run expects an explicit vpbroadcastd before the zmm compare.
11574 define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
11575 ; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
11576 ; VLX: # %bb.0: # %entry
11577 ; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0
11578 ; VLX-NEXT: kmovq %k0, %rax
11581 ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
11582 ; NoVLX: # %bb.0: # %entry
11583 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11584 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
11585 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11586 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11587 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11588 ; NoVLX-NEXT: kmovw %k0, %eax
11589 ; NoVLX-NEXT: movzwl %ax, %eax
11590 ; NoVLX-NEXT: vzeroupper
11593 %0 = bitcast <2 x i64> %__a to <4 x i32>
11594 %load = load i32, i32* %__b
11595 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11596 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11597 %2 = icmp sge <4 x i32> %0, %1
11598 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11599 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare (icmp sge) of a register <4 x i32> against a scalar
; i32 loaded from %__b and splatted to all four lanes. Lanes 0-3 of %__u
; (bitcast to <8 x i1>) are ANDed with the compare result, which is widened to
; <64 x i1> with zero lanes and bitcast to i64. The VLX run expects a masked
; compare with an embedded {1to4} broadcast; the NoVLX run expects vpbroadcastd
; followed by a masked zmm compare.
11603 define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
11604 ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
11605 ; VLX: # %bb.0: # %entry
11606 ; VLX-NEXT: kmovd %edi, %k1
11607 ; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
11608 ; VLX-NEXT: kmovq %k0, %rax
11611 ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
11612 ; NoVLX: # %bb.0: # %entry
11613 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11614 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
11615 ; NoVLX-NEXT: kmovw %edi, %k1
11616 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11617 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
11618 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
11619 ; NoVLX-NEXT: kmovw %k0, %eax
11620 ; NoVLX-NEXT: movzwl %ax, %eax
11621 ; NoVLX-NEXT: vzeroupper
11624 %0 = bitcast <2 x i64> %__a to <4 x i32>
11625 %load = load i32, i32* %__b
11626 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
11627 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11628 %2 = icmp sge <4 x i32> %0, %1
11629 %3 = bitcast i8 %__u to <8 x i1>
11630 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11631 %4 = and <4 x i1> %extract.i, %2
11632 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
11633 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare (icmp sge) of two register <8 x i32> operands. The
; <8 x i1> result is widened to <16 x i1> by a shufflevector against
; zeroinitializer (indices 0-7 pick the compare bits, indices 8-15 pick zero
; lanes) and bitcast to i16. The VLX run expects one ymm vpcmpnltd into a mask
; register; the NoVLX run expects a zmm compare plus a kshiftlw/kshiftrw $8 pair.
11638 define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11639 ; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask:
11640 ; VLX: # %bb.0: # %entry
11641 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0
11642 ; VLX-NEXT: kmovd %k0, %eax
11643 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11644 ; VLX-NEXT: vzeroupper
11647 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask:
11648 ; NoVLX: # %bb.0: # %entry
11649 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
11650 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11651 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11652 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11653 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11654 ; NoVLX-NEXT: kmovw %k0, %eax
11655 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11656 ; NoVLX-NEXT: vzeroupper
11659 %0 = bitcast <4 x i64> %__a to <8 x i32>
11660 %1 = bitcast <4 x i64> %__b to <8 x i32>
11661 %2 = icmp sge <8 x i32> %0, %1
11662 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11663 %4 = bitcast <16 x i1> %3 to i16
; Signed >= compare (icmp sge) of a register <8 x i32> against a full vector
; loaded from %__b. The <8 x i1> result is widened to <16 x i1> with zero
; lanes (shuffle indices 8-15 pick from zeroinitializer) and bitcast to i16.
; The VLX run expects the load folded into vpcmpnltd; the NoVLX run expects a
; separate vmovdqa load, a zmm compare and a kshiftlw/kshiftrw $8 pair.
11667 define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
11668 ; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem:
11669 ; VLX: # %bb.0: # %entry
11670 ; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
11671 ; VLX-NEXT: kmovd %k0, %eax
11672 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11673 ; VLX-NEXT: vzeroupper
11676 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem:
11677 ; NoVLX: # %bb.0: # %entry
11678 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11679 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
11680 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11681 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11682 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11683 ; NoVLX-NEXT: kmovw %k0, %eax
11684 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11685 ; NoVLX-NEXT: vzeroupper
11688 %0 = bitcast <4 x i64> %__a to <8 x i32>
11689 %load = load <4 x i64>, <4 x i64>* %__b
11690 %1 = bitcast <4 x i64> %load to <8 x i32>
11691 %2 = icmp sge <8 x i32> %0, %1
11692 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11693 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare (icmp sge) of two register <8 x i32> operands.
; %__u is bitcast to <8 x i1> and ANDed with the compare result; the <8 x i1>
; value is then widened to <16 x i1> with zero lanes (shuffle indices 8-15
; pick from zeroinitializer) and bitcast to i16. Both runs expect the mask to
; be moved into %k1 and applied as a write-mask on vpcmpnltd.
11697 define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11698 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask:
11699 ; VLX: # %bb.0: # %entry
11700 ; VLX-NEXT: kmovd %edi, %k1
11701 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 {%k1}
11702 ; VLX-NEXT: kmovd %k0, %eax
11703 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11704 ; VLX-NEXT: vzeroupper
11707 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask:
11708 ; NoVLX: # %bb.0: # %entry
11709 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
11710 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11711 ; NoVLX-NEXT: kmovw %edi, %k1
11712 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11713 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11714 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11715 ; NoVLX-NEXT: kmovw %k0, %eax
11716 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11717 ; NoVLX-NEXT: vzeroupper
11720 %0 = bitcast <4 x i64> %__a to <8 x i32>
11721 %1 = bitcast <4 x i64> %__b to <8 x i32>
11722 %2 = icmp sge <8 x i32> %0, %1
11723 %3 = bitcast i8 %__u to <8 x i1>
11724 %4 = and <8 x i1> %2, %3
11725 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11726 %6 = bitcast <16 x i1> %5 to i16
; Masked signed >= compare (icmp sge) of a register <8 x i32> against a full
; vector loaded from %__b. %__u is bitcast to <8 x i1> and ANDed with the
; compare result, which is then widened to <16 x i1> with zero lanes and
; bitcast to i16. The VLX run expects the load folded into the masked
; vpcmpnltd; the NoVLX run expects a separate vmovdqa load before the zmm compare.
11730 define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
11731 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem:
11732 ; VLX: # %bb.0: # %entry
11733 ; VLX-NEXT: kmovd %edi, %k1
11734 ; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
11735 ; VLX-NEXT: kmovd %k0, %eax
11736 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11737 ; VLX-NEXT: vzeroupper
11740 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem:
11741 ; NoVLX: # %bb.0: # %entry
11742 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11743 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
11744 ; NoVLX-NEXT: kmovw %edi, %k1
11745 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11746 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11747 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11748 ; NoVLX-NEXT: kmovw %k0, %eax
11749 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11750 ; NoVLX-NEXT: vzeroupper
11753 %0 = bitcast <4 x i64> %__a to <8 x i32>
11754 %load = load <4 x i64>, <4 x i64>* %__b
11755 %1 = bitcast <4 x i64> %load to <8 x i32>
11756 %2 = icmp sge <8 x i32> %0, %1
11757 %3 = bitcast i8 %__u to <8 x i1>
11758 %4 = and <8 x i1> %2, %3
11759 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11760 %6 = bitcast <16 x i1> %5 to i16
; Signed >= compare (icmp sge) of a register <8 x i32> against a scalar i32
; loaded from %__b and splatted to all eight lanes (insertelement + all-zero
; shuffle mask). The <8 x i1> result is widened to <16 x i1> with zero lanes
; and bitcast to i16. The VLX run expects an embedded {1to8} broadcast operand;
; the NoVLX run expects an explicit vpbroadcastd before the zmm compare.
11765 define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
11766 ; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
11767 ; VLX: # %bb.0: # %entry
11768 ; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0
11769 ; VLX-NEXT: kmovd %k0, %eax
11770 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11771 ; VLX-NEXT: vzeroupper
11774 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
11775 ; NoVLX: # %bb.0: # %entry
11776 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11777 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
11778 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11779 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11780 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11781 ; NoVLX-NEXT: kmovw %k0, %eax
11782 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11783 ; NoVLX-NEXT: vzeroupper
11786 %0 = bitcast <4 x i64> %__a to <8 x i32>
11787 %load = load i32, i32* %__b
11788 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
11789 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
11790 %2 = icmp sge <8 x i32> %0, %1
11791 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11792 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare (icmp sge) of a register <8 x i32> against a scalar
; i32 loaded from %__b and splatted to all eight lanes. %__u is bitcast to
; <8 x i1> and ANDed with the compare result, which is widened to <16 x i1>
; with zero lanes and bitcast to i16. The VLX run expects a masked compare
; with an embedded {1to8} broadcast; the NoVLX run expects vpbroadcastd
; followed by a masked zmm compare.
11796 define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
11797 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
11798 ; VLX: # %bb.0: # %entry
11799 ; VLX-NEXT: kmovd %edi, %k1
11800 ; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
11801 ; VLX-NEXT: kmovd %k0, %eax
11802 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
11803 ; VLX-NEXT: vzeroupper
11806 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
11807 ; NoVLX: # %bb.0: # %entry
11808 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11809 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
11810 ; NoVLX-NEXT: kmovw %edi, %k1
11811 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11812 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11813 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11814 ; NoVLX-NEXT: kmovw %k0, %eax
11815 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
11816 ; NoVLX-NEXT: vzeroupper
11819 %0 = bitcast <4 x i64> %__a to <8 x i32>
11820 %load = load i32, i32* %__b
11821 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
11822 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
11823 %2 = icmp sge <8 x i32> %0, %1
11824 %3 = bitcast i8 %__u to <8 x i1>
11825 %4 = and <8 x i1> %3, %2
11826 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11827 %6 = bitcast <16 x i1> %5 to i16
; Signed >= compare (icmp sge) of two register <8 x i32> operands. The
; <8 x i1> result is widened to <32 x i1> by a shufflevector against
; zeroinitializer (indices 0-7 pick the compare bits, indices 8-15 pick zero
; lanes) and bitcast to i32. The VLX run expects one ymm vpcmpnltd into a mask
; register; the NoVLX run expects a zmm compare plus a kshiftlw/kshiftrw $8 pair.
11832 define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11833 ; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
11834 ; VLX: # %bb.0: # %entry
11835 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0
11836 ; VLX-NEXT: kmovd %k0, %eax
11837 ; VLX-NEXT: vzeroupper
11840 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
11841 ; NoVLX: # %bb.0: # %entry
11842 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
11843 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11844 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11845 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11846 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11847 ; NoVLX-NEXT: kmovw %k0, %eax
11848 ; NoVLX-NEXT: vzeroupper
11851 %0 = bitcast <4 x i64> %__a to <8 x i32>
11852 %1 = bitcast <4 x i64> %__b to <8 x i32>
11853 %2 = icmp sge <8 x i32> %0, %1
11854 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11855 %4 = bitcast <32 x i1> %3 to i32
; Signed >= compare (icmp sge) of a register <8 x i32> against a full vector
; loaded from %__b. The <8 x i1> result is widened to <32 x i1> with zero
; lanes (shuffle indices 8-15 pick from zeroinitializer) and bitcast to i32.
; The VLX run expects the load folded into vpcmpnltd; the NoVLX run expects a
; separate vmovdqa load, a zmm compare and a kshiftlw/kshiftrw $8 pair.
11859 define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
11860 ; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
11861 ; VLX: # %bb.0: # %entry
11862 ; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
11863 ; VLX-NEXT: kmovd %k0, %eax
11864 ; VLX-NEXT: vzeroupper
11867 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
11868 ; NoVLX: # %bb.0: # %entry
11869 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11870 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
11871 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11872 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11873 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11874 ; NoVLX-NEXT: kmovw %k0, %eax
11875 ; NoVLX-NEXT: vzeroupper
11878 %0 = bitcast <4 x i64> %__a to <8 x i32>
11879 %load = load <4 x i64>, <4 x i64>* %__b
11880 %1 = bitcast <4 x i64> %load to <8 x i32>
11881 %2 = icmp sge <8 x i32> %0, %1
11882 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11883 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare (icmp sge) of two register <8 x i32> operands.
; %__u is bitcast to <8 x i1> and ANDed with the compare result; the <8 x i1>
; value is then widened to <32 x i1> with zero lanes (shuffle indices 8-15
; pick from zeroinitializer) and bitcast to i32. Both runs expect the mask to
; be moved into %k1 and applied as a write-mask on vpcmpnltd.
11887 define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
11888 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
11889 ; VLX: # %bb.0: # %entry
11890 ; VLX-NEXT: kmovd %edi, %k1
11891 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 {%k1}
11892 ; VLX-NEXT: kmovd %k0, %eax
11893 ; VLX-NEXT: vzeroupper
11896 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
11897 ; NoVLX: # %bb.0: # %entry
11898 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
11899 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11900 ; NoVLX-NEXT: kmovw %edi, %k1
11901 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11902 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11903 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11904 ; NoVLX-NEXT: kmovw %k0, %eax
11905 ; NoVLX-NEXT: vzeroupper
11908 %0 = bitcast <4 x i64> %__a to <8 x i32>
11909 %1 = bitcast <4 x i64> %__b to <8 x i32>
11910 %2 = icmp sge <8 x i32> %0, %1
11911 %3 = bitcast i8 %__u to <8 x i1>
11912 %4 = and <8 x i1> %2, %3
11913 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11914 %6 = bitcast <32 x i1> %5 to i32
; Masked signed >= compare (icmp sge) of a register <8 x i32> against a full
; vector loaded from %__b. %__u is bitcast to <8 x i1> and ANDed with the
; compare result, which is then widened to <32 x i1> with zero lanes and
; bitcast to i32. The VLX run expects the load folded into the masked
; vpcmpnltd; the NoVLX run expects a separate vmovdqa load before the zmm compare.
11918 define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
11919 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
11920 ; VLX: # %bb.0: # %entry
11921 ; VLX-NEXT: kmovd %edi, %k1
11922 ; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
11923 ; VLX-NEXT: kmovd %k0, %eax
11924 ; VLX-NEXT: vzeroupper
11927 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
11928 ; NoVLX: # %bb.0: # %entry
11929 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11930 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
11931 ; NoVLX-NEXT: kmovw %edi, %k1
11932 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11933 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11934 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11935 ; NoVLX-NEXT: kmovw %k0, %eax
11936 ; NoVLX-NEXT: vzeroupper
11939 %0 = bitcast <4 x i64> %__a to <8 x i32>
11940 %load = load <4 x i64>, <4 x i64>* %__b
11941 %1 = bitcast <4 x i64> %load to <8 x i32>
11942 %2 = icmp sge <8 x i32> %0, %1
11943 %3 = bitcast i8 %__u to <8 x i1>
11944 %4 = and <8 x i1> %2, %3
11945 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11946 %6 = bitcast <32 x i1> %5 to i32
; Signed >= compare (icmp sge) of a register <8 x i32> against a scalar i32
; loaded from %__b and splatted to all eight lanes (insertelement + all-zero
; shuffle mask). The <8 x i1> result is widened to <32 x i1> with zero lanes
; and bitcast to i32. The VLX run expects an embedded {1to8} broadcast operand;
; the NoVLX run expects an explicit vpbroadcastd before the zmm compare.
11951 define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
11952 ; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
11953 ; VLX: # %bb.0: # %entry
11954 ; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0
11955 ; VLX-NEXT: kmovd %k0, %eax
11956 ; VLX-NEXT: vzeroupper
11959 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
11960 ; NoVLX: # %bb.0: # %entry
11961 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11962 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
11963 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
11964 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11965 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11966 ; NoVLX-NEXT: kmovw %k0, %eax
11967 ; NoVLX-NEXT: vzeroupper
11970 %0 = bitcast <4 x i64> %__a to <8 x i32>
11971 %load = load i32, i32* %__b
11972 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
11973 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
11974 %2 = icmp sge <8 x i32> %0, %1
11975 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
11976 %4 = bitcast <32 x i1> %3 to i32
; Masked form of the broadcast signed >= compare on eight i32 lanes: the scalar
; loaded from %__b is splatted, compared against %__a, and the <8 x i1> result
; is ANDed with %__u (bitcast from i8 to <8 x i1>). The result is widened to
; <32 x i1> with zeroinitializer lanes and bitcast to i32.
; Expected codegen: %__u is moved into %k1 and applied as a write-mask on the
; compare. With +avx512vl the load is folded as a {1to8} broadcast; with
; AVX512F only, a ymm broadcast feeds a zmm compare and a kshiftlw/kshiftrw $8
; pair clears mask bits 8-15.
11980 define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
11981 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
11982 ; VLX: # %bb.0: # %entry
11983 ; VLX-NEXT: kmovd %edi, %k1
11984 ; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
11985 ; VLX-NEXT: kmovd %k0, %eax
11986 ; VLX-NEXT: vzeroupper
11989 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
11990 ; NoVLX: # %bb.0: # %entry
11991 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
11992 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
11993 ; NoVLX-NEXT: kmovw %edi, %k1
11994 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
11995 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
11996 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
11997 ; NoVLX-NEXT: kmovw %k0, %eax
11998 ; NoVLX-NEXT: vzeroupper
12001 %0 = bitcast <4 x i64> %__a to <8 x i32>
12002 %load = load i32, i32* %__b
12003 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
12004 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12005 %2 = icmp sge <8 x i32> %0, %1
12006 %3 = bitcast i8 %__u to <8 x i1>
12007 %4 = and <8 x i1> %3, %2
12008 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
12009 %6 = bitcast <32 x i1> %5 to i32
; Signed >= compare of the eight i32 lanes of %__a and %__b (both passed in
; registers). The <8 x i1> result is widened to <64 x i1> by a shufflevector
; whose indices >= 8 select zeroinitializer lanes, then bitcast to i64.
; Expected codegen: with +avx512vl a ymm compare followed by kmovq; with
; AVX512F only, a zmm compare, a kshiftlw/kshiftrw $8 pair clearing mask bits
; 8-15, then kmovw plus movzwl to zero-extend the 16-bit mask.
12014 define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
12015 ; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
12016 ; VLX: # %bb.0: # %entry
12017 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0
12018 ; VLX-NEXT: kmovq %k0, %rax
12019 ; VLX-NEXT: vzeroupper
12022 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
12023 ; NoVLX: # %bb.0: # %entry
12024 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
12025 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
12026 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12027 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
12028 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
12029 ; NoVLX-NEXT: kmovw %k0, %eax
12030 ; NoVLX-NEXT: movzwl %ax, %eax
12031 ; NoVLX-NEXT: vzeroupper
12034 %0 = bitcast <4 x i64> %__a to <8 x i32>
12035 %1 = bitcast <4 x i64> %__b to <8 x i32>
12036 %2 = icmp sge <8 x i32> %0, %1
12037 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
12038 %4 = bitcast <64 x i1> %3 to i64
; Signed >= compare of the eight i32 lanes of %__a against a full <4 x i64>
; vector loaded from %__b. The <8 x i1> result is widened to <64 x i1> with
; zeroinitializer lanes and bitcast to i64.
; Expected codegen: with +avx512vl the load folds into the ymm compare; with
; AVX512F only, the operand is loaded into ymm1 with vmovdqa, compared on zmm
; registers, masked down with a kshiftlw/kshiftrw $8 pair, and zero-extended
; with movzwl.
12042 define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
12043 ; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
12044 ; VLX: # %bb.0: # %entry
12045 ; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
12046 ; VLX-NEXT: kmovq %k0, %rax
12047 ; VLX-NEXT: vzeroupper
12050 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
12051 ; NoVLX: # %bb.0: # %entry
12052 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
12053 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
12054 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12055 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
12056 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
12057 ; NoVLX-NEXT: kmovw %k0, %eax
12058 ; NoVLX-NEXT: movzwl %ax, %eax
12059 ; NoVLX-NEXT: vzeroupper
12062 %0 = bitcast <4 x i64> %__a to <8 x i32>
12063 %load = load <4 x i64>, <4 x i64>* %__b
12064 %1 = bitcast <4 x i64> %load to <8 x i32>
12065 %2 = icmp sge <8 x i32> %0, %1
12066 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
12067 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of the eight i32 lanes of %__a and %__b: the
; <8 x i1> compare result is ANDed with %__u (bitcast from i8 to <8 x i1>),
; widened to <64 x i1> with zeroinitializer lanes, and bitcast to i64.
; Expected codegen: %__u is moved into %k1 and used as the compare write-mask.
; With AVX512F only, the compare runs on zmm registers and is followed by a
; kshiftlw/kshiftrw $8 pair and a movzwl zero-extension.
12071 define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
12072 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
12073 ; VLX: # %bb.0: # %entry
12074 ; VLX-NEXT: kmovd %edi, %k1
12075 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 {%k1}
12076 ; VLX-NEXT: kmovq %k0, %rax
12077 ; VLX-NEXT: vzeroupper
12080 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
12081 ; NoVLX: # %bb.0: # %entry
12082 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
12083 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
12084 ; NoVLX-NEXT: kmovw %edi, %k1
12085 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
12086 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
12087 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
12088 ; NoVLX-NEXT: kmovw %k0, %eax
12089 ; NoVLX-NEXT: movzwl %ax, %eax
12090 ; NoVLX-NEXT: vzeroupper
12093 %0 = bitcast <4 x i64> %__a to <8 x i32>
12094 %1 = bitcast <4 x i64> %__b to <8 x i32>
12095 %2 = icmp sge <8 x i32> %0, %1
12096 %3 = bitcast i8 %__u to <8 x i1>
12097 %4 = and <8 x i1> %2, %3
12098 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
12099 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare of the eight i32 lanes of %__a against a full
; <4 x i64> vector loaded from %__b. The compare result is ANDed with %__u
; (bitcast from i8 to <8 x i1>), widened to <64 x i1> with zeroinitializer
; lanes, and bitcast to i64.
; Expected codegen: %__u goes into %k1 as the compare write-mask. With
; +avx512vl the load folds into the ymm compare; with AVX512F only, a vmovdqa
; load feeds a zmm compare, followed by a kshiftlw/kshiftrw $8 pair and movzwl.
12103 define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
12104 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
12105 ; VLX: # %bb.0: # %entry
12106 ; VLX-NEXT: kmovd %edi, %k1
12107 ; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
12108 ; VLX-NEXT: kmovq %k0, %rax
12109 ; VLX-NEXT: vzeroupper
12112 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
12113 ; NoVLX: # %bb.0: # %entry
12114 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
12115 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
12116 ; NoVLX-NEXT: kmovw %edi, %k1
12117 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
12118 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
12119 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
12120 ; NoVLX-NEXT: kmovw %k0, %eax
12121 ; NoVLX-NEXT: movzwl %ax, %eax
12122 ; NoVLX-NEXT: vzeroupper
12125 %0 = bitcast <4 x i64> %__a to <8 x i32>
12126 %load = load <4 x i64>, <4 x i64>* %__b
12127 %1 = bitcast <4 x i64> %load to <8 x i32>
12128 %2 = icmp sge <8 x i32> %0, %1
12129 %3 = bitcast i8 %__u to <8 x i1>
12130 %4 = and <8 x i1> %2, %3
12131 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
12132 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of the eight i32 lanes of %__a against a scalar i32 loaded
; from %__b and splatted to every lane. The <8 x i1> result is widened to
; <64 x i1> with zeroinitializer lanes and bitcast to i64.
; Expected codegen: with +avx512vl the load folds as a {1to8} broadcast
; operand; with AVX512F only, vpbroadcastd fills ymm1, the compare runs on zmm
; registers, and a kshiftlw/kshiftrw $8 pair plus movzwl clean up the mask.
12137 define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
12138 ; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
12139 ; VLX: # %bb.0: # %entry
12140 ; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0
12141 ; VLX-NEXT: kmovq %k0, %rax
12142 ; VLX-NEXT: vzeroupper
12145 ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
12146 ; NoVLX: # %bb.0: # %entry
12147 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
12148 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
12149 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12150 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
12151 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
12152 ; NoVLX-NEXT: kmovw %k0, %eax
12153 ; NoVLX-NEXT: movzwl %ax, %eax
12154 ; NoVLX-NEXT: vzeroupper
12157 %0 = bitcast <4 x i64> %__a to <8 x i32>
12158 %load = load i32, i32* %__b
12159 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
12160 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12161 %2 = icmp sge <8 x i32> %0, %1
12162 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
12163 %4 = bitcast <64 x i1> %3 to i64
; Masked broadcast signed >= compare on eight i32 lanes: the scalar loaded from
; %__b is splatted, compared against %__a, and the result is ANDed with %__u
; (bitcast from i8 to <8 x i1>). The result is widened to <64 x i1> with
; zeroinitializer lanes and bitcast to i64.
; Expected codegen: %__u goes into %k1 as the compare write-mask. With
; +avx512vl the load is a {1to8} broadcast operand; with AVX512F only, a ymm
; broadcast feeds a zmm compare, followed by a kshiftlw/kshiftrw $8 pair and
; movzwl.
12167 define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
12168 ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
12169 ; VLX: # %bb.0: # %entry
12170 ; VLX-NEXT: kmovd %edi, %k1
12171 ; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
12172 ; VLX-NEXT: kmovq %k0, %rax
12173 ; VLX-NEXT: vzeroupper
12176 ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
12177 ; NoVLX: # %bb.0: # %entry
12178 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
12179 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
12180 ; NoVLX-NEXT: kmovw %edi, %k1
12181 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
12182 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
12183 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
12184 ; NoVLX-NEXT: kmovw %k0, %eax
12185 ; NoVLX-NEXT: movzwl %ax, %eax
12186 ; NoVLX-NEXT: vzeroupper
12189 %0 = bitcast <4 x i64> %__a to <8 x i32>
12190 %load = load i32, i32* %__b
12191 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
12192 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12193 %2 = icmp sge <8 x i32> %0, %1
12194 %3 = bitcast i8 %__u to <8 x i1>
12195 %4 = and <8 x i1> %3, %2
12196 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
12197 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of the sixteen i32 lanes of %__a and %__b (512-bit
; operands). The <16 x i1> result is widened to <32 x i1> by a shufflevector
; whose indices 16-31 select zeroinitializer lanes, then bitcast to i32.
; Expected codegen: a single zmm compare in both configurations; the mask is
; read with kmovd when +avx512bw is enabled and with kmovw under AVX512F only.
12202 define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
12203 ; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
12204 ; VLX: # %bb.0: # %entry
12205 ; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12206 ; VLX-NEXT: kmovd %k0, %eax
12207 ; VLX-NEXT: vzeroupper
12210 ; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
12211 ; NoVLX: # %bb.0: # %entry
12212 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12213 ; NoVLX-NEXT: kmovw %k0, %eax
12214 ; NoVLX-NEXT: vzeroupper
12217 %0 = bitcast <8 x i64> %__a to <16 x i32>
12218 %1 = bitcast <8 x i64> %__b to <16 x i32>
12219 %2 = icmp sge <16 x i32> %0, %1
12220 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12221 %4 = bitcast <32 x i1> %3 to i32
; Signed >= compare of the sixteen i32 lanes of %__a against a full <8 x i64>
; vector loaded from %__b. The <16 x i1> result is widened to <32 x i1> with
; zeroinitializer lanes and bitcast to i32.
; Expected codegen: the load folds into the zmm compare as a memory operand in
; both configurations.
12225 define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
12226 ; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
12227 ; VLX: # %bb.0: # %entry
12228 ; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
12229 ; VLX-NEXT: kmovd %k0, %eax
12230 ; VLX-NEXT: vzeroupper
12233 ; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
12234 ; NoVLX: # %bb.0: # %entry
12235 ; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
12236 ; NoVLX-NEXT: kmovw %k0, %eax
12237 ; NoVLX-NEXT: vzeroupper
12240 %0 = bitcast <8 x i64> %__a to <16 x i32>
12241 %load = load <8 x i64>, <8 x i64>* %__b
12242 %1 = bitcast <8 x i64> %load to <16 x i32>
12243 %2 = icmp sge <16 x i32> %0, %1
12244 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12245 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of the sixteen i32 lanes of %__a and %__b: the
; <16 x i1> compare result is ANDed with %__u (bitcast from i16 to <16 x i1>),
; widened to <32 x i1> with zeroinitializer lanes, and bitcast to i32.
; Expected codegen differs per configuration: the first RUN line expects %__u
; in %k1 as the compare write-mask, while the AVX512F-only run expects an
; unmasked compare followed by a scalar andl with %edi.
12249 define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
12250 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
12251 ; VLX: # %bb.0: # %entry
12252 ; VLX-NEXT: kmovd %edi, %k1
12253 ; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
12254 ; VLX-NEXT: kmovd %k0, %eax
12255 ; VLX-NEXT: vzeroupper
12258 ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
12259 ; NoVLX: # %bb.0: # %entry
12260 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12261 ; NoVLX-NEXT: kmovw %k0, %eax
12262 ; NoVLX-NEXT: andl %edi, %eax
12263 ; NoVLX-NEXT: vzeroupper
12266 %0 = bitcast <8 x i64> %__a to <16 x i32>
12267 %1 = bitcast <8 x i64> %__b to <16 x i32>
12268 %2 = icmp sge <16 x i32> %0, %1
12269 %3 = bitcast i16 %__u to <16 x i1>
12270 %4 = and <16 x i1> %2, %3
12271 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12272 %6 = bitcast <32 x i1> %5 to i32
; Masked signed >= compare of the sixteen i32 lanes of %__a against a full
; <8 x i64> vector loaded from %__b. The compare result is ANDed with %__u
; (bitcast from i16 to <16 x i1>), widened to <32 x i1> with zeroinitializer
; lanes, and bitcast to i32.
; Expected codegen: the load folds into the zmm compare in both configurations.
; The first RUN line expects %__u as a {%k1} write-mask; the AVX512F-only run
; expects an unmasked compare followed by a scalar andl with %edi.
12276 define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
12277 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
12278 ; VLX: # %bb.0: # %entry
12279 ; VLX-NEXT: kmovd %edi, %k1
12280 ; VLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
12281 ; VLX-NEXT: kmovd %k0, %eax
12282 ; VLX-NEXT: vzeroupper
12285 ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
12286 ; NoVLX: # %bb.0: # %entry
12287 ; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0
12288 ; NoVLX-NEXT: kmovw %k0, %eax
12289 ; NoVLX-NEXT: andl %edi, %eax
12290 ; NoVLX-NEXT: vzeroupper
12293 %0 = bitcast <8 x i64> %__a to <16 x i32>
12294 %load = load <8 x i64>, <8 x i64>* %__b
12295 %1 = bitcast <8 x i64> %load to <16 x i32>
12296 %2 = icmp sge <16 x i32> %0, %1
12297 %3 = bitcast i16 %__u to <16 x i1>
12298 %4 = and <16 x i1> %2, %3
12299 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12300 %6 = bitcast <32 x i1> %5 to i32
; Signed >= compare of the sixteen i32 lanes of %__a against a scalar i32
; loaded from %__b and splatted to every lane. The <16 x i1> result is widened
; to <32 x i1> with zeroinitializer lanes and bitcast to i32.
; Expected codegen: the scalar load folds into the zmm compare as a {1to16}
; broadcast operand in both configurations.
12305 define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
12306 ; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
12307 ; VLX: # %bb.0: # %entry
12308 ; VLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
12309 ; VLX-NEXT: kmovd %k0, %eax
12310 ; VLX-NEXT: vzeroupper
12313 ; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
12314 ; NoVLX: # %bb.0: # %entry
12315 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
12316 ; NoVLX-NEXT: kmovw %k0, %eax
12317 ; NoVLX-NEXT: vzeroupper
12320 %0 = bitcast <8 x i64> %__a to <16 x i32>
12321 %load = load i32, i32* %__b
12322 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
12323 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12324 %2 = icmp sge <16 x i32> %0, %1
12325 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12326 %4 = bitcast <32 x i1> %3 to i32
; Masked broadcast signed >= compare on sixteen i32 lanes: the scalar loaded
; from %__b is splatted, compared against %__a, and the result is ANDed with
; %__u (bitcast from i16 to <16 x i1>). The result is widened to <32 x i1> with
; zeroinitializer lanes and bitcast to i32.
; Expected codegen: a {1to16} broadcast compare in both configurations. The
; first RUN line expects %__u as a {%k1} write-mask; the AVX512F-only run
; expects an unmasked compare followed by a scalar andl with %edi.
12330 define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
12331 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
12332 ; VLX: # %bb.0: # %entry
12333 ; VLX-NEXT: kmovd %edi, %k1
12334 ; VLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
12335 ; VLX-NEXT: kmovd %k0, %eax
12336 ; VLX-NEXT: vzeroupper
12339 ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
12340 ; NoVLX: # %bb.0: # %entry
12341 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0
12342 ; NoVLX-NEXT: kmovw %k0, %eax
12343 ; NoVLX-NEXT: andl %edi, %eax
12344 ; NoVLX-NEXT: vzeroupper
12347 %0 = bitcast <8 x i64> %__a to <16 x i32>
12348 %load = load i32, i32* %__b
12349 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
12350 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12351 %2 = icmp sge <16 x i32> %0, %1
12352 %3 = bitcast i16 %__u to <16 x i1>
12353 %4 = and <16 x i1> %3, %2
12354 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
12355 %6 = bitcast <32 x i1> %5 to i32
; Signed >= compare of the sixteen i32 lanes of %__a and %__b. The <16 x i1>
; result is widened to <64 x i1> by a shufflevector whose indices >= 16 select
; zeroinitializer lanes, then bitcast to i64.
; Expected codegen: a zmm compare in both configurations; the first RUN line
; reads the mask with kmovq, the AVX512F-only run uses kmovw followed by movzwl
; to zero-extend the 16-bit mask.
12360 define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
12361 ; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
12362 ; VLX: # %bb.0: # %entry
12363 ; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12364 ; VLX-NEXT: kmovq %k0, %rax
12365 ; VLX-NEXT: vzeroupper
12368 ; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
12369 ; NoVLX: # %bb.0: # %entry
12370 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12371 ; NoVLX-NEXT: kmovw %k0, %eax
12372 ; NoVLX-NEXT: movzwl %ax, %eax
12373 ; NoVLX-NEXT: vzeroupper
12376 %0 = bitcast <8 x i64> %__a to <16 x i32>
12377 %1 = bitcast <8 x i64> %__b to <16 x i32>
12378 %2 = icmp sge <16 x i32> %0, %1
12379 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12380 %4 = bitcast <64 x i1> %3 to i64
; Signed >= compare of the sixteen i32 lanes of %__a against a full <8 x i64>
; vector loaded from %__b. The <16 x i1> result is widened to <64 x i1> with
; zeroinitializer lanes and bitcast to i64.
; Expected codegen: the load folds into the zmm compare in both configurations;
; the AVX512F-only run zero-extends the 16-bit mask with movzwl.
12384 define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
12385 ; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
12386 ; VLX: # %bb.0: # %entry
12387 ; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
12388 ; VLX-NEXT: kmovq %k0, %rax
12389 ; VLX-NEXT: vzeroupper
12392 ; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
12393 ; NoVLX: # %bb.0: # %entry
12394 ; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
12395 ; NoVLX-NEXT: kmovw %k0, %eax
12396 ; NoVLX-NEXT: movzwl %ax, %eax
12397 ; NoVLX-NEXT: vzeroupper
12400 %0 = bitcast <8 x i64> %__a to <16 x i32>
12401 %load = load <8 x i64>, <8 x i64>* %__b
12402 %1 = bitcast <8 x i64> %load to <16 x i32>
12403 %2 = icmp sge <16 x i32> %0, %1
12404 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12405 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of the sixteen i32 lanes of %__a and %__b: the
; <16 x i1> compare result is ANDed with %__u (bitcast from i16 to <16 x i1>),
; widened to <64 x i1> with zeroinitializer lanes, and bitcast to i64.
; Expected codegen: the first RUN line expects %__u as a {%k1} write-mask and a
; kmovq of the result; the AVX512F-only run expects an unmasked compare, kmovw,
; and a scalar andl with %edi.
12409 define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
12410 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
12411 ; VLX: # %bb.0: # %entry
12412 ; VLX-NEXT: kmovd %edi, %k1
12413 ; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
12414 ; VLX-NEXT: kmovq %k0, %rax
12415 ; VLX-NEXT: vzeroupper
12418 ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
12419 ; NoVLX: # %bb.0: # %entry
12420 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
12421 ; NoVLX-NEXT: kmovw %k0, %eax
12422 ; NoVLX-NEXT: andl %edi, %eax
12423 ; NoVLX-NEXT: vzeroupper
12426 %0 = bitcast <8 x i64> %__a to <16 x i32>
12427 %1 = bitcast <8 x i64> %__b to <16 x i32>
12428 %2 = icmp sge <16 x i32> %0, %1
12429 %3 = bitcast i16 %__u to <16 x i1>
12430 %4 = and <16 x i1> %2, %3
12431 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12432 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare of the sixteen i32 lanes of %__a against a full
; <8 x i64> vector loaded from %__b. The compare result is ANDed with %__u
; (bitcast from i16 to <16 x i1>), widened to <64 x i1> with zeroinitializer
; lanes, and bitcast to i64.
; Expected codegen: the load folds into the zmm compare in both configurations.
; The first RUN line expects %__u as a {%k1} write-mask; the AVX512F-only run
; expects an unmasked compare followed by a scalar andl with %edi.
12436 define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
12437 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
12438 ; VLX: # %bb.0: # %entry
12439 ; VLX-NEXT: kmovd %edi, %k1
12440 ; VLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
12441 ; VLX-NEXT: kmovq %k0, %rax
12442 ; VLX-NEXT: vzeroupper
12445 ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
12446 ; NoVLX: # %bb.0: # %entry
12447 ; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0
12448 ; NoVLX-NEXT: kmovw %k0, %eax
12449 ; NoVLX-NEXT: andl %edi, %eax
12450 ; NoVLX-NEXT: vzeroupper
12453 %0 = bitcast <8 x i64> %__a to <16 x i32>
12454 %load = load <8 x i64>, <8 x i64>* %__b
12455 %1 = bitcast <8 x i64> %load to <16 x i32>
12456 %2 = icmp sge <16 x i32> %0, %1
12457 %3 = bitcast i16 %__u to <16 x i1>
12458 %4 = and <16 x i1> %2, %3
12459 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12460 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of the sixteen i32 lanes of %__a against a scalar i32
; loaded from %__b and splatted to every lane. The <16 x i1> result is widened
; to <64 x i1> with zeroinitializer lanes and bitcast to i64.
; Expected codegen: a {1to16} broadcast compare in both configurations; the
; AVX512F-only run zero-extends the 16-bit mask with movzwl.
12465 define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
12466 ; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
12467 ; VLX: # %bb.0: # %entry
12468 ; VLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
12469 ; VLX-NEXT: kmovq %k0, %rax
12470 ; VLX-NEXT: vzeroupper
12473 ; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
12474 ; NoVLX: # %bb.0: # %entry
12475 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
12476 ; NoVLX-NEXT: kmovw %k0, %eax
12477 ; NoVLX-NEXT: movzwl %ax, %eax
12478 ; NoVLX-NEXT: vzeroupper
12481 %0 = bitcast <8 x i64> %__a to <16 x i32>
12482 %load = load i32, i32* %__b
12483 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
12484 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12485 %2 = icmp sge <16 x i32> %0, %1
12486 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12487 %4 = bitcast <64 x i1> %3 to i64
; Masked broadcast signed >= compare on sixteen i32 lanes: the scalar loaded
; from %__b is splatted, compared against %__a, and the result is ANDed with
; %__u (bitcast from i16 to <16 x i1>). The result is widened to <64 x i1> with
; zeroinitializer lanes and bitcast to i64.
; Expected codegen: a {1to16} broadcast compare in both configurations. The
; first RUN line expects %__u as a {%k1} write-mask; the AVX512F-only run
; expects an unmasked compare followed by a scalar andl with %edi.
12491 define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
12492 ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
12493 ; VLX: # %bb.0: # %entry
12494 ; VLX-NEXT: kmovd %edi, %k1
12495 ; VLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
12496 ; VLX-NEXT: kmovq %k0, %rax
12497 ; VLX-NEXT: vzeroupper
12500 ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
12501 ; NoVLX: # %bb.0: # %entry
12502 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0
12503 ; NoVLX-NEXT: kmovw %k0, %eax
12504 ; NoVLX-NEXT: andl %edi, %eax
12505 ; NoVLX-NEXT: vzeroupper
12508 %0 = bitcast <8 x i64> %__a to <16 x i32>
12509 %load = load i32, i32* %__b
12510 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
12511 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
12512 %2 = icmp sge <16 x i32> %0, %1
12513 %3 = bitcast i16 %__u to <16 x i1>
12514 %4 = and <16 x i1> %3, %2
12515 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
12516 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of the two i64 lanes of %__a and %__b (the leading bitcasts
; are same-type <2 x i64> to <2 x i64>). The <2 x i1> result is widened to
; <4 x i1> by a shufflevector whose indices 2 and 3 select zeroinitializer
; lanes, then bitcast to i4.
; Expected codegen: with +avx512vl an xmm vpcmpnltq and kmovb; with AVX512F
; only, a zmm compare, a kshiftlw/kshiftrw $14 pair that keeps mask bits 0-1,
; kmovw, and andl $3.
12521 define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12522 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
12523 ; VLX: # %bb.0: # %entry
12524 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0
12525 ; VLX-NEXT: kmovb %k0, %eax
12528 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
12529 ; NoVLX: # %bb.0: # %entry
12530 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12531 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12532 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12533 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12534 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12535 ; NoVLX-NEXT: kmovw %k0, %eax
12536 ; NoVLX-NEXT: andl $3, %eax
12537 ; NoVLX-NEXT: vzeroupper
12540 %0 = bitcast <2 x i64> %__a to <2 x i64>
12541 %1 = bitcast <2 x i64> %__b to <2 x i64>
12542 %2 = icmp sge <2 x i64> %0, %1
12543 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12544 %4 = bitcast <4 x i1> %3 to i4
; Signed >= compare of the two i64 lanes of %__a against a <2 x i64> vector
; loaded from %__b. The <2 x i1> result is widened to <4 x i1> with
; zeroinitializer lanes and bitcast to i4.
; Expected codegen: with +avx512vl the load folds into the xmm compare; with
; AVX512F only, vmovdqa loads xmm1, the compare runs on zmm registers, and a
; kshiftlw/kshiftrw $14 pair plus andl $3 reduce the mask to two bits.
12548 define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12549 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
12550 ; VLX: # %bb.0: # %entry
12551 ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
12552 ; VLX-NEXT: kmovb %k0, %eax
12555 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
12556 ; NoVLX: # %bb.0: # %entry
12557 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12558 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
12559 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12560 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12561 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12562 ; NoVLX-NEXT: kmovw %k0, %eax
12563 ; NoVLX-NEXT: andl $3, %eax
12564 ; NoVLX-NEXT: vzeroupper
12567 %0 = bitcast <2 x i64> %__a to <2 x i64>
12568 %load = load <2 x i64>, <2 x i64>* %__b
12569 %1 = bitcast <2 x i64> %load to <2 x i64>
12570 %2 = icmp sge <2 x i64> %0, %1
12571 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12572 %4 = bitcast <4 x i1> %3 to i4
; Masked signed >= compare of the two i64 lanes of %__a and %__b. %__u is
; bitcast from i8 to <8 x i1>, its lanes 0 and 1 are extracted with a
; shufflevector, and that <2 x i1> is ANDed with the compare result. The result
; is widened to <4 x i1> with zeroinitializer lanes and bitcast to i4.
; Expected codegen: %__u goes into %k1 as the compare write-mask. With
; AVX512F only, the compare runs on zmm registers and a kshiftlw/kshiftrw $14
; pair plus andl $3 reduce the mask to two bits.
12576 define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12577 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask:
12578 ; VLX: # %bb.0: # %entry
12579 ; VLX-NEXT: kmovd %edi, %k1
12580 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
12581 ; VLX-NEXT: kmovb %k0, %eax
12584 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask:
12585 ; NoVLX: # %bb.0: # %entry
12586 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12587 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12588 ; NoVLX-NEXT: kmovw %edi, %k1
12589 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12590 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12591 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12592 ; NoVLX-NEXT: kmovw %k0, %eax
12593 ; NoVLX-NEXT: andl $3, %eax
12594 ; NoVLX-NEXT: vzeroupper
12597 %0 = bitcast <2 x i64> %__a to <2 x i64>
12598 %1 = bitcast <2 x i64> %__b to <2 x i64>
12599 %2 = icmp sge <2 x i64> %0, %1
12600 %3 = bitcast i8 %__u to <8 x i1>
12601 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12602 %4 = and <2 x i1> %2, %extract.i
12603 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12604 %6 = bitcast <4 x i1> %5 to i4
; Masked signed >= compare of the two i64 lanes of %__a against a <2 x i64>
; vector loaded from %__b. Lanes 0 and 1 of %__u (bitcast from i8 to <8 x i1>)
; are extracted and ANDed with the compare result, which is then widened to
; <4 x i1> with zeroinitializer lanes and bitcast to i4.
; Expected codegen: %__u goes into %k1 as the compare write-mask. With
; +avx512vl the load folds into the xmm compare; with AVX512F only, a vmovdqa
; load feeds a zmm compare, followed by a kshiftlw/kshiftrw $14 pair and
; andl $3.
12608 define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12609 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem:
12610 ; VLX: # %bb.0: # %entry
12611 ; VLX-NEXT: kmovd %edi, %k1
12612 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
12613 ; VLX-NEXT: kmovb %k0, %eax
12616 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem:
12617 ; NoVLX: # %bb.0: # %entry
12618 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12619 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
12620 ; NoVLX-NEXT: kmovw %edi, %k1
12621 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12622 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12623 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12624 ; NoVLX-NEXT: kmovw %k0, %eax
12625 ; NoVLX-NEXT: andl $3, %eax
12626 ; NoVLX-NEXT: vzeroupper
12629 %0 = bitcast <2 x i64> %__a to <2 x i64>
12630 %load = load <2 x i64>, <2 x i64>* %__b
12631 %1 = bitcast <2 x i64> %load to <2 x i64>
12632 %2 = icmp sge <2 x i64> %0, %1
12633 %3 = bitcast i8 %__u to <8 x i1>
12634 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12635 %4 = and <2 x i1> %2, %extract.i
12636 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12637 %6 = bitcast <4 x i1> %5 to i4
; Signed >= compare of the two i64 lanes of %__a against a scalar i64 loaded
; from %__b and splatted to both lanes. The <2 x i1> result is widened to
; <4 x i1> with zeroinitializer lanes and bitcast to i4.
; Expected codegen: with +avx512vl the load folds as a {1to2} broadcast
; operand; with AVX512F only, vpbroadcastq fills xmm1, the compare runs on zmm
; registers, and a kshiftlw/kshiftrw $14 pair plus andl $3 reduce the mask to
; two bits.
12642 define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
12643 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
12644 ; VLX: # %bb.0: # %entry
12645 ; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
12646 ; VLX-NEXT: kmovb %k0, %eax
12649 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
12650 ; NoVLX: # %bb.0: # %entry
12651 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12652 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
12653 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12654 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12655 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12656 ; NoVLX-NEXT: kmovw %k0, %eax
12657 ; NoVLX-NEXT: andl $3, %eax
12658 ; NoVLX-NEXT: vzeroupper
12661 %0 = bitcast <2 x i64> %__a to <2 x i64>
12662 %load = load i64, i64* %__b
12663 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12664 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12665 %2 = icmp sge <2 x i64> %0, %1
12666 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12667 %4 = bitcast <4 x i1> %3 to i4
; Masked signed >= compare of %__a with a scalar i64 loaded from %__b and
; splatted to both lanes. The result is ANDed with the low two bits of %__u,
; padded to <4 x i1> and bitcast to i4. The VLX run expects a masked
; vpcmpnltq with a (%rsi){1to2} operand; the NoVLX run expects vpbroadcastq, a
; masked zmm compare, the kshiftlw/kshiftrw $14 pair and andl $3.
12671 define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
12672 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
12673 ; VLX: # %bb.0: # %entry
12674 ; VLX-NEXT: kmovd %edi, %k1
12675 ; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
12676 ; VLX-NEXT: kmovb %k0, %eax
12679 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
12680 ; NoVLX: # %bb.0: # %entry
12681 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12682 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
12683 ; NoVLX-NEXT: kmovw %edi, %k1
12684 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12685 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12686 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12687 ; NoVLX-NEXT: kmovw %k0, %eax
12688 ; NoVLX-NEXT: andl $3, %eax
12689 ; NoVLX-NEXT: vzeroupper
12692 %0 = bitcast <2 x i64> %__a to <2 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with mask <0, 0>.
12693 %load = load i64, i64* %__b
12694 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12695 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12696 %2 = icmp sge <2 x i64> %0, %1
; Elements 0 and 1 of %__u (viewed as <8 x i1>) gate the two compare results.
; Here the mask is the first operand of the and, unlike the non-broadcast variants.
12697 %3 = bitcast i8 %__u to <8 x i1>
12698 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12699 %4 = and <2 x i1> %extract.i, %2
; Shuffle indices 2 and 3 select from the zeroinitializer operand, so the added lanes are zero.
12700 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12701 %6 = bitcast <4 x i1> %5 to i4
; Unmasked signed >= compare of two <2 x i64> register arguments; the 2-bit
; result is padded to <8 x i1> and bitcast to i8. The VLX run expects a single
; xmm vpcmpnltq; the NoVLX run expects the compare on zmm registers followed by
; the kshiftlw/kshiftrw $14 pair.
12706 define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12707 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask:
12708 ; VLX: # %bb.0: # %entry
12709 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0
12710 ; VLX-NEXT: kmovd %k0, %eax
12711 ; VLX-NEXT: # kill: def $al killed $al killed $eax
12714 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask:
12715 ; NoVLX: # %bb.0: # %entry
12716 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12717 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12718 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12719 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12720 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12721 ; NoVLX-NEXT: kmovw %k0, %eax
12722 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
12723 ; NoVLX-NEXT: vzeroupper
12726 %0 = bitcast <2 x i64> %__a to <2 x i64>
12727 %1 = bitcast <2 x i64> %__b to <2 x i64>
12728 %2 = icmp sge <2 x i64> %0, %1
; Indices 2 and 3 both refer to the zeroinitializer operand, so lanes 2..7 are zero.
12729 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12730 %4 = bitcast <8 x i1> %3 to i8
; Unmasked signed >= compare of %__a with the <2 x i64> loaded from %__b; the
; 2-bit result is padded to <8 x i1> and bitcast to i8. The VLX run expects the
; load folded into vpcmpnltq as (%rdi); the NoVLX run expects a separate
; vmovdqa load, a zmm compare and the kshiftlw/kshiftrw $14 pair.
12734 define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12735 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem:
12736 ; VLX: # %bb.0: # %entry
12737 ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
12738 ; VLX-NEXT: kmovd %k0, %eax
12739 ; VLX-NEXT: # kill: def $al killed $al killed $eax
12742 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem:
12743 ; NoVLX: # %bb.0: # %entry
12744 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12745 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
12746 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12747 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12748 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12749 ; NoVLX-NEXT: kmovw %k0, %eax
12750 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
12751 ; NoVLX-NEXT: vzeroupper
12754 %0 = bitcast <2 x i64> %__a to <2 x i64>
12755 %load = load <2 x i64>, <2 x i64>* %__b
12756 %1 = bitcast <2 x i64> %load to <2 x i64>
12757 %2 = icmp sge <2 x i64> %0, %1
; Indices 2 and 3 both refer to the zeroinitializer operand, so lanes 2..7 are zero.
12758 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12759 %4 = bitcast <8 x i1> %3 to i8
; Masked signed >= compare of two <2 x i64> register arguments. The result is
; ANDed with the low two bits of %__u, padded to <8 x i1> and bitcast to i8.
; The VLX run expects an xmm vpcmpnltq under {%k1}; the NoVLX run expects a
; masked zmm compare followed by the kshiftlw/kshiftrw $14 pair.
12763 define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12764 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask:
12765 ; VLX: # %bb.0: # %entry
12766 ; VLX-NEXT: kmovd %edi, %k1
12767 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
12768 ; VLX-NEXT: kmovd %k0, %eax
12769 ; VLX-NEXT: # kill: def $al killed $al killed $eax
12772 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask:
12773 ; NoVLX: # %bb.0: # %entry
12774 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12775 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12776 ; NoVLX-NEXT: kmovw %edi, %k1
12777 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12778 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12779 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12780 ; NoVLX-NEXT: kmovw %k0, %eax
12781 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
12782 ; NoVLX-NEXT: vzeroupper
12785 %0 = bitcast <2 x i64> %__a to <2 x i64>
12786 %1 = bitcast <2 x i64> %__b to <2 x i64>
12787 %2 = icmp sge <2 x i64> %0, %1
; Elements 0 and 1 of %__u (viewed as <8 x i1>) gate the two compare results.
12788 %3 = bitcast i8 %__u to <8 x i1>
12789 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12790 %4 = and <2 x i1> %2, %extract.i
; Indices 2 and 3 both refer to the zeroinitializer operand, so lanes 2..7 are zero.
12791 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12792 %6 = bitcast <8 x i1> %5 to i8
; Masked signed >= compare of %__a with the <2 x i64> loaded from %__b. The
; result is ANDed with the low two bits of %__u, padded to <8 x i1> and bitcast
; to i8. The VLX run expects the load folded into a masked vpcmpnltq as (%rsi);
; the NoVLX run expects vmovdqa, a masked zmm compare and the
; kshiftlw/kshiftrw $14 pair.
12796 define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12797 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem:
12798 ; VLX: # %bb.0: # %entry
12799 ; VLX-NEXT: kmovd %edi, %k1
12800 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
12801 ; VLX-NEXT: kmovd %k0, %eax
12802 ; VLX-NEXT: # kill: def $al killed $al killed $eax
12805 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem:
12806 ; NoVLX: # %bb.0: # %entry
12807 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12808 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
12809 ; NoVLX-NEXT: kmovw %edi, %k1
12810 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12811 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12812 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12813 ; NoVLX-NEXT: kmovw %k0, %eax
12814 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
12815 ; NoVLX-NEXT: vzeroupper
12818 %0 = bitcast <2 x i64> %__a to <2 x i64>
12819 %load = load <2 x i64>, <2 x i64>* %__b
12820 %1 = bitcast <2 x i64> %load to <2 x i64>
12821 %2 = icmp sge <2 x i64> %0, %1
; Elements 0 and 1 of %__u (viewed as <8 x i1>) gate the two compare results.
12822 %3 = bitcast i8 %__u to <8 x i1>
12823 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12824 %4 = and <2 x i1> %2, %extract.i
; Indices 2 and 3 both refer to the zeroinitializer operand, so lanes 2..7 are zero.
12825 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12826 %6 = bitcast <8 x i1> %5 to i8
; Unmasked signed >= compare of %__a with a scalar i64 loaded from %__b and
; splatted to both lanes; the result is padded to <8 x i1> and bitcast to i8.
; The VLX run expects the broadcast folded into vpcmpnltq as (%rdi){1to2}; the
; NoVLX run expects vpbroadcastq, a zmm compare and the kshiftlw/kshiftrw $14
; pair.
12831 define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
12832 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
12833 ; VLX: # %bb.0: # %entry
12834 ; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
12835 ; VLX-NEXT: kmovd %k0, %eax
12836 ; VLX-NEXT: # kill: def $al killed $al killed $eax
12839 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
12840 ; NoVLX: # %bb.0: # %entry
12841 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12842 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
12843 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12844 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12845 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12846 ; NoVLX-NEXT: kmovw %k0, %eax
12847 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
12848 ; NoVLX-NEXT: vzeroupper
12851 %0 = bitcast <2 x i64> %__a to <2 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with mask <0, 0>.
12852 %load = load i64, i64* %__b
12853 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12854 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12855 %2 = icmp sge <2 x i64> %0, %1
; Indices 2 and 3 both refer to the zeroinitializer operand, so lanes 2..7 are zero.
12856 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12857 %4 = bitcast <8 x i1> %3 to i8
; Masked signed >= compare of %__a with a scalar i64 loaded from %__b and
; splatted to both lanes. The result is ANDed with the low two bits of %__u,
; padded to <8 x i1> and bitcast to i8. The VLX run expects a masked vpcmpnltq
; with a (%rsi){1to2} operand; the NoVLX run expects vpbroadcastq, a masked
; zmm compare and the kshiftlw/kshiftrw $14 pair.
12861 define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
12862 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
12863 ; VLX: # %bb.0: # %entry
12864 ; VLX-NEXT: kmovd %edi, %k1
12865 ; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
12866 ; VLX-NEXT: kmovd %k0, %eax
12867 ; VLX-NEXT: # kill: def $al killed $al killed $eax
12870 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
12871 ; NoVLX: # %bb.0: # %entry
12872 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12873 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
12874 ; NoVLX-NEXT: kmovw %edi, %k1
12875 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12876 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12877 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12878 ; NoVLX-NEXT: kmovw %k0, %eax
12879 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
12880 ; NoVLX-NEXT: vzeroupper
12883 %0 = bitcast <2 x i64> %__a to <2 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with mask <0, 0>.
12884 %load = load i64, i64* %__b
12885 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
12886 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
12887 %2 = icmp sge <2 x i64> %0, %1
; Elements 0 and 1 of %__u (viewed as <8 x i1>) gate the two compare results.
; Here the mask is the first operand of the and, unlike the non-broadcast variants.
12888 %3 = bitcast i8 %__u to <8 x i1>
12889 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12890 %4 = and <2 x i1> %extract.i, %2
; Indices 2 and 3 both refer to the zeroinitializer operand, so lanes 2..7 are zero.
12891 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12892 %6 = bitcast <8 x i1> %5 to i8
; Unmasked signed >= compare of two <2 x i64> register arguments; the 2-bit
; result is padded to <16 x i1> and bitcast to i16. The VLX run expects a
; single xmm vpcmpnltq; the NoVLX run expects the compare on zmm registers
; followed by the kshiftlw/kshiftrw $14 pair.
12897 define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12898 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask:
12899 ; VLX: # %bb.0: # %entry
12900 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0
12901 ; VLX-NEXT: kmovd %k0, %eax
12902 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
12905 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask:
12906 ; NoVLX: # %bb.0: # %entry
12907 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12908 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12909 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12910 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12911 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12912 ; NoVLX-NEXT: kmovw %k0, %eax
12913 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
12914 ; NoVLX-NEXT: vzeroupper
12917 %0 = bitcast <2 x i64> %__a to <2 x i64>
12918 %1 = bitcast <2 x i64> %__b to <2 x i64>
12919 %2 = icmp sge <2 x i64> %0, %1
; Indices 2 and 3 both refer to the zeroinitializer operand, so lanes 2..15 are zero.
12920 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12921 %4 = bitcast <16 x i1> %3 to i16
; Unmasked signed >= compare of %__a with the <2 x i64> loaded from %__b; the
; 2-bit result is padded to <16 x i1> and bitcast to i16. The VLX run expects
; the load folded into vpcmpnltq as (%rdi); the NoVLX run expects a separate
; vmovdqa load, a zmm compare and the kshiftlw/kshiftrw $14 pair.
12925 define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12926 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem:
12927 ; VLX: # %bb.0: # %entry
12928 ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
12929 ; VLX-NEXT: kmovd %k0, %eax
12930 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
12933 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem:
12934 ; NoVLX: # %bb.0: # %entry
12935 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12936 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
12937 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
12938 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12939 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12940 ; NoVLX-NEXT: kmovw %k0, %eax
12941 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
12942 ; NoVLX-NEXT: vzeroupper
12945 %0 = bitcast <2 x i64> %__a to <2 x i64>
12946 %load = load <2 x i64>, <2 x i64>* %__b
12947 %1 = bitcast <2 x i64> %load to <2 x i64>
12948 %2 = icmp sge <2 x i64> %0, %1
; Indices 2 and 3 both refer to the zeroinitializer operand, so lanes 2..15 are zero.
12949 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12950 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of two <2 x i64> register arguments. The result is
; ANDed with the low two bits of %__u, padded to <16 x i1> and bitcast to i16.
; The VLX run expects an xmm vpcmpnltq under {%k1}; the NoVLX run expects a
; masked zmm compare followed by the kshiftlw/kshiftrw $14 pair.
12954 define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
12955 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask:
12956 ; VLX: # %bb.0: # %entry
12957 ; VLX-NEXT: kmovd %edi, %k1
12958 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
12959 ; VLX-NEXT: kmovd %k0, %eax
12960 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
12963 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask:
12964 ; NoVLX: # %bb.0: # %entry
12965 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
12966 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12967 ; NoVLX-NEXT: kmovw %edi, %k1
12968 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
12969 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
12970 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
12971 ; NoVLX-NEXT: kmovw %k0, %eax
12972 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
12973 ; NoVLX-NEXT: vzeroupper
12976 %0 = bitcast <2 x i64> %__a to <2 x i64>
12977 %1 = bitcast <2 x i64> %__b to <2 x i64>
12978 %2 = icmp sge <2 x i64> %0, %1
; Elements 0 and 1 of %__u (viewed as <8 x i1>) gate the two compare results.
12979 %3 = bitcast i8 %__u to <8 x i1>
12980 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
12981 %4 = and <2 x i1> %2, %extract.i
; Indices 2 and 3 both refer to the zeroinitializer operand, so lanes 2..15 are zero.
12982 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
12983 %6 = bitcast <16 x i1> %5 to i16
; Masked signed >= compare of %__a with the <2 x i64> loaded from %__b. The
; result is ANDed with the low two bits of %__u, padded to <16 x i1> and
; bitcast to i16. The VLX run expects the load folded into a masked vpcmpnltq
; as (%rsi); the NoVLX run expects vmovdqa, a masked zmm compare and the
; kshiftlw/kshiftrw $14 pair.
12987 define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
12988 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem:
12989 ; VLX: # %bb.0: # %entry
12990 ; VLX-NEXT: kmovd %edi, %k1
12991 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
12992 ; VLX-NEXT: kmovd %k0, %eax
12993 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
12996 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem:
12997 ; NoVLX: # %bb.0: # %entry
12998 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12999 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
13000 ; NoVLX-NEXT: kmovw %edi, %k1
13001 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13002 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13003 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13004 ; NoVLX-NEXT: kmovw %k0, %eax
13005 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13006 ; NoVLX-NEXT: vzeroupper
13009 %0 = bitcast <2 x i64> %__a to <2 x i64>
13010 %load = load <2 x i64>, <2 x i64>* %__b
13011 %1 = bitcast <2 x i64> %load to <2 x i64>
13012 %2 = icmp sge <2 x i64> %0, %1
; Elements 0 and 1 of %__u (viewed as <8 x i1>) gate the two compare results.
13013 %3 = bitcast i8 %__u to <8 x i1>
13014 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13015 %4 = and <2 x i1> %2, %extract.i
; Indices 2 and 3 both refer to the zeroinitializer operand, so lanes 2..15 are zero.
13016 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13017 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed >= compare of %__a with a scalar i64 loaded from %__b and
; splatted to both lanes; the result is padded to <16 x i1> and bitcast to i16.
; The VLX run expects the broadcast folded into vpcmpnltq as (%rdi){1to2}; the
; NoVLX run expects vpbroadcastq, a zmm compare and the kshiftlw/kshiftrw $14
; pair.
13022 define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
13023 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
13024 ; VLX: # %bb.0: # %entry
13025 ; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
13026 ; VLX-NEXT: kmovd %k0, %eax
13027 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13030 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
13031 ; NoVLX: # %bb.0: # %entry
13032 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13033 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
13034 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13035 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13036 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13037 ; NoVLX-NEXT: kmovw %k0, %eax
13038 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13039 ; NoVLX-NEXT: vzeroupper
13042 %0 = bitcast <2 x i64> %__a to <2 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with mask <0, 0>.
13043 %load = load i64, i64* %__b
13044 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13045 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13046 %2 = icmp sge <2 x i64> %0, %1
; Indices 2 and 3 both refer to the zeroinitializer operand, so lanes 2..15 are zero.
13047 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13048 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of %__a with a scalar i64 loaded from %__b and
; splatted to both lanes. The result is ANDed with the low two bits of %__u,
; padded to <16 x i1> and bitcast to i16. The VLX run expects a masked
; vpcmpnltq with a (%rsi){1to2} operand; the NoVLX run expects vpbroadcastq, a
; masked zmm compare and the kshiftlw/kshiftrw $14 pair.
13052 define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
13053 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
13054 ; VLX: # %bb.0: # %entry
13055 ; VLX-NEXT: kmovd %edi, %k1
13056 ; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
13057 ; VLX-NEXT: kmovd %k0, %eax
13058 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13061 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
13062 ; NoVLX: # %bb.0: # %entry
13063 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13064 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
13065 ; NoVLX-NEXT: kmovw %edi, %k1
13066 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13067 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13068 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13069 ; NoVLX-NEXT: kmovw %k0, %eax
13070 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13071 ; NoVLX-NEXT: vzeroupper
13074 %0 = bitcast <2 x i64> %__a to <2 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with mask <0, 0>.
13075 %load = load i64, i64* %__b
13076 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13077 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13078 %2 = icmp sge <2 x i64> %0, %1
; Elements 0 and 1 of %__u (viewed as <8 x i1>) gate the two compare results.
; Here the mask is the first operand of the and, unlike the non-broadcast variants.
13079 %3 = bitcast i8 %__u to <8 x i1>
13080 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13081 %4 = and <2 x i1> %extract.i, %2
; Indices 2 and 3 both refer to the zeroinitializer operand, so lanes 2..15 are zero.
13082 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13083 %6 = bitcast <16 x i1> %5 to i16
; Unmasked signed >= compare of two <2 x i64> register arguments; the 2-bit
; result is padded to <32 x i1> and bitcast to i32. The VLX run expects a
; single xmm vpcmpnltq; the NoVLX run expects the compare on zmm registers
; followed by the kshiftlw/kshiftrw $14 pair.
13088 define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
13089 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask:
13090 ; VLX: # %bb.0: # %entry
13091 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0
13092 ; VLX-NEXT: kmovd %k0, %eax
13095 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask:
13096 ; NoVLX: # %bb.0: # %entry
13097 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
13098 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13099 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13100 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13101 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13102 ; NoVLX-NEXT: kmovw %k0, %eax
13103 ; NoVLX-NEXT: vzeroupper
13106 %0 = bitcast <2 x i64> %__a to <2 x i64>
13107 %1 = bitcast <2 x i64> %__b to <2 x i64>
13108 %2 = icmp sge <2 x i64> %0, %1
; Indices 2 and 3 both refer to the zeroinitializer operand, so lanes 2..31 are zero.
13109 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13110 %4 = bitcast <32 x i1> %3 to i32
; Unmasked signed >= compare of %__a with the <2 x i64> loaded from %__b; the
; 2-bit result is padded to <32 x i1> and bitcast to i32. The VLX run expects
; the load folded into vpcmpnltq as (%rdi); the NoVLX run expects a separate
; vmovdqa load, a zmm compare and the kshiftlw/kshiftrw $14 pair.
13114 define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
13115 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem:
13116 ; VLX: # %bb.0: # %entry
13117 ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
13118 ; VLX-NEXT: kmovd %k0, %eax
13121 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem:
13122 ; NoVLX: # %bb.0: # %entry
13123 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13124 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
13125 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13126 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13127 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13128 ; NoVLX-NEXT: kmovw %k0, %eax
13129 ; NoVLX-NEXT: vzeroupper
13132 %0 = bitcast <2 x i64> %__a to <2 x i64>
13133 %load = load <2 x i64>, <2 x i64>* %__b
13134 %1 = bitcast <2 x i64> %load to <2 x i64>
13135 %2 = icmp sge <2 x i64> %0, %1
; Indices 2 and 3 both refer to the zeroinitializer operand, so lanes 2..31 are zero.
13136 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13137 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of two <2 x i64> register arguments. The result is
; ANDed with the low two bits of %__u, padded to <32 x i1> and bitcast to i32.
; The VLX run expects an xmm vpcmpnltq under {%k1}; the NoVLX run expects a
; masked zmm compare followed by the kshiftlw/kshiftrw $14 pair.
13141 define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
13142 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask:
13143 ; VLX: # %bb.0: # %entry
13144 ; VLX-NEXT: kmovd %edi, %k1
13145 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
13146 ; VLX-NEXT: kmovd %k0, %eax
13149 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask:
13150 ; NoVLX: # %bb.0: # %entry
13151 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
13152 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13153 ; NoVLX-NEXT: kmovw %edi, %k1
13154 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13155 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13156 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13157 ; NoVLX-NEXT: kmovw %k0, %eax
13158 ; NoVLX-NEXT: vzeroupper
13161 %0 = bitcast <2 x i64> %__a to <2 x i64>
13162 %1 = bitcast <2 x i64> %__b to <2 x i64>
13163 %2 = icmp sge <2 x i64> %0, %1
; Elements 0 and 1 of %__u (viewed as <8 x i1>) gate the two compare results.
13164 %3 = bitcast i8 %__u to <8 x i1>
13165 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13166 %4 = and <2 x i1> %2, %extract.i
; Indices 2 and 3 both refer to the zeroinitializer operand, so lanes 2..31 are zero.
13167 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13168 %6 = bitcast <32 x i1> %5 to i32
; Masked signed >= compare of %__a with the <2 x i64> loaded from %__b. The
; result is ANDed with the low two bits of %__u, padded to <32 x i1> and
; bitcast to i32. The VLX run expects the load folded into a masked vpcmpnltq
; as (%rsi); the NoVLX run expects vmovdqa, a masked zmm compare and the
; kshiftlw/kshiftrw $14 pair.
13172 define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
13173 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem:
13174 ; VLX: # %bb.0: # %entry
13175 ; VLX-NEXT: kmovd %edi, %k1
13176 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
13177 ; VLX-NEXT: kmovd %k0, %eax
13180 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem:
13181 ; NoVLX: # %bb.0: # %entry
13182 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13183 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
13184 ; NoVLX-NEXT: kmovw %edi, %k1
13185 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13186 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13187 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13188 ; NoVLX-NEXT: kmovw %k0, %eax
13189 ; NoVLX-NEXT: vzeroupper
13192 %0 = bitcast <2 x i64> %__a to <2 x i64>
13193 %load = load <2 x i64>, <2 x i64>* %__b
13194 %1 = bitcast <2 x i64> %load to <2 x i64>
13195 %2 = icmp sge <2 x i64> %0, %1
; Elements 0 and 1 of %__u (viewed as <8 x i1>) gate the two compare results.
13196 %3 = bitcast i8 %__u to <8 x i1>
13197 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13198 %4 = and <2 x i1> %2, %extract.i
; Indices 2 and 3 both refer to the zeroinitializer operand, so lanes 2..31 are zero.
13199 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13200 %6 = bitcast <32 x i1> %5 to i32
; Unmasked signed >= compare of %__a with a scalar i64 loaded from %__b and
; splatted to both lanes; the result is padded to <32 x i1> and bitcast to i32.
; The VLX run expects the broadcast folded into vpcmpnltq as (%rdi){1to2}; the
; NoVLX run expects vpbroadcastq, a zmm compare and the kshiftlw/kshiftrw $14
; pair.
13205 define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
13206 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
13207 ; VLX: # %bb.0: # %entry
13208 ; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
13209 ; VLX-NEXT: kmovd %k0, %eax
13212 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
13213 ; NoVLX: # %bb.0: # %entry
13214 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13215 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
13216 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13217 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13218 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13219 ; NoVLX-NEXT: kmovw %k0, %eax
13220 ; NoVLX-NEXT: vzeroupper
13223 %0 = bitcast <2 x i64> %__a to <2 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with mask <0, 0>.
13224 %load = load i64, i64* %__b
13225 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13226 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13227 %2 = icmp sge <2 x i64> %0, %1
; Indices 2 and 3 both refer to the zeroinitializer operand, so lanes 2..31 are zero.
13228 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13229 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of %__a with a scalar i64 loaded from %__b and
; splatted to both lanes. The result is ANDed with the low two bits of %__u,
; padded to <32 x i1> and bitcast to i32. The VLX run expects a masked
; vpcmpnltq with a (%rsi){1to2} operand; the NoVLX run expects vpbroadcastq, a
; masked zmm compare and the kshiftlw/kshiftrw $14 pair.
13233 define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
13234 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
13235 ; VLX: # %bb.0: # %entry
13236 ; VLX-NEXT: kmovd %edi, %k1
13237 ; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
13238 ; VLX-NEXT: kmovd %k0, %eax
13241 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
13242 ; NoVLX: # %bb.0: # %entry
13243 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13244 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
13245 ; NoVLX-NEXT: kmovw %edi, %k1
13246 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13247 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13248 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13249 ; NoVLX-NEXT: kmovw %k0, %eax
13250 ; NoVLX-NEXT: vzeroupper
13253 %0 = bitcast <2 x i64> %__a to <2 x i64>
; Splat the loaded scalar: insert it into lane 0, then shuffle with mask <0, 0>.
13254 %load = load i64, i64* %__b
13255 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13256 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13257 %2 = icmp sge <2 x i64> %0, %1
; Elements 0 and 1 of %__u (viewed as <8 x i1>) gate the two compare results.
; Here the mask is the first operand of the and, unlike the non-broadcast variants.
13258 %3 = bitcast i8 %__u to <8 x i1>
13259 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13260 %4 = and <2 x i1> %extract.i, %2
; Indices 2 and 3 both refer to the zeroinitializer operand, so lanes 2..31 are zero.
13261 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13262 %6 = bitcast <32 x i1> %5 to i32
13267 define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
13268 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
13269 ; VLX: # %bb.0: # %entry
13270 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0
13271 ; VLX-NEXT: kmovq %k0, %rax
13274 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
13275 ; NoVLX: # %bb.0: # %entry
13276 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
13277 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13278 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13279 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13280 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13281 ; NoVLX-NEXT: kmovw %k0, %eax
13282 ; NoVLX-NEXT: movzwl %ax, %eax
13283 ; NoVLX-NEXT: vzeroupper
13286 %0 = bitcast <2 x i64> %__a to <2 x i64>
13287 %1 = bitcast <2 x i64> %__b to <2 x i64>
13288 %2 = icmp sge <2 x i64> %0, %1
13289 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13290 %4 = bitcast <64 x i1> %3 to i64
13294 define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
13295 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
13296 ; VLX: # %bb.0: # %entry
13297 ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
13298 ; VLX-NEXT: kmovq %k0, %rax
13301 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
13302 ; NoVLX: # %bb.0: # %entry
13303 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13304 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
13305 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13306 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13307 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13308 ; NoVLX-NEXT: kmovw %k0, %eax
13309 ; NoVLX-NEXT: movzwl %ax, %eax
13310 ; NoVLX-NEXT: vzeroupper
13313 %0 = bitcast <2 x i64> %__a to <2 x i64>
13314 %load = load <2 x i64>, <2 x i64>* %__b
13315 %1 = bitcast <2 x i64> %load to <2 x i64>
13316 %2 = icmp sge <2 x i64> %0, %1
13317 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13318 %4 = bitcast <64 x i1> %3 to i64
13322 define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
13323 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
13324 ; VLX: # %bb.0: # %entry
13325 ; VLX-NEXT: kmovd %edi, %k1
13326 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
13327 ; VLX-NEXT: kmovq %k0, %rax
13330 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
13331 ; NoVLX: # %bb.0: # %entry
13332 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
13333 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13334 ; NoVLX-NEXT: kmovw %edi, %k1
13335 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13336 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13337 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13338 ; NoVLX-NEXT: kmovw %k0, %eax
13339 ; NoVLX-NEXT: movzwl %ax, %eax
13340 ; NoVLX-NEXT: vzeroupper
13343 %0 = bitcast <2 x i64> %__a to <2 x i64>
13344 %1 = bitcast <2 x i64> %__b to <2 x i64>
13345 %2 = icmp sge <2 x i64> %0, %1
13346 %3 = bitcast i8 %__u to <8 x i1>
13347 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13348 %4 = and <2 x i1> %2, %extract.i
13349 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13350 %6 = bitcast <64 x i1> %5 to i64
13354 define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
13355 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
13356 ; VLX: # %bb.0: # %entry
13357 ; VLX-NEXT: kmovd %edi, %k1
13358 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
13359 ; VLX-NEXT: kmovq %k0, %rax
13362 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
13363 ; NoVLX: # %bb.0: # %entry
13364 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13365 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
13366 ; NoVLX-NEXT: kmovw %edi, %k1
13367 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13368 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13369 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13370 ; NoVLX-NEXT: kmovw %k0, %eax
13371 ; NoVLX-NEXT: movzwl %ax, %eax
13372 ; NoVLX-NEXT: vzeroupper
13375 %0 = bitcast <2 x i64> %__a to <2 x i64>
13376 %load = load <2 x i64>, <2 x i64>* %__b
13377 %1 = bitcast <2 x i64> %load to <2 x i64>
13378 %2 = icmp sge <2 x i64> %0, %1
13379 %3 = bitcast i8 %__u to <8 x i1>
13380 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13381 %4 = and <2 x i1> %2, %extract.i
13382 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13383 %6 = bitcast <64 x i1> %5 to i64
13388 define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
13389 ; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
13390 ; VLX: # %bb.0: # %entry
13391 ; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
13392 ; VLX-NEXT: kmovq %k0, %rax
13395 ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
13396 ; NoVLX: # %bb.0: # %entry
13397 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13398 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
13399 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13400 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13401 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13402 ; NoVLX-NEXT: kmovw %k0, %eax
13403 ; NoVLX-NEXT: movzwl %ax, %eax
13404 ; NoVLX-NEXT: vzeroupper
13407 %0 = bitcast <2 x i64> %__a to <2 x i64>
13408 %load = load i64, i64* %__b
13409 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13410 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13411 %2 = icmp sge <2 x i64> %0, %1
13412 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13413 %4 = bitcast <64 x i1> %3 to i64
13417 define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
13418 ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
13419 ; VLX: # %bb.0: # %entry
13420 ; VLX-NEXT: kmovd %edi, %k1
13421 ; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
13422 ; VLX-NEXT: kmovq %k0, %rax
13425 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
13426 ; NoVLX: # %bb.0: # %entry
13427 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13428 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
13429 ; NoVLX-NEXT: kmovw %edi, %k1
13430 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13431 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
13432 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
13433 ; NoVLX-NEXT: kmovw %k0, %eax
13434 ; NoVLX-NEXT: movzwl %ax, %eax
13435 ; NoVLX-NEXT: vzeroupper
13438 %0 = bitcast <2 x i64> %__a to <2 x i64>
13439 %load = load i64, i64* %__b
13440 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
13441 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
13442 %2 = icmp sge <2 x i64> %0, %1
13443 %3 = bitcast i8 %__u to <8 x i1>
13444 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
13445 %4 = and <2 x i1> %extract.i, %2
13446 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
13447 %6 = bitcast <64 x i1> %5 to i64
; Signed ">=" compare of two <4 x i64> arguments. The 4-lane result is padded
; with zeros to <8 x i1> and bitcast to i8.
; The VLX checks expect one vpcmpnltq on ymm registers. The NoVLX checks expect
; the compare on zmm registers plus a kshiftlw/kshiftrw pair by 12 that keeps
; only the low four bits of the 16-bit mask.
13452 define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13453 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask:
13454 ; VLX: # %bb.0: # %entry
13455 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0
13456 ; VLX-NEXT: kmovd %k0, %eax
13457 ; VLX-NEXT: # kill: def $al killed $al killed $eax
13458 ; VLX-NEXT: vzeroupper
13461 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask:
13462 ; NoVLX: # %bb.0: # %entry
13463 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13464 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13465 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13466 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13467 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13468 ; NoVLX-NEXT: kmovw %k0, %eax
13469 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
13470 ; NoVLX-NEXT: vzeroupper
13473 %0 = bitcast <4 x i64> %__a to <4 x i64>
13474 %1 = bitcast <4 x i64> %__b to <4 x i64>
13475 %2 = icmp sge <4 x i64> %0, %1
; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-7 are all false.
13476 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13477 %4 = bitcast <8 x i1> %3 to i8
; Signed ">=" compare of %__a against a <4 x i64> loaded from %__b. The 4-lane
; result is padded with zeros to <8 x i1> and bitcast to i8.
; The VLX checks expect vpcmpnltq with the memory operand folded in. The NoVLX
; checks expect a separate vmovdqa load, the compare on zmm registers, and a
; kshiftlw/kshiftrw pair by 12 that keeps only the low four bits of the 16-bit
; mask.
13481 define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13482 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem:
13483 ; VLX: # %bb.0: # %entry
13484 ; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
13485 ; VLX-NEXT: kmovd %k0, %eax
13486 ; VLX-NEXT: # kill: def $al killed $al killed $eax
13487 ; VLX-NEXT: vzeroupper
13490 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem:
13491 ; NoVLX: # %bb.0: # %entry
13492 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13493 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
13494 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13495 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13496 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13497 ; NoVLX-NEXT: kmovw %k0, %eax
13498 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
13499 ; NoVLX-NEXT: vzeroupper
13502 %0 = bitcast <4 x i64> %__a to <4 x i64>
13503 %load = load <4 x i64>, <4 x i64>* %__b
13504 %1 = bitcast <4 x i64> %load to <4 x i64>
13505 %2 = icmp sge <4 x i64> %0, %1
; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-7 are all false.
13506 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13507 %4 = bitcast <8 x i1> %3 to i8
; Masked signed ">=" compare of two <4 x i64> arguments. The low four bits of
; %__u gate the compare lanes, and the 4-lane result is padded with zeros to
; <8 x i1> and bitcast to i8.
; The VLX checks expect one masked vpcmpnltq on ymm registers. The NoVLX checks
; expect the masked compare on zmm registers plus a kshiftlw/kshiftrw pair by
; 12 that keeps only the low four bits of the 16-bit mask.
13511 define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13512 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask:
13513 ; VLX: # %bb.0: # %entry
13514 ; VLX-NEXT: kmovd %edi, %k1
13515 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
13516 ; VLX-NEXT: kmovd %k0, %eax
13517 ; VLX-NEXT: # kill: def $al killed $al killed $eax
13518 ; VLX-NEXT: vzeroupper
13521 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask:
13522 ; NoVLX: # %bb.0: # %entry
13523 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13524 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13525 ; NoVLX-NEXT: kmovw %edi, %k1
13526 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13527 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13528 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13529 ; NoVLX-NEXT: kmovw %k0, %eax
13530 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
13531 ; NoVLX-NEXT: vzeroupper
13534 %0 = bitcast <4 x i64> %__a to <4 x i64>
13535 %1 = bitcast <4 x i64> %__b to <4 x i64>
13536 %2 = icmp sge <4 x i64> %0, %1
13537 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0-3 of the 8-lane mask so it matches the compare width.
13538 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13539 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-7 are all false.
13540 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13541 %6 = bitcast <8 x i1> %5 to i8
; Masked signed ">=" compare of %__a against a <4 x i64> loaded from %__b. The
; low four bits of %__u gate the compare lanes, and the 4-lane result is padded
; with zeros to <8 x i1> and bitcast to i8.
; The VLX checks expect one masked vpcmpnltq with the memory operand folded in.
; The NoVLX checks expect a separate vmovdqa load, the masked compare on zmm
; registers, and a kshiftlw/kshiftrw pair by 12 that keeps only the low four
; bits of the 16-bit mask.
13545 define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13546 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem:
13547 ; VLX: # %bb.0: # %entry
13548 ; VLX-NEXT: kmovd %edi, %k1
13549 ; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
13550 ; VLX-NEXT: kmovd %k0, %eax
13551 ; VLX-NEXT: # kill: def $al killed $al killed $eax
13552 ; VLX-NEXT: vzeroupper
13555 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem:
13556 ; NoVLX: # %bb.0: # %entry
13557 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13558 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
13559 ; NoVLX-NEXT: kmovw %edi, %k1
13560 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13561 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13562 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13563 ; NoVLX-NEXT: kmovw %k0, %eax
13564 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
13565 ; NoVLX-NEXT: vzeroupper
13568 %0 = bitcast <4 x i64> %__a to <4 x i64>
13569 %load = load <4 x i64>, <4 x i64>* %__b
13570 %1 = bitcast <4 x i64> %load to <4 x i64>
13571 %2 = icmp sge <4 x i64> %0, %1
13572 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0-3 of the 8-lane mask so it matches the compare width.
13573 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13574 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-7 are all false.
13575 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13576 %6 = bitcast <8 x i1> %5 to i8
; Signed ">=" compare of %__a against a single i64 loaded from %__b and
; splatted to all four lanes. The 4-lane result is padded with zeros to
; <8 x i1> and bitcast to i8.
; The VLX checks expect vpcmpnltq with the embedded-broadcast operand {1to4}.
; The NoVLX checks expect a separate vpbroadcastq, the compare on zmm registers,
; and a kshiftlw/kshiftrw pair by 12 that keeps only the low four bits of the
; 16-bit mask.
13581 define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
13582 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
13583 ; VLX: # %bb.0: # %entry
13584 ; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0
13585 ; VLX-NEXT: kmovd %k0, %eax
13586 ; VLX-NEXT: # kill: def $al killed $al killed $eax
13587 ; VLX-NEXT: vzeroupper
13590 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
13591 ; NoVLX: # %bb.0: # %entry
13592 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13593 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
13594 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13595 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13596 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13597 ; NoVLX-NEXT: kmovw %k0, %eax
13598 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
13599 ; NoVLX-NEXT: vzeroupper
13602 %0 = bitcast <4 x i64> %__a to <4 x i64>
13603 %load = load i64, i64* %__b
; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into
; every lane.
13604 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13605 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13606 %2 = icmp sge <4 x i64> %0, %1
; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-7 are all false.
13607 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13608 %4 = bitcast <8 x i1> %3 to i8
; Masked signed ">=" compare of %__a against a single i64 loaded from %__b and
; splatted to all four lanes. The low four bits of %__u gate the compare lanes,
; and the 4-lane result is padded with zeros to <8 x i1> and bitcast to i8.
; The VLX checks expect one masked vpcmpnltq with the embedded-broadcast
; operand {1to4}. The NoVLX checks expect a separate vpbroadcastq, the masked
; compare on zmm registers, and a kshiftlw/kshiftrw pair by 12 that keeps only
; the low four bits of the 16-bit mask.
13612 define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
13613 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
13614 ; VLX: # %bb.0: # %entry
13615 ; VLX-NEXT: kmovd %edi, %k1
13616 ; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
13617 ; VLX-NEXT: kmovd %k0, %eax
13618 ; VLX-NEXT: # kill: def $al killed $al killed $eax
13619 ; VLX-NEXT: vzeroupper
13622 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
13623 ; NoVLX: # %bb.0: # %entry
13624 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13625 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
13626 ; NoVLX-NEXT: kmovw %edi, %k1
13627 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13628 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13629 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13630 ; NoVLX-NEXT: kmovw %k0, %eax
13631 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
13632 ; NoVLX-NEXT: vzeroupper
13635 %0 = bitcast <4 x i64> %__a to <4 x i64>
13636 %load = load i64, i64* %__b
; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into
; every lane.
13637 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13638 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13639 %2 = icmp sge <4 x i64> %0, %1
13640 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0-3 of the 8-lane mask so it matches the compare width.
13641 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13642 %4 = and <4 x i1> %extract.i, %2
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-7 are all false.
13643 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
13644 %6 = bitcast <8 x i1> %5 to i8
; Signed ">=" compare of two <4 x i64> arguments. The 4-lane result is padded
; with zeros to <16 x i1> and bitcast to i16.
; The VLX checks expect one vpcmpnltq on ymm registers. The NoVLX checks expect
; the compare on zmm registers plus a kshiftlw/kshiftrw pair by 12 that keeps
; only the low four bits of the 16-bit mask.
13649 define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13650 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask:
13651 ; VLX: # %bb.0: # %entry
13652 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0
13653 ; VLX-NEXT: kmovd %k0, %eax
13654 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13655 ; VLX-NEXT: vzeroupper
13658 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask:
13659 ; NoVLX: # %bb.0: # %entry
13660 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13661 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13662 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13663 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13664 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13665 ; NoVLX-NEXT: kmovw %k0, %eax
13666 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13667 ; NoVLX-NEXT: vzeroupper
13670 %0 = bitcast <4 x i64> %__a to <4 x i64>
13671 %1 = bitcast <4 x i64> %__b to <4 x i64>
13672 %2 = icmp sge <4 x i64> %0, %1
; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-15 are all false.
13673 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13674 %4 = bitcast <16 x i1> %3 to i16
; Signed ">=" compare of %__a against a <4 x i64> loaded from %__b. The 4-lane
; result is padded with zeros to <16 x i1> and bitcast to i16.
; The VLX checks expect vpcmpnltq with the memory operand folded in. The NoVLX
; checks expect a separate vmovdqa load, the compare on zmm registers, and a
; kshiftlw/kshiftrw pair by 12 that keeps only the low four bits of the 16-bit
; mask.
13678 define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13679 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem:
13680 ; VLX: # %bb.0: # %entry
13681 ; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
13682 ; VLX-NEXT: kmovd %k0, %eax
13683 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13684 ; VLX-NEXT: vzeroupper
13687 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem:
13688 ; NoVLX: # %bb.0: # %entry
13689 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13690 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
13691 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13692 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13693 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13694 ; NoVLX-NEXT: kmovw %k0, %eax
13695 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13696 ; NoVLX-NEXT: vzeroupper
13699 %0 = bitcast <4 x i64> %__a to <4 x i64>
13700 %load = load <4 x i64>, <4 x i64>* %__b
13701 %1 = bitcast <4 x i64> %load to <4 x i64>
13702 %2 = icmp sge <4 x i64> %0, %1
; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-15 are all false.
13703 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13704 %4 = bitcast <16 x i1> %3 to i16
; Masked signed ">=" compare of two <4 x i64> arguments. The low four bits of
; %__u gate the compare lanes, and the 4-lane result is padded with zeros to
; <16 x i1> and bitcast to i16.
; The VLX checks expect one masked vpcmpnltq on ymm registers. The NoVLX checks
; expect the masked compare on zmm registers plus a kshiftlw/kshiftrw pair by
; 12 that keeps only the low four bits of the 16-bit mask.
13708 define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13709 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask:
13710 ; VLX: # %bb.0: # %entry
13711 ; VLX-NEXT: kmovd %edi, %k1
13712 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
13713 ; VLX-NEXT: kmovd %k0, %eax
13714 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13715 ; VLX-NEXT: vzeroupper
13718 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask:
13719 ; NoVLX: # %bb.0: # %entry
13720 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13721 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13722 ; NoVLX-NEXT: kmovw %edi, %k1
13723 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13724 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13725 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13726 ; NoVLX-NEXT: kmovw %k0, %eax
13727 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13728 ; NoVLX-NEXT: vzeroupper
13731 %0 = bitcast <4 x i64> %__a to <4 x i64>
13732 %1 = bitcast <4 x i64> %__b to <4 x i64>
13733 %2 = icmp sge <4 x i64> %0, %1
13734 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0-3 of the 8-lane mask so it matches the compare width.
13735 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13736 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-15 are all false.
13737 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13738 %6 = bitcast <16 x i1> %5 to i16
; Masked signed ">=" compare of %__a against a <4 x i64> loaded from %__b. The
; low four bits of %__u gate the compare lanes, and the 4-lane result is padded
; with zeros to <16 x i1> and bitcast to i16.
; The VLX checks expect one masked vpcmpnltq with the memory operand folded in.
; The NoVLX checks expect a separate vmovdqa load, the masked compare on zmm
; registers, and a kshiftlw/kshiftrw pair by 12 that keeps only the low four
; bits of the 16-bit mask.
13742 define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13743 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem:
13744 ; VLX: # %bb.0: # %entry
13745 ; VLX-NEXT: kmovd %edi, %k1
13746 ; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
13747 ; VLX-NEXT: kmovd %k0, %eax
13748 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13749 ; VLX-NEXT: vzeroupper
13752 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem:
13753 ; NoVLX: # %bb.0: # %entry
13754 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13755 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
13756 ; NoVLX-NEXT: kmovw %edi, %k1
13757 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13758 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13759 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13760 ; NoVLX-NEXT: kmovw %k0, %eax
13761 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13762 ; NoVLX-NEXT: vzeroupper
13765 %0 = bitcast <4 x i64> %__a to <4 x i64>
13766 %load = load <4 x i64>, <4 x i64>* %__b
13767 %1 = bitcast <4 x i64> %load to <4 x i64>
13768 %2 = icmp sge <4 x i64> %0, %1
13769 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0-3 of the 8-lane mask so it matches the compare width.
13770 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13771 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-15 are all false.
13772 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13773 %6 = bitcast <16 x i1> %5 to i16
; Signed ">=" compare of %__a against a single i64 loaded from %__b and
; splatted to all four lanes. The 4-lane result is padded with zeros to
; <16 x i1> and bitcast to i16.
; The VLX checks expect vpcmpnltq with the embedded-broadcast operand {1to4}.
; The NoVLX checks expect a separate vpbroadcastq, the compare on zmm registers,
; and a kshiftlw/kshiftrw pair by 12 that keeps only the low four bits of the
; 16-bit mask.
13778 define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
13779 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
13780 ; VLX: # %bb.0: # %entry
13781 ; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0
13782 ; VLX-NEXT: kmovd %k0, %eax
13783 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13784 ; VLX-NEXT: vzeroupper
13787 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
13788 ; NoVLX: # %bb.0: # %entry
13789 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13790 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
13791 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13792 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13793 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13794 ; NoVLX-NEXT: kmovw %k0, %eax
13795 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13796 ; NoVLX-NEXT: vzeroupper
13799 %0 = bitcast <4 x i64> %__a to <4 x i64>
13800 %load = load i64, i64* %__b
; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into
; every lane.
13801 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13802 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13803 %2 = icmp sge <4 x i64> %0, %1
; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-15 are all false.
13804 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13805 %4 = bitcast <16 x i1> %3 to i16
; Masked signed ">=" compare of %__a against a single i64 loaded from %__b and
; splatted to all four lanes. The low four bits of %__u gate the compare lanes,
; and the 4-lane result is padded with zeros to <16 x i1> and bitcast to i16.
; The VLX checks expect one masked vpcmpnltq with the embedded-broadcast
; operand {1to4}. The NoVLX checks expect a separate vpbroadcastq, the masked
; compare on zmm registers, and a kshiftlw/kshiftrw pair by 12 that keeps only
; the low four bits of the 16-bit mask.
13809 define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
13810 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
13811 ; VLX: # %bb.0: # %entry
13812 ; VLX-NEXT: kmovd %edi, %k1
13813 ; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
13814 ; VLX-NEXT: kmovd %k0, %eax
13815 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
13816 ; VLX-NEXT: vzeroupper
13819 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
13820 ; NoVLX: # %bb.0: # %entry
13821 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13822 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
13823 ; NoVLX-NEXT: kmovw %edi, %k1
13824 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13825 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13826 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13827 ; NoVLX-NEXT: kmovw %k0, %eax
13828 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
13829 ; NoVLX-NEXT: vzeroupper
13832 %0 = bitcast <4 x i64> %__a to <4 x i64>
13833 %load = load i64, i64* %__b
; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into
; every lane.
13834 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13835 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13836 %2 = icmp sge <4 x i64> %0, %1
13837 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0-3 of the 8-lane mask so it matches the compare width.
13838 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13839 %4 = and <4 x i1> %extract.i, %2
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-15 are all false.
13840 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13841 %6 = bitcast <16 x i1> %5 to i16
; Signed ">=" compare of two <4 x i64> arguments. The 4-lane result is padded
; with zeros to <32 x i1> and bitcast to i32.
; The VLX checks expect one vpcmpnltq on ymm registers. The NoVLX checks expect
; the compare on zmm registers plus a kshiftlw/kshiftrw pair by 12 that keeps
; only the low four bits of the 16-bit mask.
13846 define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13847 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
13848 ; VLX: # %bb.0: # %entry
13849 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0
13850 ; VLX-NEXT: kmovd %k0, %eax
13851 ; VLX-NEXT: vzeroupper
13854 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
13855 ; NoVLX: # %bb.0: # %entry
13856 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13857 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13858 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13859 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13860 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13861 ; NoVLX-NEXT: kmovw %k0, %eax
13862 ; NoVLX-NEXT: vzeroupper
13865 %0 = bitcast <4 x i64> %__a to <4 x i64>
13866 %1 = bitcast <4 x i64> %__b to <4 x i64>
13867 %2 = icmp sge <4 x i64> %0, %1
; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-31 are all false.
13868 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13869 %4 = bitcast <32 x i1> %3 to i32
; Signed ">=" compare of %__a against a <4 x i64> loaded from %__b. The 4-lane
; result is padded with zeros to <32 x i1> and bitcast to i32.
; The VLX checks expect vpcmpnltq with the memory operand folded in. The NoVLX
; checks expect a separate vmovdqa load, the compare on zmm registers, and a
; kshiftlw/kshiftrw pair by 12 that keeps only the low four bits of the 16-bit
; mask.
13873 define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13874 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
13875 ; VLX: # %bb.0: # %entry
13876 ; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
13877 ; VLX-NEXT: kmovd %k0, %eax
13878 ; VLX-NEXT: vzeroupper
13881 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
13882 ; NoVLX: # %bb.0: # %entry
13883 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13884 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
13885 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13886 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13887 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13888 ; NoVLX-NEXT: kmovw %k0, %eax
13889 ; NoVLX-NEXT: vzeroupper
13892 %0 = bitcast <4 x i64> %__a to <4 x i64>
13893 %load = load <4 x i64>, <4 x i64>* %__b
13894 %1 = bitcast <4 x i64> %load to <4 x i64>
13895 %2 = icmp sge <4 x i64> %0, %1
; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-31 are all false.
13896 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13897 %4 = bitcast <32 x i1> %3 to i32
; Masked signed ">=" compare of two <4 x i64> arguments. The low four bits of
; %__u gate the compare lanes, and the 4-lane result is padded with zeros to
; <32 x i1> and bitcast to i32.
; The VLX checks expect one masked vpcmpnltq on ymm registers. The NoVLX checks
; expect the masked compare on zmm registers plus a kshiftlw/kshiftrw pair by
; 12 that keeps only the low four bits of the 16-bit mask.
13901 define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
13902 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
13903 ; VLX: # %bb.0: # %entry
13904 ; VLX-NEXT: kmovd %edi, %k1
13905 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
13906 ; VLX-NEXT: kmovd %k0, %eax
13907 ; VLX-NEXT: vzeroupper
13910 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
13911 ; NoVLX: # %bb.0: # %entry
13912 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
13913 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13914 ; NoVLX-NEXT: kmovw %edi, %k1
13915 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13916 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13917 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13918 ; NoVLX-NEXT: kmovw %k0, %eax
13919 ; NoVLX-NEXT: vzeroupper
13922 %0 = bitcast <4 x i64> %__a to <4 x i64>
13923 %1 = bitcast <4 x i64> %__b to <4 x i64>
13924 %2 = icmp sge <4 x i64> %0, %1
13925 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0-3 of the 8-lane mask so it matches the compare width.
13926 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13927 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-31 are all false.
13928 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13929 %6 = bitcast <32 x i1> %5 to i32
; Masked signed ">=" compare of %__a against a <4 x i64> loaded from %__b. The
; low four bits of %__u gate the compare lanes, and the 4-lane result is padded
; with zeros to <32 x i1> and bitcast to i32.
; The VLX checks expect one masked vpcmpnltq with the memory operand folded in.
; The NoVLX checks expect a separate vmovdqa load, the masked compare on zmm
; registers, and a kshiftlw/kshiftrw pair by 12 that keeps only the low four
; bits of the 16-bit mask.
13933 define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
13934 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
13935 ; VLX: # %bb.0: # %entry
13936 ; VLX-NEXT: kmovd %edi, %k1
13937 ; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
13938 ; VLX-NEXT: kmovd %k0, %eax
13939 ; VLX-NEXT: vzeroupper
13942 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
13943 ; NoVLX: # %bb.0: # %entry
13944 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13945 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
13946 ; NoVLX-NEXT: kmovw %edi, %k1
13947 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
13948 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13949 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13950 ; NoVLX-NEXT: kmovw %k0, %eax
13951 ; NoVLX-NEXT: vzeroupper
13954 %0 = bitcast <4 x i64> %__a to <4 x i64>
13955 %load = load <4 x i64>, <4 x i64>* %__b
13956 %1 = bitcast <4 x i64> %load to <4 x i64>
13957 %2 = icmp sge <4 x i64> %0, %1
13958 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0-3 of the 8-lane mask so it matches the compare width.
13959 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13960 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-31 are all false.
13961 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13962 %6 = bitcast <32 x i1> %5 to i32
; Signed ">=" compare of %__a against a single i64 loaded from %__b and
; splatted to all four lanes. The 4-lane result is padded with zeros to
; <32 x i1> and bitcast to i32.
; The VLX checks expect vpcmpnltq with the embedded-broadcast operand {1to4}.
; The NoVLX checks expect a separate vpbroadcastq, the compare on zmm registers,
; and a kshiftlw/kshiftrw pair by 12 that keeps only the low four bits of the
; 16-bit mask.
13967 define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
13968 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
13969 ; VLX: # %bb.0: # %entry
13970 ; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0
13971 ; VLX-NEXT: kmovd %k0, %eax
13972 ; VLX-NEXT: vzeroupper
13975 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
13976 ; NoVLX: # %bb.0: # %entry
13977 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
13978 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
13979 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
13980 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
13981 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
13982 ; NoVLX-NEXT: kmovw %k0, %eax
13983 ; NoVLX-NEXT: vzeroupper
13986 %0 = bitcast <4 x i64> %__a to <4 x i64>
13987 %load = load i64, i64* %__b
; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into
; every lane.
13988 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
13989 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
13990 %2 = icmp sge <4 x i64> %0, %1
; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-31 are all false.
13991 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
13992 %4 = bitcast <32 x i1> %3 to i32
; Masked signed ">=" compare of %__a against a single i64 loaded from %__b and
; splatted to all four lanes. The low four bits of %__u gate the compare lanes,
; and the 4-lane result is padded with zeros to <32 x i1> and bitcast to i32.
; The VLX checks expect one masked vpcmpnltq with the embedded-broadcast
; operand {1to4}. The NoVLX checks expect a separate vpbroadcastq, the masked
; compare on zmm registers, and a kshiftlw/kshiftrw pair by 12 that keeps only
; the low four bits of the 16-bit mask.
13996 define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
13997 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
13998 ; VLX: # %bb.0: # %entry
13999 ; VLX-NEXT: kmovd %edi, %k1
14000 ; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
14001 ; VLX-NEXT: kmovd %k0, %eax
14002 ; VLX-NEXT: vzeroupper
14005 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
14006 ; NoVLX: # %bb.0: # %entry
14007 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
14008 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
14009 ; NoVLX-NEXT: kmovw %edi, %k1
14010 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14011 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
14012 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
14013 ; NoVLX-NEXT: kmovw %k0, %eax
14014 ; NoVLX-NEXT: vzeroupper
14017 %0 = bitcast <4 x i64> %__a to <4 x i64>
14018 %load = load i64, i64* %__b
; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into
; every lane.
14019 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
14020 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
14021 %2 = icmp sge <4 x i64> %0, %1
14022 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0-3 of the 8-lane mask so it matches the compare width.
14023 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
14024 %4 = and <4 x i1> %extract.i, %2
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-31 are all false.
14025 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
14026 %6 = bitcast <32 x i1> %5 to i32
; Signed ">=" compare of two <4 x i64> arguments. The 4-lane result is padded
; with zeros to <64 x i1> and bitcast to i64.
; The VLX checks expect one vpcmpnltq on ymm registers read out with kmovq.
; The NoVLX checks expect the compare on zmm registers, a kshiftlw/kshiftrw
; pair by 12 that keeps only the low four bits of the 16-bit mask, and a
; movzwl after the kmovw.
14031 define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
14032 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
14033 ; VLX: # %bb.0: # %entry
14034 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0
14035 ; VLX-NEXT: kmovq %k0, %rax
14036 ; VLX-NEXT: vzeroupper
14039 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
14040 ; NoVLX: # %bb.0: # %entry
14041 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
14042 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
14043 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14044 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
14045 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
14046 ; NoVLX-NEXT: kmovw %k0, %eax
14047 ; NoVLX-NEXT: movzwl %ax, %eax
14048 ; NoVLX-NEXT: vzeroupper
14051 %0 = bitcast <4 x i64> %__a to <4 x i64>
14052 %1 = bitcast <4 x i64> %__b to <4 x i64>
14053 %2 = icmp sge <4 x i64> %0, %1
; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-63 are all false.
14054 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
14055 %4 = bitcast <64 x i1> %3 to i64
; Signed ">=" compare of %__a against a <4 x i64> loaded from %__b. The 4-lane
; result is padded with zeros to <64 x i1> and bitcast to i64.
; The VLX checks expect vpcmpnltq with the memory operand folded in, read out
; with kmovq. The NoVLX checks expect a separate vmovdqa load, the compare on
; zmm registers, a kshiftlw/kshiftrw pair by 12 that keeps only the low four
; bits of the 16-bit mask, and a movzwl after the kmovw.
14059 define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
14060 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
14061 ; VLX: # %bb.0: # %entry
14062 ; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
14063 ; VLX-NEXT: kmovq %k0, %rax
14064 ; VLX-NEXT: vzeroupper
14067 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
14068 ; NoVLX: # %bb.0: # %entry
14069 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
14070 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
14071 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14072 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
14073 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
14074 ; NoVLX-NEXT: kmovw %k0, %eax
14075 ; NoVLX-NEXT: movzwl %ax, %eax
14076 ; NoVLX-NEXT: vzeroupper
14079 %0 = bitcast <4 x i64> %__a to <4 x i64>
14080 %load = load <4 x i64>, <4 x i64>* %__b
14081 %1 = bitcast <4 x i64> %load to <4 x i64>
14082 %2 = icmp sge <4 x i64> %0, %1
; Indices 0-3 select the compare lanes; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-63 are all false.
14083 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
14084 %4 = bitcast <64 x i1> %3 to i64
; Masked signed ">=" compare of two <4 x i64> arguments. The low four bits of
; %__u gate the compare lanes, and the 4-lane result is padded with zeros to
; <64 x i1> and bitcast to i64.
; The VLX checks expect one masked vpcmpnltq on ymm registers read out with
; kmovq. The NoVLX checks expect the masked compare on zmm registers, a
; kshiftlw/kshiftrw pair by 12 that keeps only the low four bits of the 16-bit
; mask, and a movzwl after the kmovw.
14088 define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
14089 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
14090 ; VLX: # %bb.0: # %entry
14091 ; VLX-NEXT: kmovd %edi, %k1
14092 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
14093 ; VLX-NEXT: kmovq %k0, %rax
14094 ; VLX-NEXT: vzeroupper
14097 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
14098 ; NoVLX: # %bb.0: # %entry
14099 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
14100 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
14101 ; NoVLX-NEXT: kmovw %edi, %k1
14102 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14103 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
14104 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
14105 ; NoVLX-NEXT: kmovw %k0, %eax
14106 ; NoVLX-NEXT: movzwl %ax, %eax
14107 ; NoVLX-NEXT: vzeroupper
14110 %0 = bitcast <4 x i64> %__a to <4 x i64>
14111 %1 = bitcast <4 x i64> %__b to <4 x i64>
14112 %2 = icmp sge <4 x i64> %0, %1
14113 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0-3 of the 8-lane mask so it matches the compare width.
14114 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
14115 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 select the masked compare lanes; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-63 are all false.
14116 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
14117 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare of %__a against a <4 x i64> vector loaded from
; %__b. The low 4 lanes of the i8 mask %__u are ANDed with the compare result,
; which is then widened with zero lanes to <64 x i1> and bitcast to i64.
; With AVX512VL the load is expected to fold into vpcmpnltq under {%k1};
; without it the operand is loaded with vmovdqa, compared at 512 bits and the
; mask is trimmed to its low 4 bits with kshiftlw/kshiftrw $12.
14121 define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
14122 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
14123 ; VLX: # %bb.0: # %entry
14124 ; VLX-NEXT: kmovd %edi, %k1
14125 ; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
14126 ; VLX-NEXT: kmovq %k0, %rax
14127 ; VLX-NEXT: vzeroupper
14130 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
14131 ; NoVLX: # %bb.0: # %entry
14132 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
14133 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
14134 ; NoVLX-NEXT: kmovw %edi, %k1
14135 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14136 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
14137 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
14138 ; NoVLX-NEXT: kmovw %k0, %eax
14139 ; NoVLX-NEXT: movzwl %ax, %eax
14140 ; NoVLX-NEXT: vzeroupper
14143 %0 = bitcast <4 x i64> %__a to <4 x i64>
14144 %load = load <4 x i64>, <4 x i64>* %__b
14145 %1 = bitcast <4 x i64> %load to <4 x i64>
14146 %2 = icmp sge <4 x i64> %0, %1
14147 %3 = bitcast i8 %__u to <8 x i1>
14148 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
14149 %4 = and <4 x i1> %2, %extract.i
14150 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
14151 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of %__a against a scalar i64 loaded from %__b and splatted
; to all 4 lanes (insertelement + all-zero shuffle mask). The <4 x i1> result
; is widened with zero lanes to <64 x i1> and bitcast to i64.
; With AVX512VL the splat is expected to fold into an embedded {1to4}
; broadcast operand; without it an explicit vpbroadcastq feeds a 512-bit
; compare whose mask is trimmed to its low 4 bits with kshiftlw/kshiftrw $12.
14156 define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
14157 ; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
14158 ; VLX: # %bb.0: # %entry
14159 ; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0
14160 ; VLX-NEXT: kmovq %k0, %rax
14161 ; VLX-NEXT: vzeroupper
14164 ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
14165 ; NoVLX: # %bb.0: # %entry
14166 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
14167 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
14168 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14169 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
14170 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
14171 ; NoVLX-NEXT: kmovw %k0, %eax
14172 ; NoVLX-NEXT: movzwl %ax, %eax
14173 ; NoVLX-NEXT: vzeroupper
14176 %0 = bitcast <4 x i64> %__a to <4 x i64>
14177 %load = load i64, i64* %__b
14178 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
14179 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
14180 %2 = icmp sge <4 x i64> %0, %1
14181 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
14182 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of %__a against a scalar i64 loaded from %__b and
; splatted to all 4 lanes. The low 4 lanes of the i8 mask %__u are ANDed with
; the compare result (mask is the first AND operand here), and the <4 x i1>
; value is widened with zero lanes to <64 x i1> and bitcast to i64.
; With AVX512VL the expected code is a {1to4} broadcast compare under {%k1};
; without it vpbroadcastq feeds a masked 512-bit compare whose result is
; trimmed to its low 4 bits with kshiftlw/kshiftrw $12.
14186 define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
14187 ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
14188 ; VLX: # %bb.0: # %entry
14189 ; VLX-NEXT: kmovd %edi, %k1
14190 ; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
14191 ; VLX-NEXT: kmovq %k0, %rax
14192 ; VLX-NEXT: vzeroupper
14195 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
14196 ; NoVLX: # %bb.0: # %entry
14197 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
14198 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
14199 ; NoVLX-NEXT: kmovw %edi, %k1
14200 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14201 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
14202 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
14203 ; NoVLX-NEXT: kmovw %k0, %eax
14204 ; NoVLX-NEXT: movzwl %ax, %eax
14205 ; NoVLX-NEXT: vzeroupper
14208 %0 = bitcast <4 x i64> %__a to <4 x i64>
14209 %load = load i64, i64* %__b
14210 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
14211 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
14212 %2 = icmp sge <4 x i64> %0, %1
14213 %3 = bitcast i8 %__u to <8 x i1>
14214 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
14215 %4 = and <4 x i1> %extract.i, %2
14216 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
14217 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of two <8 x i64> register operands. The <8 x i1> result is
; widened to <16 x i1> by a shuffle whose indices 8..15 select lanes of the
; zeroinitializer operand, then bitcast to i16.
; Both configurations expect one 512-bit vpcmpnltq; they differ only in the
; mask-to-GPR move (kmovd with AVX512BW enabled, kmovw otherwise).
14222 define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14223 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask:
14224 ; VLX: # %bb.0: # %entry
14225 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14226 ; VLX-NEXT: kmovd %k0, %eax
14227 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14228 ; VLX-NEXT: vzeroupper
14231 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask:
14232 ; NoVLX: # %bb.0: # %entry
14233 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14234 ; NoVLX-NEXT: kmovw %k0, %eax
14235 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14236 ; NoVLX-NEXT: vzeroupper
14239 %0 = bitcast <8 x i64> %__a to <8 x i64>
14240 %1 = bitcast <8 x i64> %__b to <8 x i64>
14241 %2 = icmp sge <8 x i64> %0, %1
14242 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14243 %4 = bitcast <16 x i1> %3 to i16
; Signed >= compare of %__a against an <8 x i64> vector loaded from %__b.
; The <8 x i1> result is widened with zero lanes to <16 x i1> and bitcast to
; i16.
; Both configurations expect the load to fold into vpcmpnltq as a (%rdi)
; memory operand.
14247 define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
14248 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem:
14249 ; VLX: # %bb.0: # %entry
14250 ; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
14251 ; VLX-NEXT: kmovd %k0, %eax
14252 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14253 ; VLX-NEXT: vzeroupper
14256 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem:
14257 ; NoVLX: # %bb.0: # %entry
14258 ; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
14259 ; NoVLX-NEXT: kmovw %k0, %eax
14260 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14261 ; NoVLX-NEXT: vzeroupper
14264 %0 = bitcast <8 x i64> %__a to <8 x i64>
14265 %load = load <8 x i64>, <8 x i64>* %__b
14266 %1 = bitcast <8 x i64> %load to <8 x i64>
14267 %2 = icmp sge <8 x i64> %0, %1
14268 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14269 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of two <8 x i64> register operands.
; The i8 mask %__u is bitcast to <8 x i1> and ANDed with the compare result,
; which is then widened with zero lanes to <16 x i1> and bitcast to i16.
; Both configurations expect the AND to be folded into vpcmpnltq as a {%k1}
; write-mask.
14273 define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14274 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask:
14275 ; VLX: # %bb.0: # %entry
14276 ; VLX-NEXT: kmovd %edi, %k1
14277 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14278 ; VLX-NEXT: kmovd %k0, %eax
14279 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14280 ; VLX-NEXT: vzeroupper
14283 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask:
14284 ; NoVLX: # %bb.0: # %entry
14285 ; NoVLX-NEXT: kmovw %edi, %k1
14286 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14287 ; NoVLX-NEXT: kmovw %k0, %eax
14288 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14289 ; NoVLX-NEXT: vzeroupper
14292 %0 = bitcast <8 x i64> %__a to <8 x i64>
14293 %1 = bitcast <8 x i64> %__b to <8 x i64>
14294 %2 = icmp sge <8 x i64> %0, %1
14295 %3 = bitcast i8 %__u to <8 x i1>
14296 %4 = and <8 x i1> %2, %3
14297 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14298 %6 = bitcast <16 x i1> %5 to i16
; Masked signed >= compare of %__a against an <8 x i64> vector loaded from
; %__b. The i8 mask %__u is ANDed with the <8 x i1> compare result, which is
; then widened with zero lanes to <16 x i1> and bitcast to i16.
; Both configurations expect a single vpcmpnltq with a (%rsi) memory operand
; under the {%k1} write-mask.
14302 define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
14303 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem:
14304 ; VLX: # %bb.0: # %entry
14305 ; VLX-NEXT: kmovd %edi, %k1
14306 ; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14307 ; VLX-NEXT: kmovd %k0, %eax
14308 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14309 ; VLX-NEXT: vzeroupper
14312 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem:
14313 ; NoVLX: # %bb.0: # %entry
14314 ; NoVLX-NEXT: kmovw %edi, %k1
14315 ; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14316 ; NoVLX-NEXT: kmovw %k0, %eax
14317 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14318 ; NoVLX-NEXT: vzeroupper
14321 %0 = bitcast <8 x i64> %__a to <8 x i64>
14322 %load = load <8 x i64>, <8 x i64>* %__b
14323 %1 = bitcast <8 x i64> %load to <8 x i64>
14324 %2 = icmp sge <8 x i64> %0, %1
14325 %3 = bitcast i8 %__u to <8 x i1>
14326 %4 = and <8 x i1> %2, %3
14327 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14328 %6 = bitcast <16 x i1> %5 to i16
; Signed >= compare of %__a against a scalar i64 loaded from %__b and splatted
; to all 8 lanes (insertelement + all-zero shuffle mask). The <8 x i1> result
; is widened with zero lanes to <16 x i1> and bitcast to i16.
; Both configurations expect the splat to fold into an embedded {1to8}
; broadcast operand of vpcmpnltq.
14333 define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
14334 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
14335 ; VLX: # %bb.0: # %entry
14336 ; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14337 ; VLX-NEXT: kmovd %k0, %eax
14338 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14339 ; VLX-NEXT: vzeroupper
14342 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
14343 ; NoVLX: # %bb.0: # %entry
14344 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14345 ; NoVLX-NEXT: kmovw %k0, %eax
14346 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14347 ; NoVLX-NEXT: vzeroupper
14350 %0 = bitcast <8 x i64> %__a to <8 x i64>
14351 %load = load i64, i64* %__b
14352 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14353 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14354 %2 = icmp sge <8 x i64> %0, %1
14355 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14356 %4 = bitcast <16 x i1> %3 to i16
; Masked signed >= compare of %__a against a scalar i64 loaded from %__b and
; splatted to all 8 lanes. The i8 mask %__u is ANDed with the compare result
; (mask is the first AND operand here), and the <8 x i1> value is widened with
; zero lanes to <16 x i1> and bitcast to i16.
; Both configurations expect a {1to8} broadcast compare under {%k1}.
14360 define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
14361 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
14362 ; VLX: # %bb.0: # %entry
14363 ; VLX-NEXT: kmovd %edi, %k1
14364 ; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14365 ; VLX-NEXT: kmovd %k0, %eax
14366 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
14367 ; VLX-NEXT: vzeroupper
14370 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
14371 ; NoVLX: # %bb.0: # %entry
14372 ; NoVLX-NEXT: kmovw %edi, %k1
14373 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14374 ; NoVLX-NEXT: kmovw %k0, %eax
14375 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
14376 ; NoVLX-NEXT: vzeroupper
14379 %0 = bitcast <8 x i64> %__a to <8 x i64>
14380 %load = load i64, i64* %__b
14381 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14382 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14383 %2 = icmp sge <8 x i64> %0, %1
14384 %3 = bitcast i8 %__u to <8 x i1>
14385 %4 = and <8 x i1> %3, %2
14386 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14387 %6 = bitcast <16 x i1> %5 to i16
; Signed >= compare of two <8 x i64> register operands. The <8 x i1> result is
; widened to <32 x i1> by a shuffle whose indices 8..15 (repeated) select
; lanes of the zeroinitializer operand, then bitcast to i32.
; Both configurations expect one 512-bit vpcmpnltq followed by a mask-to-GPR
; move, with no extra zero-extension instruction.
14392 define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14393 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask:
14394 ; VLX: # %bb.0: # %entry
14395 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14396 ; VLX-NEXT: kmovd %k0, %eax
14397 ; VLX-NEXT: vzeroupper
14400 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask:
14401 ; NoVLX: # %bb.0: # %entry
14402 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14403 ; NoVLX-NEXT: kmovw %k0, %eax
14404 ; NoVLX-NEXT: vzeroupper
14407 %0 = bitcast <8 x i64> %__a to <8 x i64>
14408 %1 = bitcast <8 x i64> %__b to <8 x i64>
14409 %2 = icmp sge <8 x i64> %0, %1
14410 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14411 %4 = bitcast <32 x i1> %3 to i32
; Signed >= compare of %__a against an <8 x i64> vector loaded from %__b.
; The <8 x i1> result is widened with zero lanes to <32 x i1> and bitcast to
; i32.
; Both configurations expect the load to fold into vpcmpnltq as a (%rdi)
; memory operand.
14415 define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
14416 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem:
14417 ; VLX: # %bb.0: # %entry
14418 ; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
14419 ; VLX-NEXT: kmovd %k0, %eax
14420 ; VLX-NEXT: vzeroupper
14423 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem:
14424 ; NoVLX: # %bb.0: # %entry
14425 ; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
14426 ; NoVLX-NEXT: kmovw %k0, %eax
14427 ; NoVLX-NEXT: vzeroupper
14430 %0 = bitcast <8 x i64> %__a to <8 x i64>
14431 %load = load <8 x i64>, <8 x i64>* %__b
14432 %1 = bitcast <8 x i64> %load to <8 x i64>
14433 %2 = icmp sge <8 x i64> %0, %1
14434 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14435 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of two <8 x i64> register operands.
; The i8 mask %__u is bitcast to <8 x i1> and ANDed with the compare result,
; which is then widened with zero lanes to <32 x i1> and bitcast to i32.
; Both configurations expect the AND to be folded into vpcmpnltq as a {%k1}
; write-mask.
14439 define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14440 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask:
14441 ; VLX: # %bb.0: # %entry
14442 ; VLX-NEXT: kmovd %edi, %k1
14443 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14444 ; VLX-NEXT: kmovd %k0, %eax
14445 ; VLX-NEXT: vzeroupper
14448 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask:
14449 ; NoVLX: # %bb.0: # %entry
14450 ; NoVLX-NEXT: kmovw %edi, %k1
14451 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14452 ; NoVLX-NEXT: kmovw %k0, %eax
14453 ; NoVLX-NEXT: vzeroupper
14456 %0 = bitcast <8 x i64> %__a to <8 x i64>
14457 %1 = bitcast <8 x i64> %__b to <8 x i64>
14458 %2 = icmp sge <8 x i64> %0, %1
14459 %3 = bitcast i8 %__u to <8 x i1>
14460 %4 = and <8 x i1> %2, %3
14461 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14462 %6 = bitcast <32 x i1> %5 to i32
; Masked signed >= compare of %__a against an <8 x i64> vector loaded from
; %__b. The i8 mask %__u is ANDed with the <8 x i1> compare result, which is
; then widened with zero lanes to <32 x i1> and bitcast to i32.
; Both configurations expect a single vpcmpnltq with a (%rsi) memory operand
; under the {%k1} write-mask.
14466 define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
14467 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem:
14468 ; VLX: # %bb.0: # %entry
14469 ; VLX-NEXT: kmovd %edi, %k1
14470 ; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14471 ; VLX-NEXT: kmovd %k0, %eax
14472 ; VLX-NEXT: vzeroupper
14475 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem:
14476 ; NoVLX: # %bb.0: # %entry
14477 ; NoVLX-NEXT: kmovw %edi, %k1
14478 ; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14479 ; NoVLX-NEXT: kmovw %k0, %eax
14480 ; NoVLX-NEXT: vzeroupper
14483 %0 = bitcast <8 x i64> %__a to <8 x i64>
14484 %load = load <8 x i64>, <8 x i64>* %__b
14485 %1 = bitcast <8 x i64> %load to <8 x i64>
14486 %2 = icmp sge <8 x i64> %0, %1
14487 %3 = bitcast i8 %__u to <8 x i1>
14488 %4 = and <8 x i1> %2, %3
14489 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14490 %6 = bitcast <32 x i1> %5 to i32
; Signed >= compare of %__a against a scalar i64 loaded from %__b and splatted
; to all 8 lanes (insertelement + all-zero shuffle mask). The <8 x i1> result
; is widened with zero lanes to <32 x i1> and bitcast to i32.
; Both configurations expect the splat to fold into an embedded {1to8}
; broadcast operand of vpcmpnltq.
14495 define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
14496 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
14497 ; VLX: # %bb.0: # %entry
14498 ; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14499 ; VLX-NEXT: kmovd %k0, %eax
14500 ; VLX-NEXT: vzeroupper
14503 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
14504 ; NoVLX: # %bb.0: # %entry
14505 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14506 ; NoVLX-NEXT: kmovw %k0, %eax
14507 ; NoVLX-NEXT: vzeroupper
14510 %0 = bitcast <8 x i64> %__a to <8 x i64>
14511 %load = load i64, i64* %__b
14512 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14513 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14514 %2 = icmp sge <8 x i64> %0, %1
14515 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14516 %4 = bitcast <32 x i1> %3 to i32
; Masked signed >= compare of %__a against a scalar i64 loaded from %__b and
; splatted to all 8 lanes. The i8 mask %__u is ANDed with the compare result
; (mask is the first AND operand here), and the <8 x i1> value is widened with
; zero lanes to <32 x i1> and bitcast to i32.
; Both configurations expect a {1to8} broadcast compare under {%k1}.
14520 define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
14521 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
14522 ; VLX: # %bb.0: # %entry
14523 ; VLX-NEXT: kmovd %edi, %k1
14524 ; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14525 ; VLX-NEXT: kmovd %k0, %eax
14526 ; VLX-NEXT: vzeroupper
14529 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
14530 ; NoVLX: # %bb.0: # %entry
14531 ; NoVLX-NEXT: kmovw %edi, %k1
14532 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14533 ; NoVLX-NEXT: kmovw %k0, %eax
14534 ; NoVLX-NEXT: vzeroupper
14537 %0 = bitcast <8 x i64> %__a to <8 x i64>
14538 %load = load i64, i64* %__b
14539 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14540 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14541 %2 = icmp sge <8 x i64> %0, %1
14542 %3 = bitcast i8 %__u to <8 x i1>
14543 %4 = and <8 x i1> %3, %2
14544 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14545 %6 = bitcast <32 x i1> %5 to i32
; Signed >= compare of two <8 x i64> register operands. The <8 x i1> result is
; widened to <64 x i1> by a shuffle whose indices 8..15 (repeated) select
; lanes of the zeroinitializer operand, then bitcast to i64.
; With AVX512BW enabled the mask is expected to be moved out with kmovq;
; otherwise with kmovw followed by a movzwl of %ax.
14550 define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14551 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask:
14552 ; VLX: # %bb.0: # %entry
14553 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14554 ; VLX-NEXT: kmovq %k0, %rax
14555 ; VLX-NEXT: vzeroupper
14558 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask:
14559 ; NoVLX: # %bb.0: # %entry
14560 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
14561 ; NoVLX-NEXT: kmovw %k0, %eax
14562 ; NoVLX-NEXT: movzwl %ax, %eax
14563 ; NoVLX-NEXT: vzeroupper
14566 %0 = bitcast <8 x i64> %__a to <8 x i64>
14567 %1 = bitcast <8 x i64> %__b to <8 x i64>
14568 %2 = icmp sge <8 x i64> %0, %1
14569 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14570 %4 = bitcast <64 x i1> %3 to i64
; Signed >= compare of %__a against an <8 x i64> vector loaded from %__b.
; The <8 x i1> result is widened with zero lanes to <64 x i1> and bitcast to
; i64.
; Both configurations expect the load to fold into vpcmpnltq; the mask is
; moved out with kmovq when AVX512BW is enabled, else kmovw plus movzwl.
14574 define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
14575 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem:
14576 ; VLX: # %bb.0: # %entry
14577 ; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
14578 ; VLX-NEXT: kmovq %k0, %rax
14579 ; VLX-NEXT: vzeroupper
14582 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem:
14583 ; NoVLX: # %bb.0: # %entry
14584 ; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
14585 ; NoVLX-NEXT: kmovw %k0, %eax
14586 ; NoVLX-NEXT: movzwl %ax, %eax
14587 ; NoVLX-NEXT: vzeroupper
14590 %0 = bitcast <8 x i64> %__a to <8 x i64>
14591 %load = load <8 x i64>, <8 x i64>* %__b
14592 %1 = bitcast <8 x i64> %load to <8 x i64>
14593 %2 = icmp sge <8 x i64> %0, %1
14594 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14595 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of two <8 x i64> register operands.
; The i8 mask %__u is bitcast to <8 x i1> and ANDed with the compare result,
; which is then widened with zero lanes to <64 x i1> and bitcast to i64.
; Both configurations expect the AND to be folded into vpcmpnltq as a {%k1}
; write-mask; the mask is moved out with kmovq when AVX512BW is enabled, else
; kmovw plus movzwl.
14599 define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
14600 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask:
14601 ; VLX: # %bb.0: # %entry
14602 ; VLX-NEXT: kmovd %edi, %k1
14603 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14604 ; VLX-NEXT: kmovq %k0, %rax
14605 ; VLX-NEXT: vzeroupper
14608 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask:
14609 ; NoVLX: # %bb.0: # %entry
14610 ; NoVLX-NEXT: kmovw %edi, %k1
14611 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
14612 ; NoVLX-NEXT: kmovw %k0, %eax
14613 ; NoVLX-NEXT: movzwl %ax, %eax
14614 ; NoVLX-NEXT: vzeroupper
14617 %0 = bitcast <8 x i64> %__a to <8 x i64>
14618 %1 = bitcast <8 x i64> %__b to <8 x i64>
14619 %2 = icmp sge <8 x i64> %0, %1
14620 %3 = bitcast i8 %__u to <8 x i1>
14621 %4 = and <8 x i1> %2, %3
14622 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14623 %6 = bitcast <64 x i1> %5 to i64
; Masked signed >= compare of %__a against an <8 x i64> vector loaded from
; %__b. The i8 mask %__u is ANDed with the <8 x i1> compare result, which is
; then widened with zero lanes to <64 x i1> and bitcast to i64.
; Both configurations expect a single vpcmpnltq with a (%rsi) memory operand
; under {%k1}; the mask is moved out with kmovq when AVX512BW is enabled, else
; kmovw plus movzwl.
14627 define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
14628 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem:
14629 ; VLX: # %bb.0: # %entry
14630 ; VLX-NEXT: kmovd %edi, %k1
14631 ; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14632 ; VLX-NEXT: kmovq %k0, %rax
14633 ; VLX-NEXT: vzeroupper
14636 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem:
14637 ; NoVLX: # %bb.0: # %entry
14638 ; NoVLX-NEXT: kmovw %edi, %k1
14639 ; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
14640 ; NoVLX-NEXT: kmovw %k0, %eax
14641 ; NoVLX-NEXT: movzwl %ax, %eax
14642 ; NoVLX-NEXT: vzeroupper
14645 %0 = bitcast <8 x i64> %__a to <8 x i64>
14646 %load = load <8 x i64>, <8 x i64>* %__b
14647 %1 = bitcast <8 x i64> %load to <8 x i64>
14648 %2 = icmp sge <8 x i64> %0, %1
14649 %3 = bitcast i8 %__u to <8 x i1>
14650 %4 = and <8 x i1> %2, %3
14651 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14652 %6 = bitcast <64 x i1> %5 to i64
; Signed >= compare of %__a against a scalar i64 loaded from %__b and splatted
; to all 8 lanes (insertelement + all-zero shuffle mask). The <8 x i1> result
; is widened with zero lanes to <64 x i1> and bitcast to i64.
; Both configurations expect an embedded {1to8} broadcast operand; the mask is
; moved out with kmovq when AVX512BW is enabled, else kmovw plus movzwl.
14657 define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
14658 ; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
14659 ; VLX: # %bb.0: # %entry
14660 ; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14661 ; VLX-NEXT: kmovq %k0, %rax
14662 ; VLX-NEXT: vzeroupper
14665 ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
14666 ; NoVLX: # %bb.0: # %entry
14667 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
14668 ; NoVLX-NEXT: kmovw %k0, %eax
14669 ; NoVLX-NEXT: movzwl %ax, %eax
14670 ; NoVLX-NEXT: vzeroupper
14673 %0 = bitcast <8 x i64> %__a to <8 x i64>
14674 %load = load i64, i64* %__b
14675 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14676 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14677 %2 = icmp sge <8 x i64> %0, %1
14678 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14679 %4 = bitcast <64 x i1> %3 to i64
; Masked signed >= compare of %__a against a scalar i64 loaded from %__b and
; splatted to all 8 lanes. The i8 mask %__u is ANDed with the compare result
; (mask is the first AND operand here), and the <8 x i1> value is widened with
; zero lanes to <64 x i1> and bitcast to i64.
; Both configurations expect a {1to8} broadcast compare under {%k1}; the mask
; is moved out with kmovq when AVX512BW is enabled, else kmovw plus movzwl.
14683 define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
14684 ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
14685 ; VLX: # %bb.0: # %entry
14686 ; VLX-NEXT: kmovd %edi, %k1
14687 ; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14688 ; VLX-NEXT: kmovq %k0, %rax
14689 ; VLX-NEXT: vzeroupper
14692 ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
14693 ; NoVLX: # %bb.0: # %entry
14694 ; NoVLX-NEXT: kmovw %edi, %k1
14695 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
14696 ; NoVLX-NEXT: kmovw %k0, %eax
14697 ; NoVLX-NEXT: movzwl %ax, %eax
14698 ; NoVLX-NEXT: vzeroupper
14701 %0 = bitcast <8 x i64> %__a to <8 x i64>
14702 %load = load i64, i64* %__b
14703 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
14704 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
14705 %2 = icmp sge <8 x i64> %0, %1
14706 %3 = bitcast i8 %__u to <8 x i1>
14707 %4 = and <8 x i1> %3, %2
14708 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
14709 %6 = bitcast <64 x i1> %5 to i64
; Unsigned < compare of two <2 x i64> arguments reinterpreted as <16 x i8>.
; The <16 x i1> result is widened to <32 x i1> by a shuffle whose indices
; 16..31 select lanes of the zeroinitializer operand, then bitcast to i32.
; With AVX512BW+VL the expected code is a single vpcmpltub into a mask
; register. With only AVX512F the expected sequence is vpmaxub + vpcmpeqb +
; vpternlogq $15, then vpmovsxbd/vptestmd to turn the byte vector into a mask.
14714 define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14715 ; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask:
14716 ; VLX: # %bb.0: # %entry
14717 ; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0
14718 ; VLX-NEXT: kmovd %k0, %eax
14721 ; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask:
14722 ; NoVLX: # %bb.0: # %entry
14723 ; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
14724 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14725 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14726 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14727 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14728 ; NoVLX-NEXT: kmovw %k0, %eax
14729 ; NoVLX-NEXT: vzeroupper
14732 %0 = bitcast <2 x i64> %__a to <16 x i8>
14733 %1 = bitcast <2 x i64> %__b to <16 x i8>
14734 %2 = icmp ult <16 x i8> %0, %1
14735 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14736 %4 = bitcast <32 x i1> %3 to i32
; Unsigned < compare of %__a against a <2 x i64> vector loaded from %__b, both
; reinterpreted as <16 x i8>. The <16 x i1> result is widened with zero lanes
; to <32 x i1> and bitcast to i32.
; With AVX512BW+VL the load is expected to fold into vpcmpltub. With only
; AVX512F it is expected to fold into vpmaxub, followed by vpcmpeqb +
; vpternlogq $15 and vpmovsxbd/vptestmd to form the mask.
14740 define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
14741 ; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem:
14742 ; VLX: # %bb.0: # %entry
14743 ; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0
14744 ; VLX-NEXT: kmovd %k0, %eax
14747 ; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem:
14748 ; NoVLX: # %bb.0: # %entry
14749 ; NoVLX-NEXT: vpmaxub (%rdi), %xmm0, %xmm1
14750 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14751 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14752 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14753 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14754 ; NoVLX-NEXT: kmovw %k0, %eax
14755 ; NoVLX-NEXT: vzeroupper
14758 %0 = bitcast <2 x i64> %__a to <16 x i8>
14759 %load = load <2 x i64>, <2 x i64>* %__b
14760 %1 = bitcast <2 x i64> %load to <16 x i8>
14761 %2 = icmp ult <16 x i8> %0, %1
14762 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14763 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned < compare of two <2 x i64> arguments reinterpreted as
; <16 x i8>. The i16 mask %__u is bitcast to <16 x i1> and ANDed with the
; compare result, which is then widened with zero lanes to <32 x i1> and
; bitcast to i32.
; With AVX512BW+VL the mask is expected to fold into vpcmpltub as a {%k1}
; write-mask. With only AVX512F the compare is emulated (vpmaxub + vpcmpeqb +
; vpternlogq $15, then vpmovsxbd/vptestmd) and the mask is applied afterwards
; on the scalar result with andl %edi, %eax.
14767 define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14768 ; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
14769 ; VLX: # %bb.0: # %entry
14770 ; VLX-NEXT: kmovd %edi, %k1
14771 ; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1}
14772 ; VLX-NEXT: kmovd %k0, %eax
14775 ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
14776 ; NoVLX: # %bb.0: # %entry
14777 ; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
14778 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14779 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14780 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14781 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14782 ; NoVLX-NEXT: kmovw %k0, %eax
14783 ; NoVLX-NEXT: andl %edi, %eax
14784 ; NoVLX-NEXT: vzeroupper
14787 %0 = bitcast <2 x i64> %__a to <16 x i8>
14788 %1 = bitcast <2 x i64> %__b to <16 x i8>
14789 %2 = icmp ult <16 x i8> %0, %1
14790 %3 = bitcast i16 %__u to <16 x i1>
14791 %4 = and <16 x i1> %2, %3
14792 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14793 %6 = bitcast <32 x i1> %5 to i32
; Masked icmp ult on <16 x i8> with the second operand loaded from %__b.
; The compare result is ANDed with %__u (i16 bitcast to <16 x i1>), widened
; to <32 x i1> with zeroinitializer lanes (indices 16-31), and bitcast to i32.
; %__u arrives in %edi and the pointer in %rsi, so the folded memory operand
; in the expected assembly is (%rsi).
14797 define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
14798 ; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
14799 ; VLX: # %bb.0: # %entry
14800 ; VLX-NEXT: kmovd %edi, %k1
14801 ; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1}
14802 ; VLX-NEXT: kmovd %k0, %eax
14805 ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
14806 ; NoVLX: # %bb.0: # %entry
14807 ; NoVLX-NEXT: vpmaxub (%rsi), %xmm0, %xmm1
14808 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14809 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14810 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14811 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14812 ; NoVLX-NEXT: kmovw %k0, %eax
14813 ; NoVLX-NEXT: andl %edi, %eax
14814 ; NoVLX-NEXT: vzeroupper
14817 %0 = bitcast <2 x i64> %__a to <16 x i8>
14818 %load = load <2 x i64>, <2 x i64>* %__b
14819 %1 = bitcast <2 x i64> %load to <16 x i8>
14820 %2 = icmp ult <16 x i8> %0, %1
14821 %3 = bitcast i16 %__u to <16 x i1>
14822 %4 = and <16 x i1> %2, %3
14823 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14824 %6 = bitcast <32 x i1> %5 to i32
; icmp ult on <16 x i8>, with the <16 x i1> result widened to <64 x i1> and
; bitcast to i64. Every shuffle index past 15 is in the range 16-31, i.e. a
; lane of the zeroinitializer operand, so the upper 48 mask bits are zero.
; With avx512bw/vl the mask is read with kmovq; with only avx512f the 16-bit
; mask is zero-extended with movzwl.
14829 define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14830 ; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
14831 ; VLX: # %bb.0: # %entry
14832 ; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0
14833 ; VLX-NEXT: kmovq %k0, %rax
14836 ; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
14837 ; NoVLX: # %bb.0: # %entry
14838 ; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
14839 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14840 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14841 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14842 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14843 ; NoVLX-NEXT: kmovw %k0, %eax
14844 ; NoVLX-NEXT: movzwl %ax, %eax
14845 ; NoVLX-NEXT: vzeroupper
14848 %0 = bitcast <2 x i64> %__a to <16 x i8>
14849 %1 = bitcast <2 x i64> %__b to <16 x i8>
14850 %2 = icmp ult <16 x i8> %0, %1
14851 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14852 %4 = bitcast <64 x i1> %3 to i64
; icmp ult on <16 x i8> with the second operand loaded from %__b; the
; <16 x i1> result is widened to <64 x i1> using only zeroinitializer lanes
; (shuffle indices 16-31) for the upper positions, then bitcast to i64.
; The load is expected to fold into the compare (avx512bw/vl) or into
; vpmaxub (avx512f only) as a (%rdi) memory operand.
14856 define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
14857 ; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
14858 ; VLX: # %bb.0: # %entry
14859 ; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0
14860 ; VLX-NEXT: kmovq %k0, %rax
14863 ; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
14864 ; NoVLX: # %bb.0: # %entry
14865 ; NoVLX-NEXT: vpmaxub (%rdi), %xmm0, %xmm1
14866 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14867 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14868 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14869 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14870 ; NoVLX-NEXT: kmovw %k0, %eax
14871 ; NoVLX-NEXT: movzwl %ax, %eax
14872 ; NoVLX-NEXT: vzeroupper
14875 %0 = bitcast <2 x i64> %__a to <16 x i8>
14876 %load = load <2 x i64>, <2 x i64>* %__b
14877 %1 = bitcast <2 x i64> %load to <16 x i8>
14878 %2 = icmp ult <16 x i8> %0, %1
14879 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14880 %4 = bitcast <64 x i1> %3 to i64
; Masked icmp ult on <16 x i8>: the <16 x i1> result is ANDed with %__u
; (i16 bitcast to <16 x i1>), widened to <64 x i1> with zeroinitializer lanes
; in all upper positions, and bitcast to i64.
; With avx512bw/vl the mask is applied via {%k1} on vpcmpltub; with only
; avx512f it is applied with andl %edi after the mask reaches %eax.
14884 define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
14885 ; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask:
14886 ; VLX: # %bb.0: # %entry
14887 ; VLX-NEXT: kmovd %edi, %k1
14888 ; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1}
14889 ; VLX-NEXT: kmovq %k0, %rax
14892 ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask:
14893 ; NoVLX: # %bb.0: # %entry
14894 ; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
14895 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14896 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14897 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14898 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14899 ; NoVLX-NEXT: kmovw %k0, %eax
14900 ; NoVLX-NEXT: andl %edi, %eax
14901 ; NoVLX-NEXT: vzeroupper
14904 %0 = bitcast <2 x i64> %__a to <16 x i8>
14905 %1 = bitcast <2 x i64> %__b to <16 x i8>
14906 %2 = icmp ult <16 x i8> %0, %1
14907 %3 = bitcast i16 %__u to <16 x i1>
14908 %4 = and <16 x i1> %2, %3
14909 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14910 %6 = bitcast <64 x i1> %5 to i64
; Masked icmp ult on <16 x i8> with the second operand loaded from %__b.
; The result is ANDed with %__u (i16 bitcast to <16 x i1>), widened to
; <64 x i1> with zeroinitializer lanes in all upper positions, and bitcast
; to i64. The load is expected to fold as a (%rsi) memory operand.
14914 define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
14915 ; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem:
14916 ; VLX: # %bb.0: # %entry
14917 ; VLX-NEXT: kmovd %edi, %k1
14918 ; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1}
14919 ; VLX-NEXT: kmovq %k0, %rax
14922 ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem:
14923 ; NoVLX: # %bb.0: # %entry
14924 ; NoVLX-NEXT: vpmaxub (%rsi), %xmm0, %xmm1
14925 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
14926 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14927 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14928 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14929 ; NoVLX-NEXT: kmovw %k0, %eax
14930 ; NoVLX-NEXT: andl %edi, %eax
14931 ; NoVLX-NEXT: vzeroupper
14934 %0 = bitcast <2 x i64> %__a to <16 x i8>
14935 %load = load <2 x i64>, <2 x i64>* %__b
14936 %1 = bitcast <2 x i64> %load to <16 x i8>
14937 %2 = icmp ult <16 x i8> %0, %1
14938 %3 = bitcast i16 %__u to <16 x i1>
14939 %4 = and <16 x i1> %2, %3
14940 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
14941 %6 = bitcast <64 x i1> %5 to i64
; icmp ult on <32 x i8> (256-bit operands); the <32 x i1> result is widened
; to <64 x i1> by a shuffle whose indices 32-63 pick zeroinitializer lanes,
; then bitcast to i64.
; With avx512bw/vl a single ymm vpcmpltub is expected. With only avx512f the
; inverted vpmaxub/vpcmpeqb result is turned into two 16-bit masks (low xmm
; half, then the half taken by vextracti128) that are merged with shll/orl.
14946 define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
14947 ; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask:
14948 ; VLX: # %bb.0: # %entry
14949 ; VLX-NEXT: vpcmpltub %ymm1, %ymm0, %k0
14950 ; VLX-NEXT: kmovq %k0, %rax
14951 ; VLX-NEXT: vzeroupper
14954 ; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask:
14955 ; NoVLX: # %bb.0: # %entry
14956 ; NoVLX-NEXT: vpmaxub %ymm1, %ymm0, %ymm1
14957 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
14958 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14959 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
14960 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
14961 ; NoVLX-NEXT: kmovw %k0, %ecx
14962 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
14963 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14964 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14965 ; NoVLX-NEXT: kmovw %k0, %eax
14966 ; NoVLX-NEXT: shll $16, %eax
14967 ; NoVLX-NEXT: orl %ecx, %eax
14968 ; NoVLX-NEXT: vzeroupper
14971 %0 = bitcast <4 x i64> %__a to <32 x i8>
14972 %1 = bitcast <4 x i64> %__b to <32 x i8>
14973 %2 = icmp ult <32 x i8> %0, %1
14974 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
14975 %4 = bitcast <64 x i1> %3 to i64
; icmp ult on <32 x i8> with the second operand loaded from %__b; the
; <32 x i1> result is widened to <64 x i1> (indices 32-63 pick
; zeroinitializer lanes) and bitcast to i64.
; The load is expected to fold as a (%rdi) operand. With only avx512f the
; mask is assembled from two 16-lane halves merged with shll $16 / orl.
14979 define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
14980 ; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem:
14981 ; VLX: # %bb.0: # %entry
14982 ; VLX-NEXT: vpcmpltub (%rdi), %ymm0, %k0
14983 ; VLX-NEXT: kmovq %k0, %rax
14984 ; VLX-NEXT: vzeroupper
14987 ; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem:
14988 ; NoVLX: # %bb.0: # %entry
14989 ; NoVLX-NEXT: vpmaxub (%rdi), %ymm0, %ymm1
14990 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
14991 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
14992 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
14993 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
14994 ; NoVLX-NEXT: kmovw %k0, %ecx
14995 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
14996 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
14997 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
14998 ; NoVLX-NEXT: kmovw %k0, %eax
14999 ; NoVLX-NEXT: shll $16, %eax
15000 ; NoVLX-NEXT: orl %ecx, %eax
15001 ; NoVLX-NEXT: vzeroupper
15004 %0 = bitcast <4 x i64> %__a to <32 x i8>
15005 %load = load <4 x i64>, <4 x i64>* %__b
15006 %1 = bitcast <4 x i64> %load to <32 x i8>
15007 %2 = icmp ult <32 x i8> %0, %1
15008 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15009 %4 = bitcast <64 x i1> %3 to i64
; Masked icmp ult on <32 x i8>: the <32 x i1> result is ANDed with %__u
; (i32 bitcast to <32 x i1>), widened to <64 x i1> with zeroinitializer
; lanes (indices 32-63), and bitcast to i64.
; With avx512bw/vl the mask is applied via {%k1}. With only avx512f each
; 16-lane half is masked separately: the low half with %edi, the high half
; with %edi shifted right by 16, then the halves are merged with shll/orl.
15013 define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15014 ; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask:
15015 ; VLX: # %bb.0: # %entry
15016 ; VLX-NEXT: kmovd %edi, %k1
15017 ; VLX-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1}
15018 ; VLX-NEXT: kmovq %k0, %rax
15019 ; VLX-NEXT: vzeroupper
15022 ; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask:
15023 ; NoVLX: # %bb.0: # %entry
15024 ; NoVLX-NEXT: vpmaxub %ymm1, %ymm0, %ymm1
15025 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
15026 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15027 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
15028 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
15029 ; NoVLX-NEXT: kmovw %k0, %eax
15030 ; NoVLX-NEXT: andl %edi, %eax
15031 ; NoVLX-NEXT: shrl $16, %edi
15032 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
15033 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
15034 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15035 ; NoVLX-NEXT: kmovw %k0, %ecx
15036 ; NoVLX-NEXT: andl %edi, %ecx
15037 ; NoVLX-NEXT: shll $16, %ecx
15038 ; NoVLX-NEXT: movzwl %ax, %eax
15039 ; NoVLX-NEXT: orl %ecx, %eax
15040 ; NoVLX-NEXT: vzeroupper
15043 %0 = bitcast <4 x i64> %__a to <32 x i8>
15044 %1 = bitcast <4 x i64> %__b to <32 x i8>
15045 %2 = icmp ult <32 x i8> %0, %1
15046 %3 = bitcast i32 %__u to <32 x i1>
15047 %4 = and <32 x i1> %2, %3
15048 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15049 %6 = bitcast <64 x i1> %5 to i64
; Masked icmp ult on <32 x i8> with the second operand loaded from %__b.
; The result is ANDed with %__u (i32 bitcast to <32 x i1>), widened to
; <64 x i1> with zeroinitializer lanes (indices 32-63), and bitcast to i64.
; The load is expected to fold as a (%rsi) operand; with only avx512f the
; two 16-lane halves are masked with the low/high 16 bits of %edi and merged.
15053 define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
15054 ; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem:
15055 ; VLX: # %bb.0: # %entry
15056 ; VLX-NEXT: kmovd %edi, %k1
15057 ; VLX-NEXT: vpcmpltub (%rsi), %ymm0, %k0 {%k1}
15058 ; VLX-NEXT: kmovq %k0, %rax
15059 ; VLX-NEXT: vzeroupper
15062 ; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem:
15063 ; NoVLX: # %bb.0: # %entry
15064 ; NoVLX-NEXT: vpmaxub (%rsi), %ymm0, %ymm1
15065 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
15066 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15067 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
15068 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
15069 ; NoVLX-NEXT: kmovw %k0, %eax
15070 ; NoVLX-NEXT: andl %edi, %eax
15071 ; NoVLX-NEXT: shrl $16, %edi
15072 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
15073 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
15074 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15075 ; NoVLX-NEXT: kmovw %k0, %ecx
15076 ; NoVLX-NEXT: andl %edi, %ecx
15077 ; NoVLX-NEXT: shll $16, %ecx
15078 ; NoVLX-NEXT: movzwl %ax, %eax
15079 ; NoVLX-NEXT: orl %ecx, %eax
15080 ; NoVLX-NEXT: vzeroupper
15083 %0 = bitcast <4 x i64> %__a to <32 x i8>
15084 %load = load <4 x i64>, <4 x i64>* %__b
15085 %1 = bitcast <4 x i64> %load to <32 x i8>
15086 %2 = icmp ult <32 x i8> %0, %1
15087 %3 = bitcast i32 %__u to <32 x i1>
15088 %4 = and <32 x i1> %2, %3
15089 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15090 %6 = bitcast <64 x i1> %5 to i64
; icmp ult on <8 x i16>; the <8 x i1> result is widened to <16 x i1> by a
; shuffle whose indices 8-15 pick zeroinitializer lanes, then bitcast to i16.
; With avx512bw/vl a single vpcmpltuw is expected; with only avx512f the
; compare is built from vpmaxuw + vpcmpeqw, inverted by vpternlogq $15, and
; converted to a mask through vpmovsxwq + vptestmq.
15095 define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15096 ; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask:
15097 ; VLX: # %bb.0: # %entry
15098 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
15099 ; VLX-NEXT: kmovd %k0, %eax
15100 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
15103 ; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask:
15104 ; NoVLX: # %bb.0: # %entry
15105 ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
15106 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15107 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15108 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15109 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
15110 ; NoVLX-NEXT: kmovw %k0, %eax
15111 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
15112 ; NoVLX-NEXT: vzeroupper
15115 %0 = bitcast <2 x i64> %__a to <8 x i16>
15116 %1 = bitcast <2 x i64> %__b to <8 x i16>
15117 %2 = icmp ult <8 x i16> %0, %1
15118 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15119 %4 = bitcast <16 x i1> %3 to i16
; icmp ult on <8 x i16> with the second operand loaded from %__b; the
; <8 x i1> result is widened to <16 x i1> (indices 8-15 pick zeroinitializer
; lanes) and bitcast to i16.
; The load is expected to fold as a (%rdi) operand of vpcmpltuw (avx512bw/vl)
; or of vpmaxuw (avx512f only).
15123 define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15124 ; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem:
15125 ; VLX: # %bb.0: # %entry
15126 ; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
15127 ; VLX-NEXT: kmovd %k0, %eax
15128 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
15131 ; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem:
15132 ; NoVLX: # %bb.0: # %entry
15133 ; NoVLX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm1
15134 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15135 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15136 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15137 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
15138 ; NoVLX-NEXT: kmovw %k0, %eax
15139 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
15140 ; NoVLX-NEXT: vzeroupper
15143 %0 = bitcast <2 x i64> %__a to <8 x i16>
15144 %load = load <2 x i64>, <2 x i64>* %__b
15145 %1 = bitcast <2 x i64> %load to <8 x i16>
15146 %2 = icmp ult <8 x i16> %0, %1
15147 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15148 %4 = bitcast <16 x i1> %3 to i16
; Masked icmp ult on <8 x i16>: the <8 x i1> result is ANDed with %__u
; (i8 bitcast to <8 x i1>), widened to <16 x i1> with zeroinitializer lanes
; (indices 8-15), and bitcast to i16.
; In both configurations the mask is moved from %edi into %k1; it is applied
; on vpcmpltuw with avx512bw/vl and on vptestmq with only avx512f.
15152 define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15153 ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask:
15154 ; VLX: # %bb.0: # %entry
15155 ; VLX-NEXT: kmovd %edi, %k1
15156 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
15157 ; VLX-NEXT: kmovd %k0, %eax
15158 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
15161 ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask:
15162 ; NoVLX: # %bb.0: # %entry
15163 ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
15164 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15165 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15166 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15167 ; NoVLX-NEXT: kmovw %edi, %k1
15168 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
15169 ; NoVLX-NEXT: kmovw %k0, %eax
15170 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
15171 ; NoVLX-NEXT: vzeroupper
15174 %0 = bitcast <2 x i64> %__a to <8 x i16>
15175 %1 = bitcast <2 x i64> %__b to <8 x i16>
15176 %2 = icmp ult <8 x i16> %0, %1
15177 %3 = bitcast i8 %__u to <8 x i1>
15178 %4 = and <8 x i1> %2, %3
15179 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15180 %6 = bitcast <16 x i1> %5 to i16
; Masked icmp ult on <8 x i16> with the second operand loaded from %__b.
; The result is ANDed with %__u (i8 bitcast to <8 x i1>), widened to
; <16 x i1> with zeroinitializer lanes (indices 8-15), and bitcast to i16.
; The load is expected to fold as a (%rsi) operand; the mask goes through %k1.
15184 define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15185 ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem:
15186 ; VLX: # %bb.0: # %entry
15187 ; VLX-NEXT: kmovd %edi, %k1
15188 ; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
15189 ; VLX-NEXT: kmovd %k0, %eax
15190 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
15193 ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem:
15194 ; NoVLX: # %bb.0: # %entry
15195 ; NoVLX-NEXT: vpmaxuw (%rsi), %xmm0, %xmm1
15196 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15197 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15198 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15199 ; NoVLX-NEXT: kmovw %edi, %k1
15200 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
15201 ; NoVLX-NEXT: kmovw %k0, %eax
15202 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
15203 ; NoVLX-NEXT: vzeroupper
15206 %0 = bitcast <2 x i64> %__a to <8 x i16>
15207 %load = load <2 x i64>, <2 x i64>* %__b
15208 %1 = bitcast <2 x i64> %load to <8 x i16>
15209 %2 = icmp ult <8 x i16> %0, %1
15210 %3 = bitcast i8 %__u to <8 x i1>
15211 %4 = and <8 x i1> %2, %3
15212 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15213 %6 = bitcast <16 x i1> %5 to i16
; icmp ult on <8 x i16>; the <8 x i1> result is widened to <32 x i1> and
; bitcast to i32. Every shuffle index past 7 is in the range 8-15, i.e. a
; lane of the zeroinitializer operand, so the upper 24 mask bits are zero.
; Expected code matches the 16-bit-result variant except that the full
; %eax is the result (no sub-register kill annotation).
15218 define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15219 ; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask:
15220 ; VLX: # %bb.0: # %entry
15221 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
15222 ; VLX-NEXT: kmovd %k0, %eax
15225 ; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask:
15226 ; NoVLX: # %bb.0: # %entry
15227 ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
15228 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15229 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15230 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15231 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
15232 ; NoVLX-NEXT: kmovw %k0, %eax
15233 ; NoVLX-NEXT: vzeroupper
15236 %0 = bitcast <2 x i64> %__a to <8 x i16>
15237 %1 = bitcast <2 x i64> %__b to <8 x i16>
15238 %2 = icmp ult <8 x i16> %0, %1
15239 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15240 %4 = bitcast <32 x i1> %3 to i32
; icmp ult on <8 x i16> with the second operand loaded from %__b; the
; <8 x i1> result is widened to <32 x i1> using only zeroinitializer lanes
; (shuffle indices 8-15) for the upper positions, then bitcast to i32.
; The load is expected to fold as a (%rdi) memory operand.
15244 define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15245 ; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem:
15246 ; VLX: # %bb.0: # %entry
15247 ; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
15248 ; VLX-NEXT: kmovd %k0, %eax
15251 ; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem:
15252 ; NoVLX: # %bb.0: # %entry
15253 ; NoVLX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm1
15254 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15255 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15256 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15257 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
15258 ; NoVLX-NEXT: kmovw %k0, %eax
15259 ; NoVLX-NEXT: vzeroupper
15262 %0 = bitcast <2 x i64> %__a to <8 x i16>
15263 %load = load <2 x i64>, <2 x i64>* %__b
15264 %1 = bitcast <2 x i64> %load to <8 x i16>
15265 %2 = icmp ult <8 x i16> %0, %1
15266 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15267 %4 = bitcast <32 x i1> %3 to i32
; Masked icmp ult on <8 x i16>: the <8 x i1> result is ANDed with %__u
; (i8 bitcast to <8 x i1>), widened to <32 x i1> with zeroinitializer lanes
; in all upper positions, and bitcast to i32.
; The mask is moved from %edi into %k1 and applied on vpcmpltuw
; (avx512bw/vl) or on vptestmq (avx512f only).
15271 define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15272 ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask:
15273 ; VLX: # %bb.0: # %entry
15274 ; VLX-NEXT: kmovd %edi, %k1
15275 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
15276 ; VLX-NEXT: kmovd %k0, %eax
15279 ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask:
15280 ; NoVLX: # %bb.0: # %entry
15281 ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
15282 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15283 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15284 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15285 ; NoVLX-NEXT: kmovw %edi, %k1
15286 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
15287 ; NoVLX-NEXT: kmovw %k0, %eax
15288 ; NoVLX-NEXT: vzeroupper
15291 %0 = bitcast <2 x i64> %__a to <8 x i16>
15292 %1 = bitcast <2 x i64> %__b to <8 x i16>
15293 %2 = icmp ult <8 x i16> %0, %1
15294 %3 = bitcast i8 %__u to <8 x i1>
15295 %4 = and <8 x i1> %2, %3
15296 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15297 %6 = bitcast <32 x i1> %5 to i32
; Masked icmp ult on <8 x i16> with the second operand loaded from %__b.
; The result is ANDed with %__u (i8 bitcast to <8 x i1>), widened to
; <32 x i1> with zeroinitializer lanes in all upper positions, and bitcast
; to i32. The load is expected to fold as a (%rsi) memory operand.
15301 define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15302 ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem:
15303 ; VLX: # %bb.0: # %entry
15304 ; VLX-NEXT: kmovd %edi, %k1
15305 ; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
15306 ; VLX-NEXT: kmovd %k0, %eax
15309 ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem:
15310 ; NoVLX: # %bb.0: # %entry
15311 ; NoVLX-NEXT: vpmaxuw (%rsi), %xmm0, %xmm1
15312 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15313 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15314 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15315 ; NoVLX-NEXT: kmovw %edi, %k1
15316 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
15317 ; NoVLX-NEXT: kmovw %k0, %eax
15318 ; NoVLX-NEXT: vzeroupper
15321 %0 = bitcast <2 x i64> %__a to <8 x i16>
15322 %load = load <2 x i64>, <2 x i64>* %__b
15323 %1 = bitcast <2 x i64> %load to <8 x i16>
15324 %2 = icmp ult <8 x i16> %0, %1
15325 %3 = bitcast i8 %__u to <8 x i1>
15326 %4 = and <8 x i1> %2, %3
15327 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15328 %6 = bitcast <32 x i1> %5 to i32
; icmp ult on <8 x i16>; the <8 x i1> result is widened to <64 x i1> and
; bitcast to i64. Every shuffle index past 7 is in the range 8-15, i.e. a
; lane of the zeroinitializer operand, so the upper 56 mask bits are zero.
; With avx512bw/vl the mask is read with kmovq; with only avx512f it is read
; with kmovw and zero-extended with movzwl.
15333 define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15334 ; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask:
15335 ; VLX: # %bb.0: # %entry
15336 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
15337 ; VLX-NEXT: kmovq %k0, %rax
15340 ; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask:
15341 ; NoVLX: # %bb.0: # %entry
15342 ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
15343 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15344 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15345 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15346 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
15347 ; NoVLX-NEXT: kmovw %k0, %eax
15348 ; NoVLX-NEXT: movzwl %ax, %eax
15349 ; NoVLX-NEXT: vzeroupper
15352 %0 = bitcast <2 x i64> %__a to <8 x i16>
15353 %1 = bitcast <2 x i64> %__b to <8 x i16>
15354 %2 = icmp ult <8 x i16> %0, %1
15355 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15356 %4 = bitcast <64 x i1> %3 to i64
; icmp ult on <8 x i16> with the second operand loaded from %__b; the
; <8 x i1> result is widened to <64 x i1> using only zeroinitializer lanes
; (shuffle indices 8-15) for the upper positions, then bitcast to i64.
; The load is expected to fold as a (%rdi) memory operand.
15360 define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15361 ; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem:
15362 ; VLX: # %bb.0: # %entry
15363 ; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
15364 ; VLX-NEXT: kmovq %k0, %rax
15367 ; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem:
15368 ; NoVLX: # %bb.0: # %entry
15369 ; NoVLX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm1
15370 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15371 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15372 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15373 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
15374 ; NoVLX-NEXT: kmovw %k0, %eax
15375 ; NoVLX-NEXT: movzwl %ax, %eax
15376 ; NoVLX-NEXT: vzeroupper
15379 %0 = bitcast <2 x i64> %__a to <8 x i16>
15380 %load = load <2 x i64>, <2 x i64>* %__b
15381 %1 = bitcast <2 x i64> %load to <8 x i16>
15382 %2 = icmp ult <8 x i16> %0, %1
15383 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15384 %4 = bitcast <64 x i1> %3 to i64
; Masked icmp ult on <8 x i16>: the <8 x i1> result is ANDed with %__u
; (i8 bitcast to <8 x i1>), widened to <64 x i1> with zeroinitializer lanes
; in all upper positions, and bitcast to i64.
; The mask is moved from %edi into %k1 in both configurations; with only
; avx512f the 16-bit mask result is additionally zero-extended with movzwl.
15388 define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15389 ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask:
15390 ; VLX: # %bb.0: # %entry
15391 ; VLX-NEXT: kmovd %edi, %k1
15392 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
15393 ; VLX-NEXT: kmovq %k0, %rax
15396 ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask:
15397 ; NoVLX: # %bb.0: # %entry
15398 ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
15399 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15400 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15401 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15402 ; NoVLX-NEXT: kmovw %edi, %k1
15403 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
15404 ; NoVLX-NEXT: kmovw %k0, %eax
15405 ; NoVLX-NEXT: movzwl %ax, %eax
15406 ; NoVLX-NEXT: vzeroupper
15409 %0 = bitcast <2 x i64> %__a to <8 x i16>
15410 %1 = bitcast <2 x i64> %__b to <8 x i16>
15411 %2 = icmp ult <8 x i16> %0, %1
15412 %3 = bitcast i8 %__u to <8 x i1>
15413 %4 = and <8 x i1> %2, %3
15414 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15415 %6 = bitcast <64 x i1> %5 to i64
; Masked icmp ult on <8 x i16> with the second operand loaded from %__b.
; The result is ANDed with %__u (i8 bitcast to <8 x i1>), widened to
; <64 x i1> with zeroinitializer lanes in all upper positions, and bitcast
; to i64. The load is expected to fold as a (%rsi) memory operand.
15419 define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15420 ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem:
15421 ; VLX: # %bb.0: # %entry
15422 ; VLX-NEXT: kmovd %edi, %k1
15423 ; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
15424 ; VLX-NEXT: kmovq %k0, %rax
15427 ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem:
15428 ; NoVLX: # %bb.0: # %entry
15429 ; NoVLX-NEXT: vpmaxuw (%rsi), %xmm0, %xmm1
15430 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
15431 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15432 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
15433 ; NoVLX-NEXT: kmovw %edi, %k1
15434 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
15435 ; NoVLX-NEXT: kmovw %k0, %eax
15436 ; NoVLX-NEXT: movzwl %ax, %eax
15437 ; NoVLX-NEXT: vzeroupper
15440 %0 = bitcast <2 x i64> %__a to <8 x i16>
15441 %load = load <2 x i64>, <2 x i64>* %__b
15442 %1 = bitcast <2 x i64> %load to <8 x i16>
15443 %2 = icmp ult <8 x i16> %0, %1
15444 %3 = bitcast i8 %__u to <8 x i1>
15445 %4 = and <8 x i1> %2, %3
15446 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15447 %6 = bitcast <64 x i1> %5 to i64
; Unsigned less-than compare of two <16 x i16> vectors (bitcast from the
; <4 x i64> arguments); the <16 x i1> result is widened to <32 x i1> and
; bitcast to i32.
; Expected codegen: with +avx512vl/+avx512bw a single vpcmpltuw into a mask
; register; with only +avx512f the vpmaxuw/vpcmpeqw/vpternlogq expansion
; followed by vpmovsxwd + vptestmd.
15452 define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15453 ; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask:
15454 ; VLX: # %bb.0: # %entry
15455 ; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0
15456 ; VLX-NEXT: kmovd %k0, %eax
15457 ; VLX-NEXT: vzeroupper
15460 ; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask:
15461 ; NoVLX: # %bb.0: # %entry
15462 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
15463 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15464 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15465 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15466 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15467 ; NoVLX-NEXT: kmovw %k0, %eax
15468 ; NoVLX-NEXT: vzeroupper
15471 %0 = bitcast <4 x i64> %__a to <16 x i16>
15472 %1 = bitcast <4 x i64> %__b to <16 x i16>
15473 %2 = icmp ult <16 x i16> %0, %1
; Indices 0-15 pick lanes of %2; indices 16-31 pick lanes of the
; zeroinitializer operand, so the upper 16 result lanes are zero.
15474 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15475 %4 = bitcast <32 x i1> %3 to i32
; Unsigned less-than compare of <16 x i16> where the second operand is loaded
; from %__b; the <16 x i1> result is widened to <32 x i1> and bitcast to i32.
; Expected codegen: the load is folded into vpcmpltuw (+avx512vl/+avx512bw) or
; into vpmaxuw (only +avx512f).
15479 define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
15480 ; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem:
15481 ; VLX: # %bb.0: # %entry
15482 ; VLX-NEXT: vpcmpltuw (%rdi), %ymm0, %k0
15483 ; VLX-NEXT: kmovd %k0, %eax
15484 ; VLX-NEXT: vzeroupper
15487 ; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem:
15488 ; NoVLX: # %bb.0: # %entry
15489 ; NoVLX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm1
15490 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15491 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15492 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15493 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15494 ; NoVLX-NEXT: kmovw %k0, %eax
15495 ; NoVLX-NEXT: vzeroupper
15498 %0 = bitcast <4 x i64> %__a to <16 x i16>
15499 %load = load <4 x i64>, <4 x i64>* %__b
15500 %1 = bitcast <4 x i64> %load to <16 x i16>
15501 %2 = icmp ult <16 x i16> %0, %1
; Indices 0-15 pick lanes of %2; indices 16-31 pick lanes of the
; zeroinitializer operand, so the upper 16 result lanes are zero.
15502 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15503 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned less-than compare of <16 x i16>: the <16 x i1> result is
; ANDed with the bits of %__u, widened to <32 x i1> and bitcast to i32.
; Expected codegen: with +avx512vl/+avx512bw the mask is applied as a {%k1}
; write-mask on vpcmpltuw; with only +avx512f the mask is applied afterwards
; by a scalar andl on the extracted bits.
15507 define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15508 ; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask:
15509 ; VLX: # %bb.0: # %entry
15510 ; VLX-NEXT: kmovd %edi, %k1
15511 ; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1}
15512 ; VLX-NEXT: kmovd %k0, %eax
15513 ; VLX-NEXT: vzeroupper
15516 ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask:
15517 ; NoVLX: # %bb.0: # %entry
15518 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
15519 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15520 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15521 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15522 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15523 ; NoVLX-NEXT: kmovw %k0, %eax
15524 ; NoVLX-NEXT: andl %edi, %eax
15525 ; NoVLX-NEXT: vzeroupper
15528 %0 = bitcast <4 x i64> %__a to <16 x i16>
15529 %1 = bitcast <4 x i64> %__b to <16 x i16>
15530 %2 = icmp ult <16 x i16> %0, %1
15531 %3 = bitcast i16 %__u to <16 x i1>
15532 %4 = and <16 x i1> %2, %3
; Indices 0-15 pick lanes of %4; indices 16-31 pick lanes of the
; zeroinitializer operand, so the upper 16 result lanes are zero.
15533 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15534 %6 = bitcast <32 x i1> %5 to i32
; Masked unsigned less-than compare of <16 x i16> with the second operand
; loaded from %__b; the result is ANDed with the bits of %__u, widened to
; <32 x i1> and bitcast to i32.
; Expected codegen: masked vpcmpltuw with a memory operand
; (+avx512vl/+avx512bw), or the vpmaxuw-based expansion with a trailing scalar
; andl against %edi (only +avx512f).
15538 define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
15539 ; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem:
15540 ; VLX: # %bb.0: # %entry
15541 ; VLX-NEXT: kmovd %edi, %k1
15542 ; VLX-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1}
15543 ; VLX-NEXT: kmovd %k0, %eax
15544 ; VLX-NEXT: vzeroupper
15547 ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem:
15548 ; NoVLX: # %bb.0: # %entry
15549 ; NoVLX-NEXT: vpmaxuw (%rsi), %ymm0, %ymm1
15550 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15551 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15552 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15553 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15554 ; NoVLX-NEXT: kmovw %k0, %eax
15555 ; NoVLX-NEXT: andl %edi, %eax
15556 ; NoVLX-NEXT: vzeroupper
15559 %0 = bitcast <4 x i64> %__a to <16 x i16>
15560 %load = load <4 x i64>, <4 x i64>* %__b
15561 %1 = bitcast <4 x i64> %load to <16 x i16>
15562 %2 = icmp ult <16 x i16> %0, %1
15563 %3 = bitcast i16 %__u to <16 x i1>
15564 %4 = and <16 x i1> %2, %3
; Indices 0-15 pick lanes of %4; indices 16-31 pick lanes of the
; zeroinitializer operand, so the upper 16 result lanes are zero.
15565 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15566 %6 = bitcast <32 x i1> %5 to i32
; Unsigned less-than compare of two <16 x i16> vectors; the <16 x i1> result
; is widened to <64 x i1> and bitcast to i64.
; Expected codegen: a single vpcmpltuw + kmovq (+avx512vl/+avx512bw), or the
; vpmaxuw/vpcmpeqw/vpternlogq expansion with a movzwl of the 16 mask bits
; (only +avx512f).
15571 define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15572 ; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask:
15573 ; VLX: # %bb.0: # %entry
15574 ; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0
15575 ; VLX-NEXT: kmovq %k0, %rax
15576 ; VLX-NEXT: vzeroupper
15579 ; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask:
15580 ; NoVLX: # %bb.0: # %entry
15581 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
15582 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15583 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15584 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15585 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15586 ; NoVLX-NEXT: kmovw %k0, %eax
15587 ; NoVLX-NEXT: movzwl %ax, %eax
15588 ; NoVLX-NEXT: vzeroupper
15591 %0 = bitcast <4 x i64> %__a to <16 x i16>
15592 %1 = bitcast <4 x i64> %__b to <16 x i16>
15593 %2 = icmp ult <16 x i16> %0, %1
; Indices 0-15 pick lanes of %2; every index >= 16 (the mask repeats 16-31)
; picks a lane of the zeroinitializer operand, so result lanes 16-63 are zero.
15594 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15595 %4 = bitcast <64 x i1> %3 to i64
; Unsigned less-than compare of <16 x i16> with the second operand loaded from
; %__b; the <16 x i1> result is widened to <64 x i1> and bitcast to i64.
; Expected codegen: the load is folded into vpcmpltuw (+avx512vl/+avx512bw) or
; into vpmaxuw (only +avx512f).
15599 define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
15600 ; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem:
15601 ; VLX: # %bb.0: # %entry
15602 ; VLX-NEXT: vpcmpltuw (%rdi), %ymm0, %k0
15603 ; VLX-NEXT: kmovq %k0, %rax
15604 ; VLX-NEXT: vzeroupper
15607 ; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem:
15608 ; NoVLX: # %bb.0: # %entry
15609 ; NoVLX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm1
15610 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15611 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15612 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15613 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15614 ; NoVLX-NEXT: kmovw %k0, %eax
15615 ; NoVLX-NEXT: movzwl %ax, %eax
15616 ; NoVLX-NEXT: vzeroupper
15619 %0 = bitcast <4 x i64> %__a to <16 x i16>
15620 %load = load <4 x i64>, <4 x i64>* %__b
15621 %1 = bitcast <4 x i64> %load to <16 x i16>
15622 %2 = icmp ult <16 x i16> %0, %1
; Indices 0-15 pick lanes of %2; every index >= 16 (the mask repeats 16-31)
; picks a lane of the zeroinitializer operand, so result lanes 16-63 are zero.
15623 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15624 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned less-than compare of <16 x i16>: the <16 x i1> result is
; ANDed with the bits of %__u, widened to <64 x i1> and bitcast to i64.
; Expected codegen: masked vpcmpltuw + kmovq (+avx512vl/+avx512bw), or the
; vpmaxuw-based expansion with the mask applied by a scalar andl against %edi
; (only +avx512f).
15628 define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
15629 ; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask:
15630 ; VLX: # %bb.0: # %entry
15631 ; VLX-NEXT: kmovd %edi, %k1
15632 ; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1}
15633 ; VLX-NEXT: kmovq %k0, %rax
15634 ; VLX-NEXT: vzeroupper
15637 ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask:
15638 ; NoVLX: # %bb.0: # %entry
15639 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
15640 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15641 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15642 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15643 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15644 ; NoVLX-NEXT: kmovw %k0, %eax
15645 ; NoVLX-NEXT: andl %edi, %eax
15646 ; NoVLX-NEXT: vzeroupper
15649 %0 = bitcast <4 x i64> %__a to <16 x i16>
15650 %1 = bitcast <4 x i64> %__b to <16 x i16>
15651 %2 = icmp ult <16 x i16> %0, %1
15652 %3 = bitcast i16 %__u to <16 x i1>
15653 %4 = and <16 x i1> %2, %3
; Indices 0-15 pick lanes of %4; every index >= 16 (the mask repeats 16-31)
; picks a lane of the zeroinitializer operand, so result lanes 16-63 are zero.
15654 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15655 %6 = bitcast <64 x i1> %5 to i64
; Masked unsigned less-than compare of <16 x i16> with the second operand
; loaded from %__b; the result is ANDed with the bits of %__u, widened to
; <64 x i1> and bitcast to i64.
; Expected codegen: masked vpcmpltuw with a memory operand + kmovq
; (+avx512vl/+avx512bw), or the vpmaxuw-based expansion with a trailing scalar
; andl against %edi (only +avx512f).
15659 define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
15660 ; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem:
15661 ; VLX: # %bb.0: # %entry
15662 ; VLX-NEXT: kmovd %edi, %k1
15663 ; VLX-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1}
15664 ; VLX-NEXT: kmovq %k0, %rax
15665 ; VLX-NEXT: vzeroupper
15668 ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem:
15669 ; NoVLX: # %bb.0: # %entry
15670 ; NoVLX-NEXT: vpmaxuw (%rsi), %ymm0, %ymm1
15671 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15672 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15673 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15674 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15675 ; NoVLX-NEXT: kmovw %k0, %eax
15676 ; NoVLX-NEXT: andl %edi, %eax
15677 ; NoVLX-NEXT: vzeroupper
15680 %0 = bitcast <4 x i64> %__a to <16 x i16>
15681 %load = load <4 x i64>, <4 x i64>* %__b
15682 %1 = bitcast <4 x i64> %load to <16 x i16>
15683 %2 = icmp ult <16 x i16> %0, %1
15684 %3 = bitcast i16 %__u to <16 x i1>
15685 %4 = and <16 x i1> %2, %3
; Indices 0-15 pick lanes of %4; every index >= 16 (the mask repeats 16-31)
; picks a lane of the zeroinitializer operand, so result lanes 16-63 are zero.
15686 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
15687 %6 = bitcast <64 x i1> %5 to i64
; Unsigned less-than compare of two <32 x i16> vectors (bitcast from the
; <8 x i64> arguments); the <32 x i1> result is widened to <64 x i1> and
; bitcast to i64.
; Expected codegen: with +avx512bw a single 512-bit vpcmpltuw; with only
; +avx512f the inputs are split into 256-bit halves (vextracti64x4 $1 yields
; the upper half), each half goes through the vpmaxuw/vpcmpeqw/vpternlogq
; expansion, and the two 16-bit masks are merged with shll $16 / orl.
15692 define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
15693 ; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
15694 ; VLX: # %bb.0: # %entry
15695 ; VLX-NEXT: vpcmpltuw %zmm1, %zmm0, %k0
15696 ; VLX-NEXT: kmovq %k0, %rax
15697 ; VLX-NEXT: vzeroupper
15700 ; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
15701 ; NoVLX: # %bb.0: # %entry
15702 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
15703 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3
15704 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
15705 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15706 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15707 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15708 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15709 ; NoVLX-NEXT: kmovw %k0, %ecx
15710 ; NoVLX-NEXT: vpmaxuw %ymm3, %ymm2, %ymm0
15711 ; NoVLX-NEXT: vpcmpeqw %ymm0, %ymm2, %ymm0
15712 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15713 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15714 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15715 ; NoVLX-NEXT: kmovw %k0, %eax
15716 ; NoVLX-NEXT: shll $16, %eax
15717 ; NoVLX-NEXT: orl %ecx, %eax
15718 ; NoVLX-NEXT: vzeroupper
15721 %0 = bitcast <8 x i64> %__a to <32 x i16>
15722 %1 = bitcast <8 x i64> %__b to <32 x i16>
15723 %2 = icmp ult <32 x i16> %0, %1
; Indices 0-31 pick lanes of %2; indices 32-63 pick lanes of the
; zeroinitializer operand, so the upper 32 result lanes are zero.
15724 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15725 %4 = bitcast <64 x i1> %3 to i64
; Unsigned less-than compare of <32 x i16> with the second operand loaded from
; %__b; the <32 x i1> result is widened to <64 x i1> and bitcast to i64.
; Expected codegen: with +avx512bw a single vpcmpltuw with a 512-bit memory
; operand; with only +avx512f the compare is done per 256-bit half, folding
; the loads at offsets 0 and 32 from %rdi into vpmaxuw, and the two 16-bit
; masks are merged with shll $16 / orl.
15729 define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
15730 ; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
15731 ; VLX: # %bb.0: # %entry
15732 ; VLX-NEXT: vpcmpltuw (%rdi), %zmm0, %k0
15733 ; VLX-NEXT: kmovq %k0, %rax
15734 ; VLX-NEXT: vzeroupper
15737 ; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
15738 ; NoVLX: # %bb.0: # %entry
15739 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1
15740 ; NoVLX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm2
15741 ; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
15742 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15743 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15744 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15745 ; NoVLX-NEXT: kmovw %k0, %ecx
15746 ; NoVLX-NEXT: vpmaxuw 32(%rdi), %ymm1, %ymm0
15747 ; NoVLX-NEXT: vpcmpeqw %ymm0, %ymm1, %ymm0
15748 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15749 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15750 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15751 ; NoVLX-NEXT: kmovw %k0, %eax
15752 ; NoVLX-NEXT: shll $16, %eax
15753 ; NoVLX-NEXT: orl %ecx, %eax
15754 ; NoVLX-NEXT: vzeroupper
15757 %0 = bitcast <8 x i64> %__a to <32 x i16>
15758 %load = load <8 x i64>, <8 x i64>* %__b
15759 %1 = bitcast <8 x i64> %load to <32 x i16>
15760 %2 = icmp ult <32 x i16> %0, %1
; Indices 0-31 pick lanes of %2; indices 32-63 pick lanes of the
; zeroinitializer operand, so the upper 32 result lanes are zero.
15761 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15762 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned less-than compare of <32 x i16>: the <32 x i1> result is
; ANDed with the bits of %__u, widened to <64 x i1> and bitcast to i64.
; Expected codegen: with +avx512bw a single masked 512-bit vpcmpltuw; with
; only +avx512f each 256-bit half is compared separately, the lower result is
; ANDed with %edi, the upper result with %edi shifted right by 16, and the
; halves are merged with shll $16 / orl.
15766 define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
15767 ; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
15768 ; VLX: # %bb.0: # %entry
15769 ; VLX-NEXT: kmovd %edi, %k1
15770 ; VLX-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
15771 ; VLX-NEXT: kmovq %k0, %rax
15772 ; VLX-NEXT: vzeroupper
15775 ; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
15776 ; NoVLX: # %bb.0: # %entry
15777 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2
15778 ; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
15779 ; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
15780 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
15781 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
15782 ; NoVLX-NEXT: kmovw %k0, %eax
15783 ; NoVLX-NEXT: andl %edi, %eax
15784 ; NoVLX-NEXT: shrl $16, %edi
15785 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
15786 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
15787 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
15788 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15789 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15790 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15791 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15792 ; NoVLX-NEXT: kmovw %k0, %ecx
15793 ; NoVLX-NEXT: andl %edi, %ecx
15794 ; NoVLX-NEXT: shll $16, %ecx
15795 ; NoVLX-NEXT: movzwl %ax, %eax
15796 ; NoVLX-NEXT: orl %ecx, %eax
15797 ; NoVLX-NEXT: vzeroupper
15800 %0 = bitcast <8 x i64> %__a to <32 x i16>
15801 %1 = bitcast <8 x i64> %__b to <32 x i16>
15802 %2 = icmp ult <32 x i16> %0, %1
15803 %3 = bitcast i32 %__u to <32 x i1>
15804 %4 = and <32 x i1> %2, %3
; Indices 0-31 pick lanes of %4; indices 32-63 pick lanes of the
; zeroinitializer operand, so the upper 32 result lanes are zero.
15805 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15806 %6 = bitcast <64 x i1> %5 to i64
; Masked unsigned less-than compare of <32 x i16> with the second operand
; loaded from %__b; the result is ANDed with the bits of %__u, widened to
; <64 x i1> and bitcast to i64.
; Expected codegen: with +avx512bw a single masked vpcmpltuw with a memory
; operand; with only +avx512f each 256-bit half is compared separately (loads
; at offsets 0 and 32 from %rsi folded into vpmaxuw), each 16-bit result is
; ANDed with the matching half of %edi, and the halves are merged with
; shll $16 / orl.
15810 define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
15811 ; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
15812 ; VLX: # %bb.0: # %entry
15813 ; VLX-NEXT: kmovd %edi, %k1
15814 ; VLX-NEXT: vpcmpltuw (%rsi), %zmm0, %k0 {%k1}
15815 ; VLX-NEXT: kmovq %k0, %rax
15816 ; VLX-NEXT: vzeroupper
15819 ; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
15820 ; NoVLX: # %bb.0: # %entry
15821 ; NoVLX-NEXT: vpmaxuw (%rsi), %ymm0, %ymm1
15822 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm1
15823 ; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
15824 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
15825 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
15826 ; NoVLX-NEXT: kmovw %k0, %eax
15827 ; NoVLX-NEXT: andl %edi, %eax
15828 ; NoVLX-NEXT: shrl $16, %edi
15829 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
15830 ; NoVLX-NEXT: vpmaxuw 32(%rsi), %ymm0, %ymm1
15831 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
15832 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
15833 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
15834 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
15835 ; NoVLX-NEXT: kmovw %k0, %ecx
15836 ; NoVLX-NEXT: andl %edi, %ecx
15837 ; NoVLX-NEXT: shll $16, %ecx
15838 ; NoVLX-NEXT: movzwl %ax, %eax
15839 ; NoVLX-NEXT: orl %ecx, %eax
15840 ; NoVLX-NEXT: vzeroupper
15843 %0 = bitcast <8 x i64> %__a to <32 x i16>
15844 %load = load <8 x i64>, <8 x i64>* %__b
15845 %1 = bitcast <8 x i64> %load to <32 x i16>
15846 %2 = icmp ult <32 x i16> %0, %1
15847 %3 = bitcast i32 %__u to <32 x i1>
15848 %4 = and <32 x i1> %2, %3
; Indices 0-31 pick lanes of %4; indices 32-63 pick lanes of the
; zeroinitializer operand, so the upper 32 result lanes are zero.
15849 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
15850 %6 = bitcast <64 x i1> %5 to i64
; Unsigned less-than compare of two <4 x i32> vectors (bitcast from the
; <2 x i64> arguments); the <4 x i1> result is widened to <8 x i1> and bitcast
; to i8.
; Expected codegen: with +avx512vl a 128-bit vpcmpltud; with only +avx512f the
; operands are treated as zmm registers for a 512-bit vpcmpltud, and the
; kshiftlw/kshiftrw $12 pair clears mask bits 4-15.
15855 define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15856 ; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask:
15857 ; VLX: # %bb.0: # %entry
15858 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
15859 ; VLX-NEXT: kmovd %k0, %eax
15860 ; VLX-NEXT: # kill: def $al killed $al killed $eax
15863 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask:
15864 ; NoVLX: # %bb.0: # %entry
15865 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
15866 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15867 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
15868 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15869 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15870 ; NoVLX-NEXT: kmovw %k0, %eax
15871 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
15872 ; NoVLX-NEXT: vzeroupper
15875 %0 = bitcast <2 x i64> %__a to <4 x i32>
15876 %1 = bitcast <2 x i64> %__b to <4 x i32>
15877 %2 = icmp ult <4 x i32> %0, %1
; Indices 0-3 pick lanes of %2; indices 4-7 pick lanes of the zeroinitializer
; operand, so the upper 4 result lanes are zero.
15878 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15879 %4 = bitcast <8 x i1> %3 to i8
; Unsigned less-than compare of <4 x i32> with the second operand loaded from
; %__b; the <4 x i1> result is widened to <8 x i1> and bitcast to i8.
; Expected codegen: with +avx512vl the load is folded into vpcmpltud; with
; only +avx512f the operand is loaded by vmovdqa into %xmm1, compared with a
; 512-bit vpcmpltud, and mask bits 4-15 are cleared by the kshift pair.
15883 define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15884 ; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem:
15885 ; VLX: # %bb.0: # %entry
15886 ; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
15887 ; VLX-NEXT: kmovd %k0, %eax
15888 ; VLX-NEXT: # kill: def $al killed $al killed $eax
15891 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem:
15892 ; NoVLX: # %bb.0: # %entry
15893 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15894 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
15895 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
15896 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15897 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15898 ; NoVLX-NEXT: kmovw %k0, %eax
15899 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
15900 ; NoVLX-NEXT: vzeroupper
15903 %0 = bitcast <2 x i64> %__a to <4 x i32>
15904 %load = load <2 x i64>, <2 x i64>* %__b
15905 %1 = bitcast <2 x i64> %load to <4 x i32>
15906 %2 = icmp ult <4 x i32> %0, %1
; Indices 0-3 pick lanes of %2; indices 4-7 pick lanes of the zeroinitializer
; operand, so the upper 4 result lanes are zero.
15907 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15908 %4 = bitcast <8 x i1> %3 to i8
; Masked unsigned less-than compare of <4 x i32>: the <4 x i1> result is ANDed
; with the first four lanes of %__u (viewed as <8 x i1>), widened to <8 x i1>
; and bitcast to i8.
; Expected codegen: masked 128-bit vpcmpltud (+avx512vl), or a masked 512-bit
; vpcmpltud followed by the kshiftlw/kshiftrw $12 pair that clears mask bits
; 4-15 (only +avx512f).
15912 define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
15913 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask:
15914 ; VLX: # %bb.0: # %entry
15915 ; VLX-NEXT: kmovd %edi, %k1
15916 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
15917 ; VLX-NEXT: kmovd %k0, %eax
15918 ; VLX-NEXT: # kill: def $al killed $al killed $eax
15921 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask:
15922 ; NoVLX: # %bb.0: # %entry
15923 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
15924 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15925 ; NoVLX-NEXT: kmovw %edi, %k1
15926 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
15927 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15928 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15929 ; NoVLX-NEXT: kmovw %k0, %eax
15930 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
15931 ; NoVLX-NEXT: vzeroupper
15934 %0 = bitcast <2 x i64> %__a to <4 x i32>
15935 %1 = bitcast <2 x i64> %__b to <4 x i32>
15936 %2 = icmp ult <4 x i32> %0, %1
15937 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0-3 of the 8-lane mask so it matches the compare width.
15938 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15939 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 pick lanes of %4; indices 4-7 pick lanes of the zeroinitializer
; operand, so the upper 4 result lanes are zero.
15940 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15941 %6 = bitcast <8 x i1> %5 to i8
; Masked unsigned less-than compare of <4 x i32> with the second operand
; loaded from %__b; the result is ANDed with the first four lanes of %__u
; (viewed as <8 x i1>), widened to <8 x i1> and bitcast to i8.
; Expected codegen: masked vpcmpltud with a folded load (+avx512vl), or
; vmovdqa + masked 512-bit vpcmpltud + the kshift pair clearing mask bits 4-15
; (only +avx512f).
15945 define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
15946 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem:
15947 ; VLX: # %bb.0: # %entry
15948 ; VLX-NEXT: kmovd %edi, %k1
15949 ; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
15950 ; VLX-NEXT: kmovd %k0, %eax
15951 ; VLX-NEXT: # kill: def $al killed $al killed $eax
15954 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem:
15955 ; NoVLX: # %bb.0: # %entry
15956 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15957 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
15958 ; NoVLX-NEXT: kmovw %edi, %k1
15959 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
15960 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15961 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15962 ; NoVLX-NEXT: kmovw %k0, %eax
15963 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
15964 ; NoVLX-NEXT: vzeroupper
15967 %0 = bitcast <2 x i64> %__a to <4 x i32>
15968 %load = load <2 x i64>, <2 x i64>* %__b
15969 %1 = bitcast <2 x i64> %load to <4 x i32>
15970 %2 = icmp ult <4 x i32> %0, %1
15971 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0-3 of the 8-lane mask so it matches the compare width.
15972 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
15973 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 pick lanes of %4; indices 4-7 pick lanes of the zeroinitializer
; operand, so the upper 4 result lanes are zero.
15974 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
15975 %6 = bitcast <8 x i1> %5 to i8
; Unsigned less-than compare of <4 x i32> against a scalar i32 loaded from
; %__b and splatted to all four lanes; the <4 x i1> result is widened to
; <8 x i1> and bitcast to i8.
; Expected codegen: with +avx512vl the splat is folded as a {1to4} broadcast
; memory operand of vpcmpltud; with only +avx512f it is materialized by
; vpbroadcastd, followed by a 512-bit vpcmpltud and the kshift pair clearing
; mask bits 4-15.
15980 define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
15981 ; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b:
15982 ; VLX: # %bb.0: # %entry
15983 ; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
15984 ; VLX-NEXT: kmovd %k0, %eax
15985 ; VLX-NEXT: # kill: def $al killed $al killed $eax
15988 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b:
15989 ; NoVLX: # %bb.0: # %entry
15990 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15991 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
15992 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
15993 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
15994 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
15995 ; NoVLX-NEXT: kmovw %k0, %eax
15996 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
15997 ; NoVLX-NEXT: vzeroupper
16000 %0 = bitcast <2 x i64> %__a to <4 x i32>
16001 %load = load i32, i32* %__b
; Splat idiom: insert the scalar into lane 0, then shuffle with an all-zero
; index mask so every lane reads lane 0.
16002 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16003 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16004 %2 = icmp ult <4 x i32> %0, %1
; Indices 0-3 pick lanes of %2; indices 4-7 pick lanes of the zeroinitializer
; operand, so the upper 4 result lanes are zero.
16005 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
16006 %4 = bitcast <8 x i1> %3 to i8
; Masked unsigned less-than compare of <4 x i32> against a scalar i32 loaded
; from %__b and splatted to all four lanes; the result is ANDed with the first
; four lanes of %__u (viewed as <8 x i1>), widened to <8 x i1> and bitcast to
; i8.
; Expected codegen: masked vpcmpltud with a {1to4} broadcast operand
; (+avx512vl), or vpbroadcastd + masked 512-bit vpcmpltud + the kshift pair
; clearing mask bits 4-15 (only +avx512f).
16010 define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
16011 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b:
16012 ; VLX: # %bb.0: # %entry
16013 ; VLX-NEXT: kmovd %edi, %k1
16014 ; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
16015 ; VLX-NEXT: kmovd %k0, %eax
16016 ; VLX-NEXT: # kill: def $al killed $al killed $eax
16019 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b:
16020 ; NoVLX: # %bb.0: # %entry
16021 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16022 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
16023 ; NoVLX-NEXT: kmovw %edi, %k1
16024 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16025 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16026 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16027 ; NoVLX-NEXT: kmovw %k0, %eax
16028 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
16029 ; NoVLX-NEXT: vzeroupper
16032 %0 = bitcast <2 x i64> %__a to <4 x i32>
16033 %load = load i32, i32* %__b
; Splat idiom: insert the scalar into lane 0, then shuffle with an all-zero
; index mask so every lane reads lane 0.
16034 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16035 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16036 %2 = icmp ult <4 x i32> %0, %1
16037 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0-3 of the 8-lane mask so it matches the compare width.
16038 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16039 %4 = and <4 x i1> %extract.i, %2
; Indices 0-3 pick lanes of %4; indices 4-7 pick lanes of the zeroinitializer
; operand, so the upper 4 result lanes are zero.
16040 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
16041 %6 = bitcast <8 x i1> %5 to i8
; Unsigned less-than compare of two <4 x i32> vectors; the <4 x i1> result is
; widened to <16 x i1> and bitcast to i16.
; Expected codegen: with +avx512vl a 128-bit vpcmpltud; with only +avx512f a
; 512-bit vpcmpltud on the widened registers, then the kshiftlw/kshiftrw $12
; pair that clears mask bits 4-15.
16046 define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16047 ; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask:
16048 ; VLX: # %bb.0: # %entry
16049 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
16050 ; VLX-NEXT: kmovd %k0, %eax
16051 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16054 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask:
16055 ; NoVLX: # %bb.0: # %entry
16056 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
16057 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16058 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16059 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16060 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16061 ; NoVLX-NEXT: kmovw %k0, %eax
16062 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16063 ; NoVLX-NEXT: vzeroupper
16066 %0 = bitcast <2 x i64> %__a to <4 x i32>
16067 %1 = bitcast <2 x i64> %__b to <4 x i32>
16068 %2 = icmp ult <4 x i32> %0, %1
; Indices 0-3 pick lanes of %2; every index >= 4 (the mask repeats 4-7) picks
; a lane of the zeroinitializer operand, so result lanes 4-15 are zero.
16069 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16070 %4 = bitcast <16 x i1> %3 to i16
; Unsigned less-than compare of <4 x i32> with the second operand loaded from
; %__b; the <4 x i1> result is widened to <16 x i1> and bitcast to i16.
; Expected codegen: with +avx512vl the load is folded into vpcmpltud; with
; only +avx512f the operand is loaded by vmovdqa, compared with a 512-bit
; vpcmpltud, and mask bits 4-15 are cleared by the kshift pair.
16074 define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
16075 ; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem:
16076 ; VLX: # %bb.0: # %entry
16077 ; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
16078 ; VLX-NEXT: kmovd %k0, %eax
16079 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16082 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem:
16083 ; NoVLX: # %bb.0: # %entry
16084 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16085 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
16086 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16087 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16088 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16089 ; NoVLX-NEXT: kmovw %k0, %eax
16090 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16091 ; NoVLX-NEXT: vzeroupper
16094 %0 = bitcast <2 x i64> %__a to <4 x i32>
16095 %load = load <2 x i64>, <2 x i64>* %__b
16096 %1 = bitcast <2 x i64> %load to <4 x i32>
16097 %2 = icmp ult <4 x i32> %0, %1
; Indices 0-3 pick lanes of %2; every index >= 4 (the mask repeats 4-7) picks
; a lane of the zeroinitializer operand, so result lanes 4-15 are zero.
16098 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16099 %4 = bitcast <16 x i1> %3 to i16
; Masked unsigned less-than compare of <4 x i32>: the <4 x i1> result is ANDed
; with the first four lanes of %__u (viewed as <8 x i1>), widened to
; <16 x i1> and bitcast to i16.
; Expected codegen: masked 128-bit vpcmpltud (+avx512vl), or a masked 512-bit
; vpcmpltud followed by the kshiftlw/kshiftrw $12 pair that clears mask bits
; 4-15 (only +avx512f).
16103 define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16104 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask:
16105 ; VLX: # %bb.0: # %entry
16106 ; VLX-NEXT: kmovd %edi, %k1
16107 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
16108 ; VLX-NEXT: kmovd %k0, %eax
16109 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16112 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask:
16113 ; NoVLX: # %bb.0: # %entry
16114 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
16115 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16116 ; NoVLX-NEXT: kmovw %edi, %k1
16117 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16118 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16119 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16120 ; NoVLX-NEXT: kmovw %k0, %eax
16121 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16122 ; NoVLX-NEXT: vzeroupper
16125 %0 = bitcast <2 x i64> %__a to <4 x i32>
16126 %1 = bitcast <2 x i64> %__b to <4 x i32>
16127 %2 = icmp ult <4 x i32> %0, %1
16128 %3 = bitcast i8 %__u to <8 x i1>
; Keep only lanes 0-3 of the 8-lane mask so it matches the compare width.
16129 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16130 %4 = and <4 x i1> %2, %extract.i
; Indices 0-3 pick lanes of %4; every index >= 4 (the mask repeats 4-7) picks
; a lane of the zeroinitializer operand, so result lanes 4-15 are zero.
16131 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16132 %6 = bitcast <16 x i1> %5 to i16
; Masked unsigned less-than compare (icmp ult) of <4 x i32> %__a (bitcast from
; <2 x i64>) against a full vector loaded from %__b. The low 4 lanes of %__u
; (viewed as <8 x i1>) are ANDed with the compare result, which is then
; zero-padded to <16 x i1> and bitcast to i16.
; Expected codegen: the VLX checks want the load folded into an xmm vpcmpltud
; predicated on %k1; the NoVLX checks want a separate vmovdqa, a zmm compare,
; and a kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
16136 define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
16137 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem:
16138 ; VLX: # %bb.0: # %entry
16139 ; VLX-NEXT: kmovd %edi, %k1
16140 ; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
16141 ; VLX-NEXT: kmovd %k0, %eax
16142 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16145 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem:
16146 ; NoVLX: # %bb.0: # %entry
16147 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16148 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
16149 ; NoVLX-NEXT: kmovw %edi, %k1
16150 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16151 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16152 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16153 ; NoVLX-NEXT: kmovw %k0, %eax
16154 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16155 ; NoVLX-NEXT: vzeroupper
16158 %0 = bitcast <2 x i64> %__a to <4 x i32>
16159 %load = load <2 x i64>, <2 x i64>* %__b
16160 %1 = bitcast <2 x i64> %load to <4 x i32>
16161 %2 = icmp ult <4 x i32> %0, %1
16162 %3 = bitcast i8 %__u to <8 x i1>
16163 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16164 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so every
; result lane above 3 is false.
16165 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16166 %6 = bitcast <16 x i1> %5 to i16
; Unsigned less-than compare (icmp ult) of <4 x i32> %__a (bitcast from
; <2 x i64>) against a scalar i32 loaded from %__b and splatted to all four
; lanes. The <4 x i1> result is zero-padded to <16 x i1> and bitcast to i16.
; Expected codegen: the VLX checks want the {1to4} embedded-broadcast memory
; form of vpcmpltud; the NoVLX checks want a vpbroadcastd, a zmm compare, and
; a kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
16171 define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
16172 ; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b:
16173 ; VLX: # %bb.0: # %entry
16174 ; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
16175 ; VLX-NEXT: kmovd %k0, %eax
16176 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16179 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b:
16180 ; NoVLX: # %bb.0: # %entry
16181 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16182 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
16183 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16184 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16185 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16186 ; NoVLX-NEXT: kmovw %k0, %eax
16187 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16188 ; NoVLX-NEXT: vzeroupper
16191 %0 = bitcast <2 x i64> %__a to <4 x i32>
16192 %load = load i32, i32* %__b
; insertelement into lane 0 plus an all-zero shuffle mask splats the scalar.
16193 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16194 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16195 %2 = icmp ult <4 x i32> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so every
; result lane above 3 is false.
16196 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16197 %4 = bitcast <16 x i1> %3 to i16
; Masked unsigned less-than compare (icmp ult) of <4 x i32> %__a (bitcast from
; <2 x i64>) against a scalar i32 loaded from %__b and splatted to all four
; lanes. The low 4 lanes of %__u (viewed as <8 x i1>) are ANDed with the
; compare result (mask operand first in this variant), which is then
; zero-padded to <16 x i1> and bitcast to i16.
; Expected codegen: the VLX checks want the {1to4} embedded-broadcast form of
; vpcmpltud predicated on %k1; the NoVLX checks want a vpbroadcastd, a zmm
; compare, and a kshiftlw/kshiftrw $12 pair that keeps the low 4 mask bits.
16201 define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
16202 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b:
16203 ; VLX: # %bb.0: # %entry
16204 ; VLX-NEXT: kmovd %edi, %k1
16205 ; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
16206 ; VLX-NEXT: kmovd %k0, %eax
16207 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16210 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b:
16211 ; NoVLX: # %bb.0: # %entry
16212 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16213 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
16214 ; NoVLX-NEXT: kmovw %edi, %k1
16215 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16216 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16217 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16218 ; NoVLX-NEXT: kmovw %k0, %eax
16219 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16220 ; NoVLX-NEXT: vzeroupper
16223 %0 = bitcast <2 x i64> %__a to <4 x i32>
16224 %load = load i32, i32* %__b
; insertelement into lane 0 plus an all-zero shuffle mask splats the scalar.
16225 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16226 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16227 %2 = icmp ult <4 x i32> %0, %1
16228 %3 = bitcast i8 %__u to <8 x i1>
16229 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16230 %4 = and <4 x i1> %extract.i, %2
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so every
; result lane above 3 is false.
16231 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16232 %6 = bitcast <16 x i1> %5 to i16
; Unsigned less-than compare (icmp ult) of two <4 x i32> vectors, both passed
; as <2 x i64> and bitcast. The <4 x i1> result is zero-padded to <32 x i1>
; and bitcast to i32.
; Expected codegen: the VLX checks want a single xmm vpcmpltud into %k0; the
; NoVLX checks want the compare on zmm registers followed by a
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
16237 define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16238 ; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask:
16239 ; VLX: # %bb.0: # %entry
16240 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
16241 ; VLX-NEXT: kmovd %k0, %eax
16244 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask:
16245 ; NoVLX: # %bb.0: # %entry
16246 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
16247 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16248 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16249 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16250 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16251 ; NoVLX-NEXT: kmovw %k0, %eax
16252 ; NoVLX-NEXT: vzeroupper
16255 %0 = bitcast <2 x i64> %__a to <4 x i32>
16256 %1 = bitcast <2 x i64> %__b to <4 x i32>
16257 %2 = icmp ult <4 x i32> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so every
; result lane above 3 is false.
16258 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16259 %4 = bitcast <32 x i1> %3 to i32
; Unsigned less-than compare (icmp ult) of <4 x i32> %__a (bitcast from
; <2 x i64>) against a full vector loaded from %__b. The <4 x i1> result is
; zero-padded to <32 x i1> and bitcast to i32.
; Expected codegen: the VLX checks want the load folded into an xmm vpcmpltud;
; the NoVLX checks want a separate vmovdqa, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
16263 define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
16264 ; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem:
16265 ; VLX: # %bb.0: # %entry
16266 ; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
16267 ; VLX-NEXT: kmovd %k0, %eax
16270 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem:
16271 ; NoVLX: # %bb.0: # %entry
16272 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16273 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
16274 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16275 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16276 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16277 ; NoVLX-NEXT: kmovw %k0, %eax
16278 ; NoVLX-NEXT: vzeroupper
16281 %0 = bitcast <2 x i64> %__a to <4 x i32>
16282 %load = load <2 x i64>, <2 x i64>* %__b
16283 %1 = bitcast <2 x i64> %load to <4 x i32>
16284 %2 = icmp ult <4 x i32> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so every
; result lane above 3 is false.
16285 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16286 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned less-than compare (icmp ult) of two <4 x i32> vectors, both
; passed as <2 x i64> and bitcast. The low 4 lanes of %__u (viewed as <8 x i1>)
; are ANDed with the compare result, which is then zero-padded to <32 x i1>
; and bitcast to i32.
; Expected codegen: the VLX checks want a single xmm vpcmpltud predicated on
; %k1; the NoVLX checks want the compare on zmm registers followed by a
; kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
16290 define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16291 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask:
16292 ; VLX: # %bb.0: # %entry
16293 ; VLX-NEXT: kmovd %edi, %k1
16294 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
16295 ; VLX-NEXT: kmovd %k0, %eax
16298 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask:
16299 ; NoVLX: # %bb.0: # %entry
16300 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
16301 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16302 ; NoVLX-NEXT: kmovw %edi, %k1
16303 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16304 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16305 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16306 ; NoVLX-NEXT: kmovw %k0, %eax
16307 ; NoVLX-NEXT: vzeroupper
16310 %0 = bitcast <2 x i64> %__a to <4 x i32>
16311 %1 = bitcast <2 x i64> %__b to <4 x i32>
16312 %2 = icmp ult <4 x i32> %0, %1
16313 %3 = bitcast i8 %__u to <8 x i1>
16314 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16315 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so every
; result lane above 3 is false.
16316 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16317 %6 = bitcast <32 x i1> %5 to i32
; Masked unsigned less-than compare (icmp ult) of <4 x i32> %__a (bitcast from
; <2 x i64>) against a full vector loaded from %__b. The low 4 lanes of %__u
; (viewed as <8 x i1>) are ANDed with the compare result, which is then
; zero-padded to <32 x i1> and bitcast to i32.
; Expected codegen: the VLX checks want the load folded into an xmm vpcmpltud
; predicated on %k1; the NoVLX checks want a separate vmovdqa, a zmm compare,
; and a kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
16321 define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
16322 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem:
16323 ; VLX: # %bb.0: # %entry
16324 ; VLX-NEXT: kmovd %edi, %k1
16325 ; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
16326 ; VLX-NEXT: kmovd %k0, %eax
16329 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem:
16330 ; NoVLX: # %bb.0: # %entry
16331 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16332 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
16333 ; NoVLX-NEXT: kmovw %edi, %k1
16334 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16335 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16336 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16337 ; NoVLX-NEXT: kmovw %k0, %eax
16338 ; NoVLX-NEXT: vzeroupper
16341 %0 = bitcast <2 x i64> %__a to <4 x i32>
16342 %load = load <2 x i64>, <2 x i64>* %__b
16343 %1 = bitcast <2 x i64> %load to <4 x i32>
16344 %2 = icmp ult <4 x i32> %0, %1
16345 %3 = bitcast i8 %__u to <8 x i1>
16346 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16347 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so every
; result lane above 3 is false.
16348 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16349 %6 = bitcast <32 x i1> %5 to i32
; Unsigned less-than compare (icmp ult) of <4 x i32> %__a (bitcast from
; <2 x i64>) against a scalar i32 loaded from %__b and splatted to all four
; lanes. The <4 x i1> result is zero-padded to <32 x i1> and bitcast to i32.
; Expected codegen: the VLX checks want the {1to4} embedded-broadcast memory
; form of vpcmpltud; the NoVLX checks want a vpbroadcastd, a zmm compare, and
; a kshiftlw/kshiftrw $12 pair that keeps only the low 4 mask bits.
16354 define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
16355 ; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b:
16356 ; VLX: # %bb.0: # %entry
16357 ; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
16358 ; VLX-NEXT: kmovd %k0, %eax
16361 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b:
16362 ; NoVLX: # %bb.0: # %entry
16363 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16364 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
16365 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16366 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16367 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16368 ; NoVLX-NEXT: kmovw %k0, %eax
16369 ; NoVLX-NEXT: vzeroupper
16372 %0 = bitcast <2 x i64> %__a to <4 x i32>
16373 %load = load i32, i32* %__b
; insertelement into lane 0 plus an all-zero shuffle mask splats the scalar.
16374 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16375 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16376 %2 = icmp ult <4 x i32> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so every
; result lane above 3 is false.
16377 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16378 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned less-than compare (icmp ult) of <4 x i32> %__a (bitcast from
; <2 x i64>) against a scalar i32 loaded from %__b and splatted to all four
; lanes. The low 4 lanes of %__u (viewed as <8 x i1>) are ANDed with the
; compare result (mask operand first in this variant), which is then
; zero-padded to <32 x i1> and bitcast to i32.
; Expected codegen: the VLX checks want the {1to4} embedded-broadcast form of
; vpcmpltud predicated on %k1; the NoVLX checks want a vpbroadcastd, a zmm
; compare, and a kshiftlw/kshiftrw $12 pair that keeps the low 4 mask bits.
16382 define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
16383 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b:
16384 ; VLX: # %bb.0: # %entry
16385 ; VLX-NEXT: kmovd %edi, %k1
16386 ; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
16387 ; VLX-NEXT: kmovd %k0, %eax
16390 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b:
16391 ; NoVLX: # %bb.0: # %entry
16392 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16393 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
16394 ; NoVLX-NEXT: kmovw %edi, %k1
16395 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16396 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16397 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16398 ; NoVLX-NEXT: kmovw %k0, %eax
16399 ; NoVLX-NEXT: vzeroupper
16402 %0 = bitcast <2 x i64> %__a to <4 x i32>
16403 %load = load i32, i32* %__b
; insertelement into lane 0 plus an all-zero shuffle mask splats the scalar.
16404 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16405 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16406 %2 = icmp ult <4 x i32> %0, %1
16407 %3 = bitcast i8 %__u to <8 x i1>
16408 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16409 %4 = and <4 x i1> %extract.i, %2
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so every
; result lane above 3 is false.
16410 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16411 %6 = bitcast <32 x i1> %5 to i32
; Unsigned less-than compare (icmp ult) of two <4 x i32> vectors, both passed
; as <2 x i64> and bitcast. The <4 x i1> result is zero-padded to <64 x i1>
; and bitcast to i64.
; Expected codegen: the VLX checks want a single xmm vpcmpltud and a kmovq
; into %rax; the NoVLX checks want a zmm compare, a kshiftlw/kshiftrw $12 pair
; that keeps the low 4 mask bits, then kmovw plus movzwl to widen the result.
16416 define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16417 ; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask:
16418 ; VLX: # %bb.0: # %entry
16419 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
16420 ; VLX-NEXT: kmovq %k0, %rax
16423 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask:
16424 ; NoVLX: # %bb.0: # %entry
16425 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
16426 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16427 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16428 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16429 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16430 ; NoVLX-NEXT: kmovw %k0, %eax
16431 ; NoVLX-NEXT: movzwl %ax, %eax
16432 ; NoVLX-NEXT: vzeroupper
16435 %0 = bitcast <2 x i64> %__a to <4 x i32>
16436 %1 = bitcast <2 x i64> %__b to <4 x i32>
16437 %2 = icmp ult <4 x i32> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so every
; result lane above 3 is false.
16438 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16439 %4 = bitcast <64 x i1> %3 to i64
; Unsigned less-than compare (icmp ult) of <4 x i32> %__a (bitcast from
; <2 x i64>) against a full vector loaded from %__b. The <4 x i1> result is
; zero-padded to <64 x i1> and bitcast to i64.
; Expected codegen: the VLX checks want the load folded into an xmm vpcmpltud
; and a kmovq into %rax; the NoVLX checks want a vmovdqa, a zmm compare, a
; kshiftlw/kshiftrw $12 pair, then kmovw plus movzwl to widen the result.
16443 define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
16444 ; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem:
16445 ; VLX: # %bb.0: # %entry
16446 ; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
16447 ; VLX-NEXT: kmovq %k0, %rax
16450 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem:
16451 ; NoVLX: # %bb.0: # %entry
16452 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16453 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
16454 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16455 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16456 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16457 ; NoVLX-NEXT: kmovw %k0, %eax
16458 ; NoVLX-NEXT: movzwl %ax, %eax
16459 ; NoVLX-NEXT: vzeroupper
16462 %0 = bitcast <2 x i64> %__a to <4 x i32>
16463 %load = load <2 x i64>, <2 x i64>* %__b
16464 %1 = bitcast <2 x i64> %load to <4 x i32>
16465 %2 = icmp ult <4 x i32> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so every
; result lane above 3 is false.
16466 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16467 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned less-than compare (icmp ult) of two <4 x i32> vectors, both
; passed as <2 x i64> and bitcast. The low 4 lanes of %__u (viewed as <8 x i1>)
; are ANDed with the compare result, which is then zero-padded to <64 x i1>
; and bitcast to i64.
; Expected codegen: the VLX checks want a single xmm vpcmpltud predicated on
; %k1 and a kmovq into %rax; the NoVLX checks want a zmm compare, a
; kshiftlw/kshiftrw $12 pair, then kmovw plus movzwl to widen the result.
16471 define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
16472 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask:
16473 ; VLX: # %bb.0: # %entry
16474 ; VLX-NEXT: kmovd %edi, %k1
16475 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
16476 ; VLX-NEXT: kmovq %k0, %rax
16479 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask:
16480 ; NoVLX: # %bb.0: # %entry
16481 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
16482 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16483 ; NoVLX-NEXT: kmovw %edi, %k1
16484 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16485 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16486 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16487 ; NoVLX-NEXT: kmovw %k0, %eax
16488 ; NoVLX-NEXT: movzwl %ax, %eax
16489 ; NoVLX-NEXT: vzeroupper
16492 %0 = bitcast <2 x i64> %__a to <4 x i32>
16493 %1 = bitcast <2 x i64> %__b to <4 x i32>
16494 %2 = icmp ult <4 x i32> %0, %1
16495 %3 = bitcast i8 %__u to <8 x i1>
16496 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16497 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so every
; result lane above 3 is false.
16498 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16499 %6 = bitcast <64 x i1> %5 to i64
; Masked unsigned less-than compare (icmp ult) of <4 x i32> %__a (bitcast from
; <2 x i64>) against a full vector loaded from %__b. The low 4 lanes of %__u
; (viewed as <8 x i1>) are ANDed with the compare result, which is then
; zero-padded to <64 x i1> and bitcast to i64.
; Expected codegen: the VLX checks want the load folded into an xmm vpcmpltud
; predicated on %k1 and a kmovq into %rax; the NoVLX checks want a vmovdqa, a
; zmm compare, a kshiftlw/kshiftrw $12 pair, then kmovw plus movzwl.
16503 define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
16504 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem:
16505 ; VLX: # %bb.0: # %entry
16506 ; VLX-NEXT: kmovd %edi, %k1
16507 ; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
16508 ; VLX-NEXT: kmovq %k0, %rax
16511 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem:
16512 ; NoVLX: # %bb.0: # %entry
16513 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16514 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
16515 ; NoVLX-NEXT: kmovw %edi, %k1
16516 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16517 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16518 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16519 ; NoVLX-NEXT: kmovw %k0, %eax
16520 ; NoVLX-NEXT: movzwl %ax, %eax
16521 ; NoVLX-NEXT: vzeroupper
16524 %0 = bitcast <2 x i64> %__a to <4 x i32>
16525 %load = load <2 x i64>, <2 x i64>* %__b
16526 %1 = bitcast <2 x i64> %load to <4 x i32>
16527 %2 = icmp ult <4 x i32> %0, %1
16528 %3 = bitcast i8 %__u to <8 x i1>
16529 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16530 %4 = and <4 x i1> %2, %extract.i
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so every
; result lane above 3 is false.
16531 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16532 %6 = bitcast <64 x i1> %5 to i64
; Unsigned less-than compare (icmp ult) of <4 x i32> %__a (bitcast from
; <2 x i64>) against a scalar i32 loaded from %__b and splatted to all four
; lanes. The <4 x i1> result is zero-padded to <64 x i1> and bitcast to i64.
; Expected codegen: the VLX checks want the {1to4} embedded-broadcast form of
; vpcmpltud and a kmovq into %rax; the NoVLX checks want a vpbroadcastd, a zmm
; compare, a kshiftlw/kshiftrw $12 pair, then kmovw plus movzwl.
16537 define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
16538 ; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b:
16539 ; VLX: # %bb.0: # %entry
16540 ; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
16541 ; VLX-NEXT: kmovq %k0, %rax
16544 ; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b:
16545 ; NoVLX: # %bb.0: # %entry
16546 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16547 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
16548 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16549 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16550 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16551 ; NoVLX-NEXT: kmovw %k0, %eax
16552 ; NoVLX-NEXT: movzwl %ax, %eax
16553 ; NoVLX-NEXT: vzeroupper
16556 %0 = bitcast <2 x i64> %__a to <4 x i32>
16557 %load = load i32, i32* %__b
; insertelement into lane 0 plus an all-zero shuffle mask splats the scalar.
16558 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16559 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16560 %2 = icmp ult <4 x i32> %0, %1
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so every
; result lane above 3 is false.
16561 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16562 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned less-than compare (icmp ult) of <4 x i32> %__a (bitcast from
; <2 x i64>) against a scalar i32 loaded from %__b and splatted to all four
; lanes. The low 4 lanes of %__u (viewed as <8 x i1>) are ANDed with the
; compare result (mask operand first in this variant), which is then
; zero-padded to <64 x i1> and bitcast to i64.
; Expected codegen: the VLX checks want the {1to4} embedded-broadcast form of
; vpcmpltud predicated on %k1 and a kmovq into %rax; the NoVLX checks want a
; vpbroadcastd, a zmm compare, a kshiftlw/kshiftrw $12 pair, then kmovw plus
; movzwl.
16566 define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
16567 ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b:
16568 ; VLX: # %bb.0: # %entry
16569 ; VLX-NEXT: kmovd %edi, %k1
16570 ; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
16571 ; VLX-NEXT: kmovq %k0, %rax
16574 ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b:
16575 ; NoVLX: # %bb.0: # %entry
16576 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16577 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
16578 ; NoVLX-NEXT: kmovw %edi, %k1
16579 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16580 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
16581 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
16582 ; NoVLX-NEXT: kmovw %k0, %eax
16583 ; NoVLX-NEXT: movzwl %ax, %eax
16584 ; NoVLX-NEXT: vzeroupper
16587 %0 = bitcast <2 x i64> %__a to <4 x i32>
16588 %load = load i32, i32* %__b
; insertelement into lane 0 plus an all-zero shuffle mask splats the scalar.
16589 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
16590 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
16591 %2 = icmp ult <4 x i32> %0, %1
16592 %3 = bitcast i8 %__u to <8 x i1>
16593 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
16594 %4 = and <4 x i1> %extract.i, %2
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so every
; result lane above 3 is false.
16595 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
16596 %6 = bitcast <64 x i1> %5 to i64
; Unsigned less-than compare (icmp ult) of two <8 x i32> vectors, both passed
; as <4 x i64> and bitcast. The <8 x i1> result is zero-padded to <16 x i1>
; and bitcast to i16.
; Expected codegen: the VLX checks want a single ymm vpcmpltud into %k0; the
; NoVLX checks want the compare on zmm registers followed by a
; kshiftlw/kshiftrw $8 pair that keeps only the low 8 mask bits.
16601 define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16602 ; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask:
16603 ; VLX: # %bb.0: # %entry
16604 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0
16605 ; VLX-NEXT: kmovd %k0, %eax
16606 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16607 ; VLX-NEXT: vzeroupper
16610 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask:
16611 ; NoVLX: # %bb.0: # %entry
16612 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
16613 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16614 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16615 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16616 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16617 ; NoVLX-NEXT: kmovw %k0, %eax
16618 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16619 ; NoVLX-NEXT: vzeroupper
16622 %0 = bitcast <4 x i64> %__a to <8 x i32>
16623 %1 = bitcast <4 x i64> %__b to <8 x i32>
16624 %2 = icmp ult <8 x i32> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so every
; result lane above 7 is false.
16625 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16626 %4 = bitcast <16 x i1> %3 to i16
; Unsigned less-than compare (icmp ult) of <8 x i32> %__a (bitcast from
; <4 x i64>) against a full vector loaded from %__b. The <8 x i1> result is
; zero-padded to <16 x i1> and bitcast to i16.
; Expected codegen: the VLX checks want the load folded into a ymm vpcmpltud;
; the NoVLX checks want a separate vmovdqa, a zmm compare, and a
; kshiftlw/kshiftrw $8 pair that keeps only the low 8 mask bits.
16630 define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
16631 ; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem:
16632 ; VLX: # %bb.0: # %entry
16633 ; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0
16634 ; VLX-NEXT: kmovd %k0, %eax
16635 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16636 ; VLX-NEXT: vzeroupper
16639 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem:
16640 ; NoVLX: # %bb.0: # %entry
16641 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16642 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
16643 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16644 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16645 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16646 ; NoVLX-NEXT: kmovw %k0, %eax
16647 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16648 ; NoVLX-NEXT: vzeroupper
16651 %0 = bitcast <4 x i64> %__a to <8 x i32>
16652 %load = load <4 x i64>, <4 x i64>* %__b
16653 %1 = bitcast <4 x i64> %load to <8 x i32>
16654 %2 = icmp ult <8 x i32> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so every
; result lane above 7 is false.
16655 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16656 %4 = bitcast <16 x i1> %3 to i16
; Masked unsigned less-than compare (icmp ult) of two <8 x i32> vectors, both
; passed as <4 x i64> and bitcast. %__u, viewed as <8 x i1>, is ANDed with the
; compare result, which is then zero-padded to <16 x i1> and bitcast to i16.
; Expected codegen: the VLX checks want a single ymm vpcmpltud predicated on
; %k1; the NoVLX checks want the compare on zmm registers followed by a
; kshiftlw/kshiftrw $8 pair that keeps only the low 8 mask bits.
16660 define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16661 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask:
16662 ; VLX: # %bb.0: # %entry
16663 ; VLX-NEXT: kmovd %edi, %k1
16664 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
16665 ; VLX-NEXT: kmovd %k0, %eax
16666 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16667 ; VLX-NEXT: vzeroupper
16670 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask:
16671 ; NoVLX: # %bb.0: # %entry
16672 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
16673 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16674 ; NoVLX-NEXT: kmovw %edi, %k1
16675 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16676 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16677 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16678 ; NoVLX-NEXT: kmovw %k0, %eax
16679 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16680 ; NoVLX-NEXT: vzeroupper
16683 %0 = bitcast <4 x i64> %__a to <8 x i32>
16684 %1 = bitcast <4 x i64> %__b to <8 x i32>
16685 %2 = icmp ult <8 x i32> %0, %1
16686 %3 = bitcast i8 %__u to <8 x i1>
16687 %4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so every
; result lane above 7 is false.
16688 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16689 %6 = bitcast <16 x i1> %5 to i16
; Masked unsigned less-than compare (icmp ult) of <8 x i32> %__a (bitcast from
; <4 x i64>) against a full vector loaded from %__b. %__u, viewed as <8 x i1>,
; is ANDed with the compare result, which is then zero-padded to <16 x i1>
; and bitcast to i16.
; Expected codegen: the VLX checks want the load folded into a ymm vpcmpltud
; predicated on %k1; the NoVLX checks want a separate vmovdqa, a zmm compare,
; and a kshiftlw/kshiftrw $8 pair that keeps only the low 8 mask bits.
16693 define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
16694 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem:
16695 ; VLX: # %bb.0: # %entry
16696 ; VLX-NEXT: kmovd %edi, %k1
16697 ; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
16698 ; VLX-NEXT: kmovd %k0, %eax
16699 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16700 ; VLX-NEXT: vzeroupper
16703 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem:
16704 ; NoVLX: # %bb.0: # %entry
16705 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16706 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
16707 ; NoVLX-NEXT: kmovw %edi, %k1
16708 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16709 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16710 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16711 ; NoVLX-NEXT: kmovw %k0, %eax
16712 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16713 ; NoVLX-NEXT: vzeroupper
16716 %0 = bitcast <4 x i64> %__a to <8 x i32>
16717 %load = load <4 x i64>, <4 x i64>* %__b
16718 %1 = bitcast <4 x i64> %load to <8 x i32>
16719 %2 = icmp ult <8 x i32> %0, %1
16720 %3 = bitcast i8 %__u to <8 x i1>
16721 %4 = and <8 x i1> %2, %3
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so every
; result lane above 7 is false.
16722 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16723 %6 = bitcast <16 x i1> %5 to i16
; Unsigned less-than compare (icmp ult) of <8 x i32> %__a (bitcast from
; <4 x i64>) against a scalar i32 loaded from %__b and splatted to all eight
; lanes. The <8 x i1> result is zero-padded to <16 x i1> and bitcast to i16.
; Expected codegen: the VLX checks want the {1to8} embedded-broadcast memory
; form of vpcmpltud; the NoVLX checks want a vpbroadcastd, a zmm compare, and
; a kshiftlw/kshiftrw $8 pair that keeps only the low 8 mask bits.
16728 define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
16729 ; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b:
16730 ; VLX: # %bb.0: # %entry
16731 ; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
16732 ; VLX-NEXT: kmovd %k0, %eax
16733 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16734 ; VLX-NEXT: vzeroupper
16737 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b:
16738 ; NoVLX: # %bb.0: # %entry
16739 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16740 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
16741 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16742 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16743 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16744 ; NoVLX-NEXT: kmovw %k0, %eax
16745 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16746 ; NoVLX-NEXT: vzeroupper
16749 %0 = bitcast <4 x i64> %__a to <8 x i32>
16750 %load = load i32, i32* %__b
; insertelement into lane 0 plus an all-zero shuffle mask splats the scalar.
16751 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16752 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16753 %2 = icmp ult <8 x i32> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so every
; result lane above 7 is false.
16754 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16755 %4 = bitcast <16 x i1> %3 to i16
; Masked unsigned less-than compare (icmp ult) of <8 x i32> %__a (bitcast from
; <4 x i64>) against a scalar i32 loaded from %__b and splatted to all eight
; lanes. %__u, viewed as <8 x i1>, is ANDed with the compare result (mask
; operand first in this variant), which is then zero-padded to <16 x i1> and
; bitcast to i16.
; Expected codegen: the VLX checks want the {1to8} embedded-broadcast form of
; vpcmpltud predicated on %k1; the NoVLX checks want a vpbroadcastd, a zmm
; compare, and a kshiftlw/kshiftrw $8 pair that keeps the low 8 mask bits.
16759 define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
16760 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b:
16761 ; VLX: # %bb.0: # %entry
16762 ; VLX-NEXT: kmovd %edi, %k1
16763 ; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
16764 ; VLX-NEXT: kmovd %k0, %eax
16765 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
16766 ; VLX-NEXT: vzeroupper
16769 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b:
16770 ; NoVLX: # %bb.0: # %entry
16771 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16772 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
16773 ; NoVLX-NEXT: kmovw %edi, %k1
16774 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16775 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16776 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16777 ; NoVLX-NEXT: kmovw %k0, %eax
16778 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
16779 ; NoVLX-NEXT: vzeroupper
16782 %0 = bitcast <4 x i64> %__a to <8 x i32>
16783 %load = load i32, i32* %__b
; insertelement into lane 0 plus an all-zero shuffle mask splats the scalar.
16784 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16785 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16786 %2 = icmp ult <8 x i32> %0, %1
16787 %3 = bitcast i8 %__u to <8 x i1>
16788 %4 = and <8 x i1> %3, %2
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so every
; result lane above 7 is false.
16789 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16790 %6 = bitcast <16 x i1> %5 to i16
; Unsigned less-than compare (icmp ult) of two <8 x i32> vectors, both passed
; as <4 x i64> and bitcast. The <8 x i1> result is zero-padded to <32 x i1>
; and bitcast to i32.
; Expected codegen: the VLX checks want a single ymm vpcmpltud into %k0; the
; NoVLX checks want the compare on zmm registers followed by a
; kshiftlw/kshiftrw $8 pair that keeps only the low 8 mask bits.
16795 define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16796 ; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask:
16797 ; VLX: # %bb.0: # %entry
16798 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0
16799 ; VLX-NEXT: kmovd %k0, %eax
16800 ; VLX-NEXT: vzeroupper
16803 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask:
16804 ; NoVLX: # %bb.0: # %entry
16805 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
16806 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16807 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16808 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16809 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16810 ; NoVLX-NEXT: kmovw %k0, %eax
16811 ; NoVLX-NEXT: vzeroupper
16814 %0 = bitcast <4 x i64> %__a to <8 x i32>
16815 %1 = bitcast <4 x i64> %__b to <8 x i32>
16816 %2 = icmp ult <8 x i32> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so every
; result lane above 7 is false.
16817 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16818 %4 = bitcast <32 x i1> %3 to i32
; Unsigned less-than compare (icmp ult) of <8 x i32> %__a (bitcast from
; <4 x i64>) against a full vector loaded from %__b. The <8 x i1> result is
; zero-padded to <32 x i1> and bitcast to i32.
; Expected codegen: the VLX checks want the load folded into a ymm vpcmpltud;
; the NoVLX checks want a separate vmovdqa, a zmm compare, and a
; kshiftlw/kshiftrw $8 pair that keeps only the low 8 mask bits.
16822 define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
16823 ; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem:
16824 ; VLX: # %bb.0: # %entry
16825 ; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0
16826 ; VLX-NEXT: kmovd %k0, %eax
16827 ; VLX-NEXT: vzeroupper
16830 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem:
16831 ; NoVLX: # %bb.0: # %entry
16832 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16833 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
16834 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16835 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16836 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16837 ; NoVLX-NEXT: kmovw %k0, %eax
16838 ; NoVLX-NEXT: vzeroupper
16841 %0 = bitcast <4 x i64> %__a to <8 x i32>
16842 %load = load <4 x i64>, <4 x i64>* %__b
16843 %1 = bitcast <4 x i64> %load to <8 x i32>
16844 %2 = icmp ult <8 x i32> %0, %1
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so every
; result lane above 7 is false.
16845 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16846 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned less-than compare: the <8 x i1> icmp ult result of two
; <8 x i32> register operands is ANDed with %__u (bitcast to <8 x i1>),
; padded with zero lanes to <32 x i1>, and bitcast to i32.
; Both runs expect %edi moved into %k1 and used as the compare's write-mask.
; The avx512vl run (VLX prefix) compares on ymm; the avx512f-only run
; (NoVLX prefix) compares on zmm and adds a kshiftlw/kshiftrw $8 pair that
; zeroes mask bits 8-15.
16850 define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16851 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask:
16852 ; VLX: # %bb.0: # %entry
16853 ; VLX-NEXT: kmovd %edi, %k1
16854 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
16855 ; VLX-NEXT: kmovd %k0, %eax
16856 ; VLX-NEXT: vzeroupper
16859 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask:
16860 ; NoVLX: # %bb.0: # %entry
16861 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
16862 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16863 ; NoVLX-NEXT: kmovw %edi, %k1
16864 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16865 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16866 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16867 ; NoVLX-NEXT: kmovw %k0, %eax
16868 ; NoVLX-NEXT: vzeroupper
16871 %0 = bitcast <4 x i64> %__a to <8 x i32>
16872 %1 = bitcast <4 x i64> %__b to <8 x i32>
16873 %2 = icmp ult <8 x i32> %0, %1
16874 %3 = bitcast i8 %__u to <8 x i1>
16875 %4 = and <8 x i1> %2, %3
16876 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16877 %6 = bitcast <32 x i1> %5 to i32
; Masked unsigned less-than compare with a memory operand: icmp ult of an
; <8 x i32> register operand against a vector loaded through %__b, ANDed with
; %__u (bitcast to <8 x i1>), padded with zero lanes to <32 x i1>, and
; bitcast to i32.
; The avx512vl run (VLX prefix) expects the load folded into a write-masked
; ymm vpcmpltud. The avx512f-only run (NoVLX prefix) expects a separate
; vmovdqa, a write-masked zmm compare, and a kshiftlw/kshiftrw $8 pair.
16881 define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
16882 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem:
16883 ; VLX: # %bb.0: # %entry
16884 ; VLX-NEXT: kmovd %edi, %k1
16885 ; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
16886 ; VLX-NEXT: kmovd %k0, %eax
16887 ; VLX-NEXT: vzeroupper
16890 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem:
16891 ; NoVLX: # %bb.0: # %entry
16892 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16893 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
16894 ; NoVLX-NEXT: kmovw %edi, %k1
16895 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16896 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16897 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16898 ; NoVLX-NEXT: kmovw %k0, %eax
16899 ; NoVLX-NEXT: vzeroupper
16902 %0 = bitcast <4 x i64> %__a to <8 x i32>
16903 %load = load <4 x i64>, <4 x i64>* %__b
16904 %1 = bitcast <4 x i64> %load to <8 x i32>
16905 %2 = icmp ult <8 x i32> %0, %1
16906 %3 = bitcast i8 %__u to <8 x i1>
16907 %4 = and <8 x i1> %2, %3
16908 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16909 %6 = bitcast <32 x i1> %5 to i32
; Unsigned less-than compare against a broadcast scalar: one i32 is loaded
; through %__b and splatted to <8 x i32> (insertelement + all-zero shuffle
; mask), then compared with icmp ult. The <8 x i1> result is padded with zero
; lanes to <32 x i1> and bitcast to i32.
; The avx512vl run (VLX prefix) expects an embedded {1to8} broadcast operand.
; The avx512f-only run (NoVLX prefix) expects an explicit vpbroadcastd, a zmm
; compare, and a kshiftlw/kshiftrw $8 pair zeroing mask bits 8-15.
16914 define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
16915 ; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b:
16916 ; VLX: # %bb.0: # %entry
16917 ; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
16918 ; VLX-NEXT: kmovd %k0, %eax
16919 ; VLX-NEXT: vzeroupper
16922 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b:
16923 ; NoVLX: # %bb.0: # %entry
16924 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16925 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
16926 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16927 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16928 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16929 ; NoVLX-NEXT: kmovw %k0, %eax
16930 ; NoVLX-NEXT: vzeroupper
16933 %0 = bitcast <4 x i64> %__a to <8 x i32>
16934 %load = load i32, i32* %__b
16935 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16936 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16937 %2 = icmp ult <8 x i32> %0, %1
16938 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16939 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned less-than compare against a broadcast scalar: one i32 is
; loaded through %__b, splatted to <8 x i32>, compared with icmp ult, and the
; result ANDed with %__u (bitcast to <8 x i1>). The mask is then padded with
; zero lanes to <32 x i1> and bitcast to i32.
; The avx512vl run (VLX prefix) expects a write-masked compare with an
; embedded {1to8} broadcast. The avx512f-only run (NoVLX prefix) expects
; vpbroadcastd, a write-masked zmm compare, and a kshiftlw/kshiftrw $8 pair.
16943 define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
16944 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b:
16945 ; VLX: # %bb.0: # %entry
16946 ; VLX-NEXT: kmovd %edi, %k1
16947 ; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
16948 ; VLX-NEXT: kmovd %k0, %eax
16949 ; VLX-NEXT: vzeroupper
16952 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b:
16953 ; NoVLX: # %bb.0: # %entry
16954 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16955 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
16956 ; NoVLX-NEXT: kmovw %edi, %k1
16957 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
16958 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16959 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16960 ; NoVLX-NEXT: kmovw %k0, %eax
16961 ; NoVLX-NEXT: vzeroupper
16964 %0 = bitcast <4 x i64> %__a to <8 x i32>
16965 %load = load i32, i32* %__b
16966 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
16967 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
16968 %2 = icmp ult <8 x i32> %0, %1
16969 %3 = bitcast i8 %__u to <8 x i1>
16970 %4 = and <8 x i1> %3, %2
16971 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
16972 %6 = bitcast <32 x i1> %5 to i32
; Unsigned less-than compare (icmp ult) of two <8 x i32> register operands.
; The <8 x i1> result is padded to <64 x i1> by a shufflevector whose extra
; lanes all select from zeroinitializer, then bitcast to i64.
; The avx512vl run (VLX prefix) expects a ymm vpcmpltud read back with kmovq.
; The avx512f-only run (NoVLX prefix) expects a zmm compare, a
; kshiftlw/kshiftrw $8 pair zeroing mask bits 8-15, then kmovw and movzwl.
16977 define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
16978 ; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask:
16979 ; VLX: # %bb.0: # %entry
16980 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0
16981 ; VLX-NEXT: kmovq %k0, %rax
16982 ; VLX-NEXT: vzeroupper
16985 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask:
16986 ; NoVLX: # %bb.0: # %entry
16987 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
16988 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
16989 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
16990 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
16991 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
16992 ; NoVLX-NEXT: kmovw %k0, %eax
16993 ; NoVLX-NEXT: movzwl %ax, %eax
16994 ; NoVLX-NEXT: vzeroupper
16997 %0 = bitcast <4 x i64> %__a to <8 x i32>
16998 %1 = bitcast <4 x i64> %__b to <8 x i32>
16999 %2 = icmp ult <8 x i32> %0, %1
17000 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
17001 %4 = bitcast <64 x i1> %3 to i64
; Unsigned less-than compare (icmp ult) of an <8 x i32> register operand
; against a full vector loaded through %__b. The <8 x i1> result is padded
; with zero lanes to <64 x i1> and bitcast to i64.
; The avx512vl run (VLX prefix) expects the load folded into a ymm vpcmpltud
; and the mask read with kmovq. The avx512f-only run (NoVLX prefix) expects
; vmovdqa, a zmm compare, a kshiftlw/kshiftrw $8 pair, then kmovw and movzwl.
17005 define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
17006 ; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem:
17007 ; VLX: # %bb.0: # %entry
17008 ; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0
17009 ; VLX-NEXT: kmovq %k0, %rax
17010 ; VLX-NEXT: vzeroupper
17013 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem:
17014 ; NoVLX: # %bb.0: # %entry
17015 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
17016 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
17017 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
17018 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
17019 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
17020 ; NoVLX-NEXT: kmovw %k0, %eax
17021 ; NoVLX-NEXT: movzwl %ax, %eax
17022 ; NoVLX-NEXT: vzeroupper
17025 %0 = bitcast <4 x i64> %__a to <8 x i32>
17026 %load = load <4 x i64>, <4 x i64>* %__b
17027 %1 = bitcast <4 x i64> %load to <8 x i32>
17028 %2 = icmp ult <8 x i32> %0, %1
17029 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
17030 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned less-than compare: the <8 x i1> icmp ult result of two
; <8 x i32> register operands is ANDed with %__u (bitcast to <8 x i1>),
; padded with zero lanes to <64 x i1>, and bitcast to i64.
; Both runs expect %edi moved into %k1 and used as the compare's write-mask.
; The avx512vl run (VLX prefix) compares on ymm and reads the mask with kmovq.
; The avx512f-only run (NoVLX prefix) compares on zmm, then expects a
; kshiftlw/kshiftrw $8 pair, kmovw and movzwl.
17034 define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
17035 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask:
17036 ; VLX: # %bb.0: # %entry
17037 ; VLX-NEXT: kmovd %edi, %k1
17038 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
17039 ; VLX-NEXT: kmovq %k0, %rax
17040 ; VLX-NEXT: vzeroupper
17043 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask:
17044 ; NoVLX: # %bb.0: # %entry
17045 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
17046 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
17047 ; NoVLX-NEXT: kmovw %edi, %k1
17048 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
17049 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
17050 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
17051 ; NoVLX-NEXT: kmovw %k0, %eax
17052 ; NoVLX-NEXT: movzwl %ax, %eax
17053 ; NoVLX-NEXT: vzeroupper
17056 %0 = bitcast <4 x i64> %__a to <8 x i32>
17057 %1 = bitcast <4 x i64> %__b to <8 x i32>
17058 %2 = icmp ult <8 x i32> %0, %1
17059 %3 = bitcast i8 %__u to <8 x i1>
17060 %4 = and <8 x i1> %2, %3
17061 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
17062 %6 = bitcast <64 x i1> %5 to i64
; Masked unsigned less-than compare with a memory operand: icmp ult of an
; <8 x i32> register operand against a vector loaded through %__b, ANDed with
; %__u (bitcast to <8 x i1>), padded with zero lanes to <64 x i1>, and
; bitcast to i64.
; The avx512vl run (VLX prefix) expects the load folded into a write-masked
; ymm vpcmpltud and the mask read with kmovq. The avx512f-only run (NoVLX
; prefix) expects vmovdqa, a write-masked zmm compare, a kshiftlw/kshiftrw $8
; pair, then kmovw and movzwl.
17066 define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
17067 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem:
17068 ; VLX: # %bb.0: # %entry
17069 ; VLX-NEXT: kmovd %edi, %k1
17070 ; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
17071 ; VLX-NEXT: kmovq %k0, %rax
17072 ; VLX-NEXT: vzeroupper
17075 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem:
17076 ; NoVLX: # %bb.0: # %entry
17077 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
17078 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
17079 ; NoVLX-NEXT: kmovw %edi, %k1
17080 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
17081 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
17082 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
17083 ; NoVLX-NEXT: kmovw %k0, %eax
17084 ; NoVLX-NEXT: movzwl %ax, %eax
17085 ; NoVLX-NEXT: vzeroupper
17088 %0 = bitcast <4 x i64> %__a to <8 x i32>
17089 %load = load <4 x i64>, <4 x i64>* %__b
17090 %1 = bitcast <4 x i64> %load to <8 x i32>
17091 %2 = icmp ult <8 x i32> %0, %1
17092 %3 = bitcast i8 %__u to <8 x i1>
17093 %4 = and <8 x i1> %2, %3
17094 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
17095 %6 = bitcast <64 x i1> %5 to i64
; Unsigned less-than compare against a broadcast scalar: one i32 is loaded
; through %__b and splatted to <8 x i32> (insertelement + all-zero shuffle
; mask), then compared with icmp ult. The <8 x i1> result is padded with zero
; lanes to <64 x i1> and bitcast to i64.
; The avx512vl run (VLX prefix) expects an embedded {1to8} broadcast operand
; and kmovq. The avx512f-only run (NoVLX prefix) expects vpbroadcastd, a zmm
; compare, a kshiftlw/kshiftrw $8 pair, then kmovw and movzwl.
17100 define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
17101 ; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b:
17102 ; VLX: # %bb.0: # %entry
17103 ; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
17104 ; VLX-NEXT: kmovq %k0, %rax
17105 ; VLX-NEXT: vzeroupper
17108 ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b:
17109 ; NoVLX: # %bb.0: # %entry
17110 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
17111 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
17112 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
17113 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
17114 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
17115 ; NoVLX-NEXT: kmovw %k0, %eax
17116 ; NoVLX-NEXT: movzwl %ax, %eax
17117 ; NoVLX-NEXT: vzeroupper
17120 %0 = bitcast <4 x i64> %__a to <8 x i32>
17121 %load = load i32, i32* %__b
17122 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
17123 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17124 %2 = icmp ult <8 x i32> %0, %1
17125 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
17126 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned less-than compare against a broadcast scalar: one i32 is
; loaded through %__b, splatted to <8 x i32>, compared with icmp ult, and the
; result ANDed with %__u (bitcast to <8 x i1>). The mask is then padded with
; zero lanes to <64 x i1> and bitcast to i64.
; The avx512vl run (VLX prefix) expects a write-masked compare with an
; embedded {1to8} broadcast and kmovq. The avx512f-only run (NoVLX prefix)
; expects vpbroadcastd, a write-masked zmm compare, a kshiftlw/kshiftrw $8
; pair, then kmovw and movzwl.
17130 define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
17131 ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b:
17132 ; VLX: # %bb.0: # %entry
17133 ; VLX-NEXT: kmovd %edi, %k1
17134 ; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
17135 ; VLX-NEXT: kmovq %k0, %rax
17136 ; VLX-NEXT: vzeroupper
17139 ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b:
17140 ; NoVLX: # %bb.0: # %entry
17141 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
17142 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
17143 ; NoVLX-NEXT: kmovw %edi, %k1
17144 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
17145 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
17146 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
17147 ; NoVLX-NEXT: kmovw %k0, %eax
17148 ; NoVLX-NEXT: movzwl %ax, %eax
17149 ; NoVLX-NEXT: vzeroupper
17152 %0 = bitcast <4 x i64> %__a to <8 x i32>
17153 %load = load i32, i32* %__b
17154 %vec = insertelement <8 x i32> undef, i32 %load, i32 0
17155 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17156 %2 = icmp ult <8 x i32> %0, %1
17157 %3 = bitcast i8 %__u to <8 x i1>
17158 %4 = and <8 x i1> %3, %2
17159 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
17160 %6 = bitcast <64 x i1> %5 to i64
; Unsigned less-than compare (icmp ult) of two <16 x i32> register operands.
; The <16 x i1> result is padded to <32 x i1> by a shufflevector whose lanes
; 16-31 select from zeroinitializer, then bitcast to i32.
; Both runs expect a single zmm vpcmpltud; the avx512vl run (VLX prefix)
; reads the mask with kmovd, the avx512f-only run (NoVLX prefix) with kmovw.
17165 define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
17166 ; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask:
17167 ; VLX: # %bb.0: # %entry
17168 ; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
17169 ; VLX-NEXT: kmovd %k0, %eax
17170 ; VLX-NEXT: vzeroupper
17173 ; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask:
17174 ; NoVLX: # %bb.0: # %entry
17175 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
17176 ; NoVLX-NEXT: kmovw %k0, %eax
17177 ; NoVLX-NEXT: vzeroupper
17180 %0 = bitcast <8 x i64> %__a to <16 x i32>
17181 %1 = bitcast <8 x i64> %__b to <16 x i32>
17182 %2 = icmp ult <16 x i32> %0, %1
17183 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17184 %4 = bitcast <32 x i1> %3 to i32
; Unsigned less-than compare (icmp ult) of a <16 x i32> register operand
; against a full vector loaded through %__b. The <16 x i1> result is padded
; with zero lanes to <32 x i1> and bitcast to i32.
; Both runs expect the load folded into a zmm vpcmpltud; the avx512vl run
; (VLX prefix) reads the mask with kmovd, the avx512f-only run (NoVLX prefix)
; with kmovw.
17188 define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
17189 ; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem:
17190 ; VLX: # %bb.0: # %entry
17191 ; VLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
17192 ; VLX-NEXT: kmovd %k0, %eax
17193 ; VLX-NEXT: vzeroupper
17196 ; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem:
17197 ; NoVLX: # %bb.0: # %entry
17198 ; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
17199 ; NoVLX-NEXT: kmovw %k0, %eax
17200 ; NoVLX-NEXT: vzeroupper
17203 %0 = bitcast <8 x i64> %__a to <16 x i32>
17204 %load = load <8 x i64>, <8 x i64>* %__b
17205 %1 = bitcast <8 x i64> %load to <16 x i32>
17206 %2 = icmp ult <16 x i32> %0, %1
17207 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17208 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned less-than compare: the <16 x i1> icmp ult result of two
; <16 x i32> register operands is ANDed with %__u (bitcast to <16 x i1>),
; padded with zero lanes to <32 x i1>, and bitcast to i32.
; The avx512vl run (VLX prefix) expects %edi moved into %k1 and used as the
; compare's write-mask. The avx512f-only run (NoVLX prefix) expects an
; unmasked zmm compare followed by a scalar andl with %edi.
17212 define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
17213 ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask:
17214 ; VLX: # %bb.0: # %entry
17215 ; VLX-NEXT: kmovd %edi, %k1
17216 ; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
17217 ; VLX-NEXT: kmovd %k0, %eax
17218 ; VLX-NEXT: vzeroupper
17221 ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask:
17222 ; NoVLX: # %bb.0: # %entry
17223 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
17224 ; NoVLX-NEXT: kmovw %k0, %eax
17225 ; NoVLX-NEXT: andl %edi, %eax
17226 ; NoVLX-NEXT: vzeroupper
17229 %0 = bitcast <8 x i64> %__a to <16 x i32>
17230 %1 = bitcast <8 x i64> %__b to <16 x i32>
17231 %2 = icmp ult <16 x i32> %0, %1
17232 %3 = bitcast i16 %__u to <16 x i1>
17233 %4 = and <16 x i1> %2, %3
17234 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17235 %6 = bitcast <32 x i1> %5 to i32
; Masked unsigned less-than compare with a memory operand: icmp ult of a
; <16 x i32> register operand against a vector loaded through %__b, ANDed
; with %__u (bitcast to <16 x i1>), padded with zero lanes to <32 x i1>, and
; bitcast to i32.
; The avx512vl run (VLX prefix) expects the load folded into a write-masked
; zmm vpcmpltud. The avx512f-only run (NoVLX prefix) expects an unmasked
; folded-load compare followed by a scalar andl with %edi.
17239 define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
17240 ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem:
17241 ; VLX: # %bb.0: # %entry
17242 ; VLX-NEXT: kmovd %edi, %k1
17243 ; VLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
17244 ; VLX-NEXT: kmovd %k0, %eax
17245 ; VLX-NEXT: vzeroupper
17248 ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem:
17249 ; NoVLX: # %bb.0: # %entry
17250 ; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0
17251 ; NoVLX-NEXT: kmovw %k0, %eax
17252 ; NoVLX-NEXT: andl %edi, %eax
17253 ; NoVLX-NEXT: vzeroupper
17256 %0 = bitcast <8 x i64> %__a to <16 x i32>
17257 %load = load <8 x i64>, <8 x i64>* %__b
17258 %1 = bitcast <8 x i64> %load to <16 x i32>
17259 %2 = icmp ult <16 x i32> %0, %1
17260 %3 = bitcast i16 %__u to <16 x i1>
17261 %4 = and <16 x i1> %2, %3
17262 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17263 %6 = bitcast <32 x i1> %5 to i32
; Unsigned less-than compare against a broadcast scalar: one i32 is loaded
; through %__b and splatted to <16 x i32> (insertelement + all-zero shuffle
; mask), then compared with icmp ult. The <16 x i1> result is padded with
; zero lanes to <32 x i1> and bitcast to i32.
; Both runs expect a zmm vpcmpltud with an embedded {1to16} broadcast; the
; avx512vl run (VLX prefix) reads the mask with kmovd, the avx512f-only run
; (NoVLX prefix) with kmovw.
17268 define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
17269 ; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b:
17270 ; VLX: # %bb.0: # %entry
17271 ; VLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
17272 ; VLX-NEXT: kmovd %k0, %eax
17273 ; VLX-NEXT: vzeroupper
17276 ; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b:
17277 ; NoVLX: # %bb.0: # %entry
17278 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
17279 ; NoVLX-NEXT: kmovw %k0, %eax
17280 ; NoVLX-NEXT: vzeroupper
17283 %0 = bitcast <8 x i64> %__a to <16 x i32>
17284 %load = load i32, i32* %__b
17285 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
17286 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17287 %2 = icmp ult <16 x i32> %0, %1
17288 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17289 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned less-than compare against a broadcast scalar: one i32 is
; loaded through %__b, splatted to <16 x i32>, compared with icmp ult, and
; the result ANDed with %__u (bitcast to <16 x i1>). The mask is then padded
; with zero lanes to <32 x i1> and bitcast to i32.
; The avx512vl run (VLX prefix) expects a write-masked compare with an
; embedded {1to16} broadcast. The avx512f-only run (NoVLX prefix) expects the
; unmasked broadcast compare followed by a scalar andl with %edi.
17293 define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
17294 ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b:
17295 ; VLX: # %bb.0: # %entry
17296 ; VLX-NEXT: kmovd %edi, %k1
17297 ; VLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
17298 ; VLX-NEXT: kmovd %k0, %eax
17299 ; VLX-NEXT: vzeroupper
17302 ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b:
17303 ; NoVLX: # %bb.0: # %entry
17304 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0
17305 ; NoVLX-NEXT: kmovw %k0, %eax
17306 ; NoVLX-NEXT: andl %edi, %eax
17307 ; NoVLX-NEXT: vzeroupper
17310 %0 = bitcast <8 x i64> %__a to <16 x i32>
17311 %load = load i32, i32* %__b
17312 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
17313 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17314 %2 = icmp ult <16 x i32> %0, %1
17315 %3 = bitcast i16 %__u to <16 x i1>
17316 %4 = and <16 x i1> %3, %2
17317 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
17318 %6 = bitcast <32 x i1> %5 to i32
; Unsigned less-than compare (icmp ult) of two <16 x i32> register operands.
; The <16 x i1> result is padded to <64 x i1> by a shufflevector whose lanes
; 16-63 select from zeroinitializer, then bitcast to i64.
; Both runs expect a single zmm vpcmpltud; the avx512vl run (VLX prefix)
; reads the mask with kmovq, the avx512f-only run (NoVLX prefix) with kmovw
; followed by movzwl.
17323 define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
17324 ; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask:
17325 ; VLX: # %bb.0: # %entry
17326 ; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
17327 ; VLX-NEXT: kmovq %k0, %rax
17328 ; VLX-NEXT: vzeroupper
17331 ; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask:
17332 ; NoVLX: # %bb.0: # %entry
17333 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
17334 ; NoVLX-NEXT: kmovw %k0, %eax
17335 ; NoVLX-NEXT: movzwl %ax, %eax
17336 ; NoVLX-NEXT: vzeroupper
17339 %0 = bitcast <8 x i64> %__a to <16 x i32>
17340 %1 = bitcast <8 x i64> %__b to <16 x i32>
17341 %2 = icmp ult <16 x i32> %0, %1
17342 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17343 %4 = bitcast <64 x i1> %3 to i64
; Unsigned less-than compare (icmp ult) of a <16 x i32> register operand
; against a full vector loaded through %__b. The <16 x i1> result is padded
; with zero lanes to <64 x i1> and bitcast to i64.
; Both runs expect the load folded into a zmm vpcmpltud; the avx512vl run
; (VLX prefix) reads the mask with kmovq, the avx512f-only run (NoVLX prefix)
; with kmovw followed by movzwl.
17347 define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
17348 ; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem:
17349 ; VLX: # %bb.0: # %entry
17350 ; VLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
17351 ; VLX-NEXT: kmovq %k0, %rax
17352 ; VLX-NEXT: vzeroupper
17355 ; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem:
17356 ; NoVLX: # %bb.0: # %entry
17357 ; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
17358 ; NoVLX-NEXT: kmovw %k0, %eax
17359 ; NoVLX-NEXT: movzwl %ax, %eax
17360 ; NoVLX-NEXT: vzeroupper
17363 %0 = bitcast <8 x i64> %__a to <16 x i32>
17364 %load = load <8 x i64>, <8 x i64>* %__b
17365 %1 = bitcast <8 x i64> %load to <16 x i32>
17366 %2 = icmp ult <16 x i32> %0, %1
17367 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17368 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned less-than compare: the <16 x i1> icmp ult result of two
; <16 x i32> register operands is ANDed with %__u (bitcast to <16 x i1>),
; padded with zero lanes to <64 x i1>, and bitcast to i64.
; The avx512vl run (VLX prefix) expects %edi moved into %k1, a write-masked
; zmm compare, and kmovq. The avx512f-only run (NoVLX prefix) expects an
; unmasked compare, kmovw, and a scalar andl with %edi.
17372 define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
17373 ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask:
17374 ; VLX: # %bb.0: # %entry
17375 ; VLX-NEXT: kmovd %edi, %k1
17376 ; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
17377 ; VLX-NEXT: kmovq %k0, %rax
17378 ; VLX-NEXT: vzeroupper
17381 ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask:
17382 ; NoVLX: # %bb.0: # %entry
17383 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
17384 ; NoVLX-NEXT: kmovw %k0, %eax
17385 ; NoVLX-NEXT: andl %edi, %eax
17386 ; NoVLX-NEXT: vzeroupper
17389 %0 = bitcast <8 x i64> %__a to <16 x i32>
17390 %1 = bitcast <8 x i64> %__b to <16 x i32>
17391 %2 = icmp ult <16 x i32> %0, %1
17392 %3 = bitcast i16 %__u to <16 x i1>
17393 %4 = and <16 x i1> %2, %3
17394 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17395 %6 = bitcast <64 x i1> %5 to i64
; Masked unsigned less-than compare with a memory operand: icmp ult of a
; <16 x i32> register operand against a vector loaded through %__b, ANDed
; with %__u (bitcast to <16 x i1>), padded with zero lanes to <64 x i1>, and
; bitcast to i64.
; The avx512vl run (VLX prefix) expects the load folded into a write-masked
; zmm vpcmpltud and kmovq. The avx512f-only run (NoVLX prefix) expects an
; unmasked folded-load compare, kmovw, and a scalar andl with %edi.
17399 define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
17400 ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem:
17401 ; VLX: # %bb.0: # %entry
17402 ; VLX-NEXT: kmovd %edi, %k1
17403 ; VLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
17404 ; VLX-NEXT: kmovq %k0, %rax
17405 ; VLX-NEXT: vzeroupper
17408 ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem:
17409 ; NoVLX: # %bb.0: # %entry
17410 ; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0
17411 ; NoVLX-NEXT: kmovw %k0, %eax
17412 ; NoVLX-NEXT: andl %edi, %eax
17413 ; NoVLX-NEXT: vzeroupper
17416 %0 = bitcast <8 x i64> %__a to <16 x i32>
17417 %load = load <8 x i64>, <8 x i64>* %__b
17418 %1 = bitcast <8 x i64> %load to <16 x i32>
17419 %2 = icmp ult <16 x i32> %0, %1
17420 %3 = bitcast i16 %__u to <16 x i1>
17421 %4 = and <16 x i1> %2, %3
17422 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17423 %6 = bitcast <64 x i1> %5 to i64
; Unsigned less-than compare against a broadcast scalar: one i32 is loaded
; through %__b and splatted to <16 x i32> (insertelement + all-zero shuffle
; mask), then compared with icmp ult. The <16 x i1> result is padded with
; zero lanes to <64 x i1> and bitcast to i64.
; Both runs expect a zmm vpcmpltud with an embedded {1to16} broadcast; the
; avx512vl run (VLX prefix) reads the mask with kmovq, the avx512f-only run
; (NoVLX prefix) with kmovw followed by movzwl.
17428 define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
17429 ; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b:
17430 ; VLX: # %bb.0: # %entry
17431 ; VLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
17432 ; VLX-NEXT: kmovq %k0, %rax
17433 ; VLX-NEXT: vzeroupper
17436 ; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b:
17437 ; NoVLX: # %bb.0: # %entry
17438 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
17439 ; NoVLX-NEXT: kmovw %k0, %eax
17440 ; NoVLX-NEXT: movzwl %ax, %eax
17441 ; NoVLX-NEXT: vzeroupper
17444 %0 = bitcast <8 x i64> %__a to <16 x i32>
17445 %load = load i32, i32* %__b
17446 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
17447 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17448 %2 = icmp ult <16 x i32> %0, %1
17449 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17450 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned less-than compare against a broadcast scalar: one i32 is
; loaded through %__b, splatted to <16 x i32>, compared with icmp ult, and
; the result ANDed with %__u (bitcast to <16 x i1>). The mask is then padded
; with zero lanes to <64 x i1> and bitcast to i64.
; The avx512vl run (VLX prefix) expects a write-masked compare with an
; embedded {1to16} broadcast and kmovq. The avx512f-only run (NoVLX prefix)
; expects the unmasked broadcast compare, kmovw, and a scalar andl with %edi.
17454 define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
17455 ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b:
17456 ; VLX: # %bb.0: # %entry
17457 ; VLX-NEXT: kmovd %edi, %k1
17458 ; VLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
17459 ; VLX-NEXT: kmovq %k0, %rax
17460 ; VLX-NEXT: vzeroupper
17463 ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b:
17464 ; NoVLX: # %bb.0: # %entry
17465 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0
17466 ; NoVLX-NEXT: kmovw %k0, %eax
17467 ; NoVLX-NEXT: andl %edi, %eax
17468 ; NoVLX-NEXT: vzeroupper
17471 %0 = bitcast <8 x i64> %__a to <16 x i32>
17472 %load = load i32, i32* %__b
17473 %vec = insertelement <16 x i32> undef, i32 %load, i32 0
17474 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
17475 %2 = icmp ult <16 x i32> %0, %1
17476 %3 = bitcast i16 %__u to <16 x i1>
17477 %4 = and <16 x i1> %3, %2
17478 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
17479 %6 = bitcast <64 x i1> %5 to i64
; Unsigned less-than compare (icmp ult) of two <2 x i64> register operands,
; returned as an i4.
; The <2 x i1> result is widened to 4 lanes; shuffle indices 2 and 3 select
; from the zeroinitializer operand, so only bits 0-1 of the result can be set.
; The VLX run expects one xmm vpcmpltuq into a mask register. The NoVLX run
; compares in zmm, clears mask bits 2-15 with the kshiftlw/kshiftrw $14 pair,
; and then masks the GPR with andl $3.
17484 define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17485 ; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask:
17486 ; VLX: # %bb.0: # %entry
17487 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
17488 ; VLX-NEXT: kmovb %k0, %eax
17491 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask:
17492 ; NoVLX: # %bb.0: # %entry
17493 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17494 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17495 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17496 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17497 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17498 ; NoVLX-NEXT: kmovw %k0, %eax
17499 ; NoVLX-NEXT: andl $3, %eax
17500 ; NoVLX-NEXT: vzeroupper
17503 %0 = bitcast <2 x i64> %__a to <2 x i64>
17504 %1 = bitcast <2 x i64> %__b to <2 x i64>
17505 %2 = icmp ult <2 x i64> %0, %1
17506 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17507 %4 = bitcast <4 x i1> %3 to i4
; Unsigned less-than compare of %__a against a <2 x i64> loaded from %__b,
; returned as an i4 whose bits 2-3 come from the zeroinitializer operand of
; the widening shufflevector.
; The VLX run expects the load folded into vpcmpltuq as a memory operand.
; The NoVLX run expects a separate vmovdqa load, a zmm compare, the
; kshiftlw/kshiftrw $14 pair to clear mask bits 2-15, and andl $3.
17511 define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17512 ; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem:
17513 ; VLX: # %bb.0: # %entry
17514 ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
17515 ; VLX-NEXT: kmovb %k0, %eax
17518 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem:
17519 ; NoVLX: # %bb.0: # %entry
17520 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17521 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
17522 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17523 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17524 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17525 ; NoVLX-NEXT: kmovw %k0, %eax
17526 ; NoVLX-NEXT: andl $3, %eax
17527 ; NoVLX-NEXT: vzeroupper
17530 %0 = bitcast <2 x i64> %__a to <2 x i64>
17531 %load = load <2 x i64>, <2 x i64>* %__b
17532 %1 = bitcast <2 x i64> %load to <2 x i64>
17533 %2 = icmp ult <2 x i64> %0, %1
17534 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17535 %4 = bitcast <4 x i1> %3 to i4
; Masked unsigned less-than compare of two <2 x i64> register operands,
; returned as an i4.
; %__u is bitcast to <8 x i1> and only lanes 0 and 1 are extracted, so just
; the low two bits of the mask take part in the AND with the compare result.
; Result bits 2-3 come from the zeroinitializer operand of the widening
; shufflevector.
; Both runs expect %__u in k1 as the write-mask of vpcmpltuq; the NoVLX run
; additionally widens to zmm, clears mask bits 2-15 with the
; kshiftlw/kshiftrw $14 pair, and applies andl $3.
17539 define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17540 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask:
17541 ; VLX: # %bb.0: # %entry
17542 ; VLX-NEXT: kmovd %edi, %k1
17543 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
17544 ; VLX-NEXT: kmovb %k0, %eax
17547 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask:
17548 ; NoVLX: # %bb.0: # %entry
17549 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17550 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17551 ; NoVLX-NEXT: kmovw %edi, %k1
17552 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17553 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17554 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17555 ; NoVLX-NEXT: kmovw %k0, %eax
17556 ; NoVLX-NEXT: andl $3, %eax
17557 ; NoVLX-NEXT: vzeroupper
17560 %0 = bitcast <2 x i64> %__a to <2 x i64>
17561 %1 = bitcast <2 x i64> %__b to <2 x i64>
17562 %2 = icmp ult <2 x i64> %0, %1
17563 %3 = bitcast i8 %__u to <8 x i1>
17564 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17565 %4 = and <2 x i1> %2, %extract.i
17566 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17567 %6 = bitcast <4 x i1> %5 to i4
; Masked unsigned less-than compare of %__a against a <2 x i64> loaded from
; %__b, returned as an i4.
; Only lanes 0 and 1 of the <8 x i1> view of %__u are ANDed with the compare
; result; result bits 2-3 come from the zeroinitializer shuffle operand.
; The VLX run expects the load folded into the k1-masked vpcmpltuq. The NoVLX
; run expects a separate vmovdqa, a masked zmm compare, the
; kshiftlw/kshiftrw $14 pair to clear mask bits 2-15, and andl $3.
17571 define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17572 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem:
17573 ; VLX: # %bb.0: # %entry
17574 ; VLX-NEXT: kmovd %edi, %k1
17575 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
17576 ; VLX-NEXT: kmovb %k0, %eax
17579 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem:
17580 ; NoVLX: # %bb.0: # %entry
17581 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17582 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
17583 ; NoVLX-NEXT: kmovw %edi, %k1
17584 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17585 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17586 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17587 ; NoVLX-NEXT: kmovw %k0, %eax
17588 ; NoVLX-NEXT: andl $3, %eax
17589 ; NoVLX-NEXT: vzeroupper
17592 %0 = bitcast <2 x i64> %__a to <2 x i64>
17593 %load = load <2 x i64>, <2 x i64>* %__b
17594 %1 = bitcast <2 x i64> %load to <2 x i64>
17595 %2 = icmp ult <2 x i64> %0, %1
17596 %3 = bitcast i8 %__u to <8 x i1>
17597 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17598 %4 = and <2 x i1> %2, %extract.i
17599 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17600 %6 = bitcast <4 x i1> %5 to i4
; Unsigned less-than compare of %__a against a scalar i64 that is loaded from
; %__b and splatted to both lanes (insertelement + shufflevector <0, 0>),
; returned as an i4 whose bits 2-3 come from the zeroinitializer shuffle
; operand.
; The VLX run expects the splat folded into vpcmpltuq as a {1to2} broadcast
; memory operand. The NoVLX run expects an explicit vpbroadcastq, a zmm
; compare, the kshiftlw/kshiftrw $14 pair to clear mask bits 2-15, and
; andl $3.
17605 define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
17606 ; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b:
17607 ; VLX: # %bb.0: # %entry
17608 ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
17609 ; VLX-NEXT: kmovb %k0, %eax
17612 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b:
17613 ; NoVLX: # %bb.0: # %entry
17614 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17615 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
17616 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17617 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17618 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17619 ; NoVLX-NEXT: kmovw %k0, %eax
17620 ; NoVLX-NEXT: andl $3, %eax
17621 ; NoVLX-NEXT: vzeroupper
17624 %0 = bitcast <2 x i64> %__a to <2 x i64>
17625 %load = load i64, i64* %__b
17626 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17627 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17628 %2 = icmp ult <2 x i64> %0, %1
17629 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17630 %4 = bitcast <4 x i1> %3 to i4
; Masked unsigned less-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes, returned as an i4.
; Only lanes 0 and 1 of the <8 x i1> view of %__u are ANDed with the compare
; result (mask is the first AND operand here); result bits 2-3 come from the
; zeroinitializer shuffle operand.
; The VLX run expects a k1-masked vpcmpltuq with a {1to2} broadcast memory
; operand. The NoVLX run expects vpbroadcastq, a masked zmm compare, the
; kshiftlw/kshiftrw $14 pair to clear mask bits 2-15, and andl $3.
17634 define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
17635 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b:
17636 ; VLX: # %bb.0: # %entry
17637 ; VLX-NEXT: kmovd %edi, %k1
17638 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
17639 ; VLX-NEXT: kmovb %k0, %eax
17642 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b:
17643 ; NoVLX: # %bb.0: # %entry
17644 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17645 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
17646 ; NoVLX-NEXT: kmovw %edi, %k1
17647 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17648 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17649 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17650 ; NoVLX-NEXT: kmovw %k0, %eax
17651 ; NoVLX-NEXT: andl $3, %eax
17652 ; NoVLX-NEXT: vzeroupper
17655 %0 = bitcast <2 x i64> %__a to <2 x i64>
17656 %load = load i64, i64* %__b
17657 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17658 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17659 %2 = icmp ult <2 x i64> %0, %1
17660 %3 = bitcast i8 %__u to <8 x i1>
17661 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17662 %4 = and <2 x i1> %extract.i, %2
17663 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
17664 %6 = bitcast <4 x i1> %5 to i4
; Unsigned less-than compare (icmp ult) of two <2 x i64> register operands,
; returned as an i8.
; The widening shufflevector takes lanes 0-1 from the compare and every other
; lane from the zeroinitializer operand (indices 2 and 3), so only bits 0-1
; of the result can be set.
; The VLX run expects one xmm vpcmpltuq into a mask register; the NoVLX run
; compares in zmm and clears mask bits 2-15 with the kshiftlw/kshiftrw $14
; pair.
17669 define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17670 ; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask:
17671 ; VLX: # %bb.0: # %entry
17672 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
17673 ; VLX-NEXT: kmovd %k0, %eax
17674 ; VLX-NEXT: # kill: def $al killed $al killed $eax
17677 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask:
17678 ; NoVLX: # %bb.0: # %entry
17679 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17680 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17681 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17682 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17683 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17684 ; NoVLX-NEXT: kmovw %k0, %eax
17685 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
17686 ; NoVLX-NEXT: vzeroupper
17689 %0 = bitcast <2 x i64> %__a to <2 x i64>
17690 %1 = bitcast <2 x i64> %__b to <2 x i64>
17691 %2 = icmp ult <2 x i64> %0, %1
17692 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17693 %4 = bitcast <8 x i1> %3 to i8
; Unsigned less-than compare of %__a against a <2 x i64> loaded from %__b,
; returned as an i8 whose bits 2-7 come from the zeroinitializer operand of
; the widening shufflevector.
; The VLX run expects the load folded into vpcmpltuq as a memory operand.
; The NoVLX run expects a separate vmovdqa load, a zmm compare, and the
; kshiftlw/kshiftrw $14 pair to clear mask bits 2-15.
17697 define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17698 ; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem:
17699 ; VLX: # %bb.0: # %entry
17700 ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
17701 ; VLX-NEXT: kmovd %k0, %eax
17702 ; VLX-NEXT: # kill: def $al killed $al killed $eax
17705 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem:
17706 ; NoVLX: # %bb.0: # %entry
17707 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17708 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
17709 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17710 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17711 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17712 ; NoVLX-NEXT: kmovw %k0, %eax
17713 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
17714 ; NoVLX-NEXT: vzeroupper
17717 %0 = bitcast <2 x i64> %__a to <2 x i64>
17718 %load = load <2 x i64>, <2 x i64>* %__b
17719 %1 = bitcast <2 x i64> %load to <2 x i64>
17720 %2 = icmp ult <2 x i64> %0, %1
17721 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17722 %4 = bitcast <8 x i1> %3 to i8
; Masked unsigned less-than compare of two <2 x i64> register operands,
; returned as an i8.
; Only lanes 0 and 1 of the <8 x i1> view of %__u are ANDed with the compare
; result; result bits 2-7 come from the zeroinitializer operand of the
; widening shufflevector.
; Both runs expect %__u in k1 as the write-mask of vpcmpltuq; the NoVLX run
; additionally widens to zmm and clears mask bits 2-15 with the
; kshiftlw/kshiftrw $14 pair.
17726 define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17727 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask:
17728 ; VLX: # %bb.0: # %entry
17729 ; VLX-NEXT: kmovd %edi, %k1
17730 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
17731 ; VLX-NEXT: kmovd %k0, %eax
17732 ; VLX-NEXT: # kill: def $al killed $al killed $eax
17735 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask:
17736 ; NoVLX: # %bb.0: # %entry
17737 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17738 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17739 ; NoVLX-NEXT: kmovw %edi, %k1
17740 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17741 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17742 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17743 ; NoVLX-NEXT: kmovw %k0, %eax
17744 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
17745 ; NoVLX-NEXT: vzeroupper
17748 %0 = bitcast <2 x i64> %__a to <2 x i64>
17749 %1 = bitcast <2 x i64> %__b to <2 x i64>
17750 %2 = icmp ult <2 x i64> %0, %1
17751 %3 = bitcast i8 %__u to <8 x i1>
17752 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17753 %4 = and <2 x i1> %2, %extract.i
17754 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17755 %6 = bitcast <8 x i1> %5 to i8
; Masked unsigned less-than compare of %__a against a <2 x i64> loaded from
; %__b, returned as an i8.
; Only lanes 0 and 1 of the <8 x i1> view of %__u are ANDed with the compare
; result; result bits 2-7 come from the zeroinitializer shuffle operand.
; The VLX run expects the load folded into the k1-masked vpcmpltuq. The NoVLX
; run expects a separate vmovdqa, a masked zmm compare, and the
; kshiftlw/kshiftrw $14 pair to clear mask bits 2-15.
17759 define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17760 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem:
17761 ; VLX: # %bb.0: # %entry
17762 ; VLX-NEXT: kmovd %edi, %k1
17763 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
17764 ; VLX-NEXT: kmovd %k0, %eax
17765 ; VLX-NEXT: # kill: def $al killed $al killed $eax
17768 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem:
17769 ; NoVLX: # %bb.0: # %entry
17770 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17771 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
17772 ; NoVLX-NEXT: kmovw %edi, %k1
17773 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17774 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17775 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17776 ; NoVLX-NEXT: kmovw %k0, %eax
17777 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
17778 ; NoVLX-NEXT: vzeroupper
17781 %0 = bitcast <2 x i64> %__a to <2 x i64>
17782 %load = load <2 x i64>, <2 x i64>* %__b
17783 %1 = bitcast <2 x i64> %load to <2 x i64>
17784 %2 = icmp ult <2 x i64> %0, %1
17785 %3 = bitcast i8 %__u to <8 x i1>
17786 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17787 %4 = and <2 x i1> %2, %extract.i
17788 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17789 %6 = bitcast <8 x i1> %5 to i8
; Unsigned less-than compare of %__a against a scalar i64 that is loaded from
; %__b and splatted to both lanes (insertelement + shufflevector <0, 0>),
; returned as an i8 whose bits 2-7 come from the zeroinitializer shuffle
; operand.
; The VLX run expects the splat folded into vpcmpltuq as a {1to2} broadcast
; memory operand. The NoVLX run expects an explicit vpbroadcastq, a zmm
; compare, and the kshiftlw/kshiftrw $14 pair to clear mask bits 2-15.
17794 define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
17795 ; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b:
17796 ; VLX: # %bb.0: # %entry
17797 ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
17798 ; VLX-NEXT: kmovd %k0, %eax
17799 ; VLX-NEXT: # kill: def $al killed $al killed $eax
17802 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b:
17803 ; NoVLX: # %bb.0: # %entry
17804 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17805 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
17806 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17807 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17808 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17809 ; NoVLX-NEXT: kmovw %k0, %eax
17810 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
17811 ; NoVLX-NEXT: vzeroupper
17814 %0 = bitcast <2 x i64> %__a to <2 x i64>
17815 %load = load i64, i64* %__b
17816 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17817 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17818 %2 = icmp ult <2 x i64> %0, %1
17819 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17820 %4 = bitcast <8 x i1> %3 to i8
; Masked unsigned less-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes, returned as an i8.
; Only lanes 0 and 1 of the <8 x i1> view of %__u are ANDed with the compare
; result (mask is the first AND operand here); result bits 2-7 come from the
; zeroinitializer shuffle operand.
; The VLX run expects a k1-masked vpcmpltuq with a {1to2} broadcast memory
; operand. The NoVLX run expects vpbroadcastq, a masked zmm compare, and the
; kshiftlw/kshiftrw $14 pair to clear mask bits 2-15.
17824 define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
17825 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b:
17826 ; VLX: # %bb.0: # %entry
17827 ; VLX-NEXT: kmovd %edi, %k1
17828 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
17829 ; VLX-NEXT: kmovd %k0, %eax
17830 ; VLX-NEXT: # kill: def $al killed $al killed $eax
17833 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b:
17834 ; NoVLX: # %bb.0: # %entry
17835 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17836 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
17837 ; NoVLX-NEXT: kmovw %edi, %k1
17838 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17839 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17840 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17841 ; NoVLX-NEXT: kmovw %k0, %eax
17842 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
17843 ; NoVLX-NEXT: vzeroupper
17846 %0 = bitcast <2 x i64> %__a to <2 x i64>
17847 %load = load i64, i64* %__b
17848 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
17849 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
17850 %2 = icmp ult <2 x i64> %0, %1
17851 %3 = bitcast i8 %__u to <8 x i1>
17852 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17853 %4 = and <2 x i1> %extract.i, %2
17854 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17855 %6 = bitcast <8 x i1> %5 to i8
; Unsigned less-than compare (icmp ult) of two <2 x i64> register operands,
; returned as an i16.
; The widening shufflevector takes lanes 0-1 from the compare and every other
; lane from the zeroinitializer operand (indices 2 and 3), so only bits 0-1
; of the result can be set.
; The VLX run expects one xmm vpcmpltuq into a mask register; the NoVLX run
; compares in zmm and clears mask bits 2-15 with the kshiftlw/kshiftrw $14
; pair.
17860 define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17861 ; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask:
17862 ; VLX: # %bb.0: # %entry
17863 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
17864 ; VLX-NEXT: kmovd %k0, %eax
17865 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
17868 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask:
17869 ; NoVLX: # %bb.0: # %entry
17870 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17871 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17872 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17873 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17874 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17875 ; NoVLX-NEXT: kmovw %k0, %eax
17876 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
17877 ; NoVLX-NEXT: vzeroupper
17880 %0 = bitcast <2 x i64> %__a to <2 x i64>
17881 %1 = bitcast <2 x i64> %__b to <2 x i64>
17882 %2 = icmp ult <2 x i64> %0, %1
17883 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17884 %4 = bitcast <16 x i1> %3 to i16
; Unsigned less-than compare of %__a against a <2 x i64> loaded from %__b,
; returned as an i16 whose bits 2-15 come from the zeroinitializer operand of
; the widening shufflevector.
; The VLX run expects the load folded into vpcmpltuq as a memory operand.
; The NoVLX run expects a separate vmovdqa load, a zmm compare, and the
; kshiftlw/kshiftrw $14 pair to clear mask bits 2-15.
17888 define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17889 ; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem:
17890 ; VLX: # %bb.0: # %entry
17891 ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
17892 ; VLX-NEXT: kmovd %k0, %eax
17893 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
17896 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem:
17897 ; NoVLX: # %bb.0: # %entry
17898 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17899 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
17900 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17901 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17902 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17903 ; NoVLX-NEXT: kmovw %k0, %eax
17904 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
17905 ; NoVLX-NEXT: vzeroupper
17908 %0 = bitcast <2 x i64> %__a to <2 x i64>
17909 %load = load <2 x i64>, <2 x i64>* %__b
17910 %1 = bitcast <2 x i64> %load to <2 x i64>
17911 %2 = icmp ult <2 x i64> %0, %1
17912 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17913 %4 = bitcast <16 x i1> %3 to i16
; Masked unsigned less-than compare of two <2 x i64> register operands,
; returned as an i16.
; Only lanes 0 and 1 of the <8 x i1> view of %__u are ANDed with the compare
; result; result bits 2-15 come from the zeroinitializer operand of the
; widening shufflevector.
; Both runs expect %__u in k1 as the write-mask of vpcmpltuq; the NoVLX run
; additionally widens to zmm and clears mask bits 2-15 with the
; kshiftlw/kshiftrw $14 pair.
17917 define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
17918 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask:
17919 ; VLX: # %bb.0: # %entry
17920 ; VLX-NEXT: kmovd %edi, %k1
17921 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
17922 ; VLX-NEXT: kmovd %k0, %eax
17923 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
17926 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask:
17927 ; NoVLX: # %bb.0: # %entry
17928 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
17929 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17930 ; NoVLX-NEXT: kmovw %edi, %k1
17931 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17932 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17933 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17934 ; NoVLX-NEXT: kmovw %k0, %eax
17935 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
17936 ; NoVLX-NEXT: vzeroupper
17939 %0 = bitcast <2 x i64> %__a to <2 x i64>
17940 %1 = bitcast <2 x i64> %__b to <2 x i64>
17941 %2 = icmp ult <2 x i64> %0, %1
17942 %3 = bitcast i8 %__u to <8 x i1>
17943 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17944 %4 = and <2 x i1> %2, %extract.i
17945 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17946 %6 = bitcast <16 x i1> %5 to i16
; Masked unsigned less-than compare of %__a against a <2 x i64> loaded from
; %__b, returned as an i16.
; Only lanes 0 and 1 of the <8 x i1> view of %__u are ANDed with the compare
; result; result bits 2-15 come from the zeroinitializer shuffle operand.
; The VLX run expects the load folded into the k1-masked vpcmpltuq. The NoVLX
; run expects a separate vmovdqa, a masked zmm compare, and the
; kshiftlw/kshiftrw $14 pair to clear mask bits 2-15.
17950 define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
17951 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem:
17952 ; VLX: # %bb.0: # %entry
17953 ; VLX-NEXT: kmovd %edi, %k1
17954 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
17955 ; VLX-NEXT: kmovd %k0, %eax
17956 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
17959 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem:
17960 ; NoVLX: # %bb.0: # %entry
17961 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17962 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
17963 ; NoVLX-NEXT: kmovw %edi, %k1
17964 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
17965 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17966 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
17967 ; NoVLX-NEXT: kmovw %k0, %eax
17968 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
17969 ; NoVLX-NEXT: vzeroupper
17972 %0 = bitcast <2 x i64> %__a to <2 x i64>
17973 %load = load <2 x i64>, <2 x i64>* %__b
17974 %1 = bitcast <2 x i64> %load to <2 x i64>
17975 %2 = icmp ult <2 x i64> %0, %1
17976 %3 = bitcast i8 %__u to <8 x i1>
17977 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
17978 %4 = and <2 x i1> %2, %extract.i
17979 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
17980 %6 = bitcast <16 x i1> %5 to i16
; Unsigned less-than compare of %__a against a scalar i64 that is loaded from
; %__b and splatted to both lanes (insertelement + shufflevector <0, 0>),
; returned as an i16 whose bits 2-15 come from the zeroinitializer shuffle
; operand.
; The VLX run expects the splat folded into vpcmpltuq as a {1to2} broadcast
; memory operand. The NoVLX run expects an explicit vpbroadcastq, a zmm
; compare, and the kshiftlw/kshiftrw $14 pair to clear mask bits 2-15.
17985 define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
17986 ; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b:
17987 ; VLX: # %bb.0: # %entry
17988 ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
17989 ; VLX-NEXT: kmovd %k0, %eax
17990 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
17993 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b:
17994 ; NoVLX: # %bb.0: # %entry
17995 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17996 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
17997 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
17998 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
17999 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18000 ; NoVLX-NEXT: kmovw %k0, %eax
18001 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18002 ; NoVLX-NEXT: vzeroupper
18005 %0 = bitcast <2 x i64> %__a to <2 x i64>
18006 %load = load i64, i64* %__b
18007 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
18008 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
18009 %2 = icmp ult <2 x i64> %0, %1
18010 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18011 %4 = bitcast <16 x i1> %3 to i16
; Masked unsigned less-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes, returned as an i16.
; Only lanes 0 and 1 of the <8 x i1> view of %__u are ANDed with the compare
; result (mask is the first AND operand here); result bits 2-15 come from the
; zeroinitializer shuffle operand.
; The VLX run expects a k1-masked vpcmpltuq with a {1to2} broadcast memory
; operand. The NoVLX run expects vpbroadcastq, a masked zmm compare, and the
; kshiftlw/kshiftrw $14 pair to clear mask bits 2-15.
18015 define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
18016 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b:
18017 ; VLX: # %bb.0: # %entry
18018 ; VLX-NEXT: kmovd %edi, %k1
18019 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
18020 ; VLX-NEXT: kmovd %k0, %eax
18021 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18024 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b:
18025 ; NoVLX: # %bb.0: # %entry
18026 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18027 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
18028 ; NoVLX-NEXT: kmovw %edi, %k1
18029 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18030 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18031 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18032 ; NoVLX-NEXT: kmovw %k0, %eax
18033 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18034 ; NoVLX-NEXT: vzeroupper
18037 %0 = bitcast <2 x i64> %__a to <2 x i64>
18038 %load = load i64, i64* %__b
18039 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
18040 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
18041 %2 = icmp ult <2 x i64> %0, %1
18042 %3 = bitcast i8 %__u to <8 x i1>
18043 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18044 %4 = and <2 x i1> %extract.i, %2
18045 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18046 %6 = bitcast <16 x i1> %5 to i16
; Unsigned less-than compare (icmp ult) of two <2 x i64> register operands,
; returned as an i32.
; The widening shufflevector takes lanes 0-1 from the compare and every other
; lane from the zeroinitializer operand (indices 2 and 3), so only bits 0-1
; of the result can be set.
; The VLX run expects one xmm vpcmpltuq into a mask register; the NoVLX run
; compares in zmm and clears mask bits 2-15 with the kshiftlw/kshiftrw $14
; pair before the kmovw to eax.
18051 define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
18052 ; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask:
18053 ; VLX: # %bb.0: # %entry
18054 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
18055 ; VLX-NEXT: kmovd %k0, %eax
18058 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask:
18059 ; NoVLX: # %bb.0: # %entry
18060 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
18061 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18062 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18063 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18064 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18065 ; NoVLX-NEXT: kmovw %k0, %eax
18066 ; NoVLX-NEXT: vzeroupper
18069 %0 = bitcast <2 x i64> %__a to <2 x i64>
18070 %1 = bitcast <2 x i64> %__b to <2 x i64>
18071 %2 = icmp ult <2 x i64> %0, %1
18072 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18073 %4 = bitcast <32 x i1> %3 to i32
; Unsigned less-than compare of %__a against a <2 x i64> loaded from %__b,
; returned as an i32 whose bits 2-31 come from the zeroinitializer operand of
; the widening shufflevector.
; The VLX run expects the load folded into vpcmpltuq as a memory operand.
; The NoVLX run expects a separate vmovdqa load, a zmm compare, and the
; kshiftlw/kshiftrw $14 pair to clear mask bits 2-15.
18077 define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
18078 ; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem:
18079 ; VLX: # %bb.0: # %entry
18080 ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
18081 ; VLX-NEXT: kmovd %k0, %eax
18084 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem:
18085 ; NoVLX: # %bb.0: # %entry
18086 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18087 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
18088 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18089 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18090 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18091 ; NoVLX-NEXT: kmovw %k0, %eax
18092 ; NoVLX-NEXT: vzeroupper
18095 %0 = bitcast <2 x i64> %__a to <2 x i64>
18096 %load = load <2 x i64>, <2 x i64>* %__b
18097 %1 = bitcast <2 x i64> %load to <2 x i64>
18098 %2 = icmp ult <2 x i64> %0, %1
18099 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18100 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned less-than compare of two <2 x i64> register operands,
; returned as an i32.
; Only lanes 0 and 1 of the <8 x i1> view of %__u are ANDed with the compare
; result; result bits 2-31 come from the zeroinitializer operand of the
; widening shufflevector.
; Both runs expect %__u in k1 as the write-mask of vpcmpltuq; the NoVLX run
; additionally widens to zmm and clears mask bits 2-15 with the
; kshiftlw/kshiftrw $14 pair.
18104 define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
18105 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask:
18106 ; VLX: # %bb.0: # %entry
18107 ; VLX-NEXT: kmovd %edi, %k1
18108 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
18109 ; VLX-NEXT: kmovd %k0, %eax
18112 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask:
18113 ; NoVLX: # %bb.0: # %entry
18114 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
18115 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18116 ; NoVLX-NEXT: kmovw %edi, %k1
18117 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18118 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18119 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18120 ; NoVLX-NEXT: kmovw %k0, %eax
18121 ; NoVLX-NEXT: vzeroupper
18124 %0 = bitcast <2 x i64> %__a to <2 x i64>
18125 %1 = bitcast <2 x i64> %__b to <2 x i64>
18126 %2 = icmp ult <2 x i64> %0, %1
18127 %3 = bitcast i8 %__u to <8 x i1>
18128 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18129 %4 = and <2 x i1> %2, %extract.i
18130 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18131 %6 = bitcast <32 x i1> %5 to i32
; Masked unsigned less-than compare of %__a against a <2 x i64> loaded from
; %__b, returned as an i32.
; Only lanes 0 and 1 of the <8 x i1> view of %__u are ANDed with the compare
; result; result bits 2-31 come from the zeroinitializer shuffle operand.
; The VLX run expects the load folded into the k1-masked vpcmpltuq. The NoVLX
; run expects a separate vmovdqa, a masked zmm compare, and the
; kshiftlw/kshiftrw $14 pair to clear mask bits 2-15.
18135 define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
18136 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem:
18137 ; VLX: # %bb.0: # %entry
18138 ; VLX-NEXT: kmovd %edi, %k1
18139 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
18140 ; VLX-NEXT: kmovd %k0, %eax
18143 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem:
18144 ; NoVLX: # %bb.0: # %entry
18145 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18146 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
18147 ; NoVLX-NEXT: kmovw %edi, %k1
18148 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18149 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18150 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18151 ; NoVLX-NEXT: kmovw %k0, %eax
18152 ; NoVLX-NEXT: vzeroupper
18155 %0 = bitcast <2 x i64> %__a to <2 x i64>
18156 %load = load <2 x i64>, <2 x i64>* %__b
18157 %1 = bitcast <2 x i64> %load to <2 x i64>
18158 %2 = icmp ult <2 x i64> %0, %1
18159 %3 = bitcast i8 %__u to <8 x i1>
18160 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18161 %4 = and <2 x i1> %2, %extract.i
18162 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18163 %6 = bitcast <32 x i1> %5 to i32
; Unsigned less-than compare of %__a against a scalar i64 that is loaded from
; %__b and splatted to both lanes (insertelement + shufflevector <0, 0>),
; returned as an i32 whose bits 2-31 come from the zeroinitializer shuffle
; operand.
; The VLX run expects the splat folded into vpcmpltuq as a {1to2} broadcast
; memory operand. The NoVLX run expects an explicit vpbroadcastq, a zmm
; compare, and the kshiftlw/kshiftrw $14 pair to clear mask bits 2-15.
18168 define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
18169 ; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b:
18170 ; VLX: # %bb.0: # %entry
18171 ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
18172 ; VLX-NEXT: kmovd %k0, %eax
18175 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b:
18176 ; NoVLX: # %bb.0: # %entry
18177 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18178 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
18179 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18180 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18181 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18182 ; NoVLX-NEXT: kmovw %k0, %eax
18183 ; NoVLX-NEXT: vzeroupper
18186 %0 = bitcast <2 x i64> %__a to <2 x i64>
18187 %load = load i64, i64* %__b
18188 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
18189 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
18190 %2 = icmp ult <2 x i64> %0, %1
18191 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18192 %4 = bitcast <32 x i1> %3 to i32
; Masked unsigned less-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes, returned as an i32.
; Only lanes 0 and 1 of the <8 x i1> view of %__u are ANDed with the compare
; result (mask is the first AND operand here); result bits 2-31 come from the
; zeroinitializer shuffle operand.
; The VLX run expects a k1-masked vpcmpltuq with a {1to2} broadcast memory
; operand. The NoVLX run expects vpbroadcastq, a masked zmm compare, and the
; kshiftlw/kshiftrw $14 pair to clear mask bits 2-15.
18196 define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
18197 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b:
18198 ; VLX: # %bb.0: # %entry
18199 ; VLX-NEXT: kmovd %edi, %k1
18200 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
18201 ; VLX-NEXT: kmovd %k0, %eax
18204 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b:
18205 ; NoVLX: # %bb.0: # %entry
18206 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18207 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
18208 ; NoVLX-NEXT: kmovw %edi, %k1
18209 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18210 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18211 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18212 ; NoVLX-NEXT: kmovw %k0, %eax
18213 ; NoVLX-NEXT: vzeroupper
18216 %0 = bitcast <2 x i64> %__a to <2 x i64>
18217 %load = load i64, i64* %__b
18218 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
18219 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
18220 %2 = icmp ult <2 x i64> %0, %1
18221 %3 = bitcast i8 %__u to <8 x i1>
18222 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18223 %4 = and <2 x i1> %extract.i, %2
18224 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18225 %6 = bitcast <32 x i1> %5 to i32
18230 define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
18231 ; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask:
18232 ; VLX: # %bb.0: # %entry
18233 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
18234 ; VLX-NEXT: kmovq %k0, %rax
18237 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask:
18238 ; NoVLX: # %bb.0: # %entry
18239 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
18240 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18241 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18242 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18243 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18244 ; NoVLX-NEXT: kmovw %k0, %eax
18245 ; NoVLX-NEXT: movzwl %ax, %eax
18246 ; NoVLX-NEXT: vzeroupper
18249 %0 = bitcast <2 x i64> %__a to <2 x i64>
18250 %1 = bitcast <2 x i64> %__b to <2 x i64>
18251 %2 = icmp ult <2 x i64> %0, %1
18252 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18253 %4 = bitcast <64 x i1> %3 to i64
18257 define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
18258 ; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem:
18259 ; VLX: # %bb.0: # %entry
18260 ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
18261 ; VLX-NEXT: kmovq %k0, %rax
18264 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem:
18265 ; NoVLX: # %bb.0: # %entry
18266 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18267 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
18268 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18269 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18270 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18271 ; NoVLX-NEXT: kmovw %k0, %eax
18272 ; NoVLX-NEXT: movzwl %ax, %eax
18273 ; NoVLX-NEXT: vzeroupper
18276 %0 = bitcast <2 x i64> %__a to <2 x i64>
18277 %load = load <2 x i64>, <2 x i64>* %__b
18278 %1 = bitcast <2 x i64> %load to <2 x i64>
18279 %2 = icmp ult <2 x i64> %0, %1
18280 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18281 %4 = bitcast <64 x i1> %3 to i64
18285 define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
18286 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask:
18287 ; VLX: # %bb.0: # %entry
18288 ; VLX-NEXT: kmovd %edi, %k1
18289 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
18290 ; VLX-NEXT: kmovq %k0, %rax
18293 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask:
18294 ; NoVLX: # %bb.0: # %entry
18295 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
18296 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18297 ; NoVLX-NEXT: kmovw %edi, %k1
18298 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18299 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18300 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18301 ; NoVLX-NEXT: kmovw %k0, %eax
18302 ; NoVLX-NEXT: movzwl %ax, %eax
18303 ; NoVLX-NEXT: vzeroupper
18306 %0 = bitcast <2 x i64> %__a to <2 x i64>
18307 %1 = bitcast <2 x i64> %__b to <2 x i64>
18308 %2 = icmp ult <2 x i64> %0, %1
18309 %3 = bitcast i8 %__u to <8 x i1>
18310 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18311 %4 = and <2 x i1> %2, %extract.i
18312 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18313 %6 = bitcast <64 x i1> %5 to i64
18317 define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
18318 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem:
18319 ; VLX: # %bb.0: # %entry
18320 ; VLX-NEXT: kmovd %edi, %k1
18321 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
18322 ; VLX-NEXT: kmovq %k0, %rax
18325 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem:
18326 ; NoVLX: # %bb.0: # %entry
18327 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18328 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
18329 ; NoVLX-NEXT: kmovw %edi, %k1
18330 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18331 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18332 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18333 ; NoVLX-NEXT: kmovw %k0, %eax
18334 ; NoVLX-NEXT: movzwl %ax, %eax
18335 ; NoVLX-NEXT: vzeroupper
18338 %0 = bitcast <2 x i64> %__a to <2 x i64>
18339 %load = load <2 x i64>, <2 x i64>* %__b
18340 %1 = bitcast <2 x i64> %load to <2 x i64>
18341 %2 = icmp ult <2 x i64> %0, %1
18342 %3 = bitcast i8 %__u to <8 x i1>
18343 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18344 %4 = and <2 x i1> %2, %extract.i
18345 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18346 %6 = bitcast <64 x i1> %5 to i64
18351 define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
18352 ; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b:
18353 ; VLX: # %bb.0: # %entry
18354 ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
18355 ; VLX-NEXT: kmovq %k0, %rax
18358 ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b:
18359 ; NoVLX: # %bb.0: # %entry
18360 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18361 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
18362 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18363 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18364 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18365 ; NoVLX-NEXT: kmovw %k0, %eax
18366 ; NoVLX-NEXT: movzwl %ax, %eax
18367 ; NoVLX-NEXT: vzeroupper
18370 %0 = bitcast <2 x i64> %__a to <2 x i64>
18371 %load = load i64, i64* %__b
18372 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
18373 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
18374 %2 = icmp ult <2 x i64> %0, %1
18375 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18376 %4 = bitcast <64 x i1> %3 to i64
18380 define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
18381 ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b:
18382 ; VLX: # %bb.0: # %entry
18383 ; VLX-NEXT: kmovd %edi, %k1
18384 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
18385 ; VLX-NEXT: kmovq %k0, %rax
18388 ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b:
18389 ; NoVLX: # %bb.0: # %entry
18390 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18391 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
18392 ; NoVLX-NEXT: kmovw %edi, %k1
18393 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18394 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
18395 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
18396 ; NoVLX-NEXT: kmovw %k0, %eax
18397 ; NoVLX-NEXT: movzwl %ax, %eax
18398 ; NoVLX-NEXT: vzeroupper
18401 %0 = bitcast <2 x i64> %__a to <2 x i64>
18402 %load = load i64, i64* %__b
18403 %vec = insertelement <2 x i64> undef, i64 %load, i32 0
18404 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
18405 %2 = icmp ult <2 x i64> %0, %1
18406 %3 = bitcast i8 %__u to <8 x i1>
18407 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
18408 %4 = and <2 x i1> %extract.i, %2
18409 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
18410 %6 = bitcast <64 x i1> %5 to i64
; Unsigned less-than compare of two <4 x i64> register operands; the <4 x i1>
; result is widened to <8 x i1> and bitcast to an i8 bitmask.
; The widening shuffle takes lanes 0-3 from the compare and lanes 4-7 (mask
; indices 4-7) from the zeroinitializer operand.
; Expected codegen: with the VLX prefix a single ymm vpcmpltuq writes %k0 and
; the result is returned in %al. With the NoVLX prefix the operands are used as
; zmm registers and the kshiftlw/kshiftrw $12 pair keeps only the low 4 mask bits.
18415 define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18416 ; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask:
18417 ; VLX: # %bb.0: # %entry
18418 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
18419 ; VLX-NEXT: kmovd %k0, %eax
18420 ; VLX-NEXT: # kill: def $al killed $al killed $eax
18421 ; VLX-NEXT: vzeroupper
18424 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask:
18425 ; NoVLX: # %bb.0: # %entry
18426 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18427 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18428 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18429 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18430 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18431 ; NoVLX-NEXT: kmovw %k0, %eax
18432 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
18433 ; NoVLX-NEXT: vzeroupper
18436 %0 = bitcast <4 x i64> %__a to <4 x i64>
18437 %1 = bitcast <4 x i64> %__b to <4 x i64>
18438 %2 = icmp ult <4 x i64> %0, %1
18439 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18440 %4 = bitcast <8 x i1> %3 to i8
; v4i1 -> v8i1 unsigned less-than test whose right-hand operand is a full
; <4 x i64> vector loaded from the pointer %__b.
; The widening shuffle keeps compare lanes 0-3 and takes lanes 4-7 from
; zeroinitializer; the <8 x i1> value is bitcast to i8.
; Expected codegen: with the VLX prefix the load folds into vpcmpltuq as a
; (%rdi) memory operand. With the NoVLX prefix the vector is loaded into %ymm1
; with vmovdqa, compared at zmm width, and the kshiftlw/kshiftrw $12 pair keeps
; only the low 4 mask bits.
18444 define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18445 ; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem:
18446 ; VLX: # %bb.0: # %entry
18447 ; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
18448 ; VLX-NEXT: kmovd %k0, %eax
18449 ; VLX-NEXT: # kill: def $al killed $al killed $eax
18450 ; VLX-NEXT: vzeroupper
18453 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem:
18454 ; NoVLX: # %bb.0: # %entry
18455 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18456 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
18457 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18458 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18459 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18460 ; NoVLX-NEXT: kmovw %k0, %eax
18461 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
18462 ; NoVLX-NEXT: vzeroupper
18465 %0 = bitcast <4 x i64> %__a to <4 x i64>
18466 %load = load <4 x i64>, <4 x i64>* %__b
18467 %1 = bitcast <4 x i64> %load to <4 x i64>
18468 %2 = icmp ult <4 x i64> %0, %1
18469 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18470 %4 = bitcast <8 x i1> %3 to i8
; Masked v4i1 -> v8i1 unsigned less-than test with register operands.
; %__u is bitcast to <8 x i1>, its lanes 0-3 are extracted and ANDed with the
; <4 x i64> compare result; the <4 x i1> value is widened to <8 x i1> (lanes
; 4-7 from zeroinitializer) and bitcast to i8.
; Expected codegen: the mask is moved into %k1 and applied as a {%k1} write-mask
; on vpcmpltuq. With the VLX prefix the compare stays at ymm width; with the
; NoVLX prefix it runs at zmm width and the kshiftlw/kshiftrw $12 pair keeps only
; the low 4 mask bits.
18474 define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18475 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask:
18476 ; VLX: # %bb.0: # %entry
18477 ; VLX-NEXT: kmovd %edi, %k1
18478 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
18479 ; VLX-NEXT: kmovd %k0, %eax
18480 ; VLX-NEXT: # kill: def $al killed $al killed $eax
18481 ; VLX-NEXT: vzeroupper
18484 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask:
18485 ; NoVLX: # %bb.0: # %entry
18486 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18487 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18488 ; NoVLX-NEXT: kmovw %edi, %k1
18489 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18490 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18491 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18492 ; NoVLX-NEXT: kmovw %k0, %eax
18493 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
18494 ; NoVLX-NEXT: vzeroupper
18497 %0 = bitcast <4 x i64> %__a to <4 x i64>
18498 %1 = bitcast <4 x i64> %__b to <4 x i64>
18499 %2 = icmp ult <4 x i64> %0, %1
18500 %3 = bitcast i8 %__u to <8 x i1>
18501 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18502 %4 = and <4 x i1> %2, %extract.i
18503 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18504 %6 = bitcast <8 x i1> %5 to i8
; Masked v4i1 -> v8i1 unsigned less-than test whose right-hand operand is a
; <4 x i64> vector loaded from %__b.
; Lanes 0-3 of %__u (viewed as <8 x i1>) are ANDed with the compare result,
; which is widened to <8 x i1> with zero lanes 4-7 and bitcast to i8.
; Expected codegen: with the VLX prefix the load folds into a write-masked
; vpcmpltuq (%rsi). With the NoVLX prefix the vector is loaded with vmovdqa, the
; compare runs at zmm width under {%k1}, and the kshiftlw/kshiftrw $12 pair keeps
; only the low 4 mask bits.
18508 define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18509 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem:
18510 ; VLX: # %bb.0: # %entry
18511 ; VLX-NEXT: kmovd %edi, %k1
18512 ; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
18513 ; VLX-NEXT: kmovd %k0, %eax
18514 ; VLX-NEXT: # kill: def $al killed $al killed $eax
18515 ; VLX-NEXT: vzeroupper
18518 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem:
18519 ; NoVLX: # %bb.0: # %entry
18520 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18521 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
18522 ; NoVLX-NEXT: kmovw %edi, %k1
18523 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18524 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18525 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18526 ; NoVLX-NEXT: kmovw %k0, %eax
18527 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
18528 ; NoVLX-NEXT: vzeroupper
18531 %0 = bitcast <4 x i64> %__a to <4 x i64>
18532 %load = load <4 x i64>, <4 x i64>* %__b
18533 %1 = bitcast <4 x i64> %load to <4 x i64>
18534 %2 = icmp ult <4 x i64> %0, %1
18535 %3 = bitcast i8 %__u to <8 x i1>
18536 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18537 %4 = and <4 x i1> %2, %extract.i
18538 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18539 %6 = bitcast <8 x i1> %5 to i8
; Broadcast-from-memory variant of the v4i1 -> v8i1 unsigned less-than test.
; A scalar i64 is loaded from %__b, inserted into lane 0 and splatted to all
; four lanes by a shuffle with an all-zero index mask, then compared against
; %__a. The <4 x i1> result is widened to <8 x i1> (lanes 4-7 from
; zeroinitializer) and bitcast to i8.
; Expected codegen: with the VLX prefix the splat folds into the compare as a
; (%rdi){1to4} embedded-broadcast operand. With the NoVLX prefix vpbroadcastq
; fills %ymm1, the compare runs at zmm width, and the kshiftlw/kshiftrw $12 pair
; keeps only the low 4 mask bits.
18544 define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
18545 ; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b:
18546 ; VLX: # %bb.0: # %entry
18547 ; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
18548 ; VLX-NEXT: kmovd %k0, %eax
18549 ; VLX-NEXT: # kill: def $al killed $al killed $eax
18550 ; VLX-NEXT: vzeroupper
18553 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b:
18554 ; NoVLX: # %bb.0: # %entry
18555 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18556 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
18557 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18558 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18559 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18560 ; NoVLX-NEXT: kmovw %k0, %eax
18561 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
18562 ; NoVLX-NEXT: vzeroupper
18565 %0 = bitcast <4 x i64> %__a to <4 x i64>
18566 %load = load i64, i64* %__b
18567 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18568 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18569 %2 = icmp ult <4 x i64> %0, %1
18570 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18571 %4 = bitcast <8 x i1> %3 to i8
; Masked broadcast-from-memory variant of the v4i1 -> v8i1 unsigned less-than
; test. A scalar i64 loaded from %__b is splatted to <4 x i64> and compared
; against %__a; lanes 0-3 of %__u (viewed as <8 x i1>) are ANDed with the result
; (mask on the left-hand side of the and here), which is widened to <8 x i1>
; with zero lanes 4-7 and bitcast to i8.
; Expected codegen: with the VLX prefix a write-masked vpcmpltuq uses a
; (%rsi){1to4} embedded broadcast. With the NoVLX prefix vpbroadcastq fills
; %ymm1, the compare runs at zmm width under {%k1}, and the kshiftlw/kshiftrw $12
; pair keeps only the low 4 mask bits.
18575 define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
18576 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b:
18577 ; VLX: # %bb.0: # %entry
18578 ; VLX-NEXT: kmovd %edi, %k1
18579 ; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
18580 ; VLX-NEXT: kmovd %k0, %eax
18581 ; VLX-NEXT: # kill: def $al killed $al killed $eax
18582 ; VLX-NEXT: vzeroupper
18585 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b:
18586 ; NoVLX: # %bb.0: # %entry
18587 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18588 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
18589 ; NoVLX-NEXT: kmovw %edi, %k1
18590 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18591 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18592 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18593 ; NoVLX-NEXT: kmovw %k0, %eax
18594 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
18595 ; NoVLX-NEXT: vzeroupper
18598 %0 = bitcast <4 x i64> %__a to <4 x i64>
18599 %load = load i64, i64* %__b
18600 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18601 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18602 %2 = icmp ult <4 x i64> %0, %1
18603 %3 = bitcast i8 %__u to <8 x i1>
18604 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18605 %4 = and <4 x i1> %extract.i, %2
18606 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
18607 %6 = bitcast <8 x i1> %5 to i8
; Unsigned less-than compare of two <4 x i64> register operands; the <4 x i1>
; result is widened to <16 x i1> and bitcast to an i16 bitmask.
; The widening shuffle takes lanes 0-3 from the compare; every remaining lane
; uses a mask index in 4-7, which selects from the zeroinitializer operand.
; Expected codegen: with the VLX prefix a single ymm vpcmpltuq writes %k0 and
; the result is returned in %ax. With the NoVLX prefix the operands are used as
; zmm registers and the kshiftlw/kshiftrw $12 pair keeps only the low 4 mask bits.
18612 define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18613 ; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask:
18614 ; VLX: # %bb.0: # %entry
18615 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
18616 ; VLX-NEXT: kmovd %k0, %eax
18617 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18618 ; VLX-NEXT: vzeroupper
18621 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask:
18622 ; NoVLX: # %bb.0: # %entry
18623 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18624 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18625 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18626 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18627 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18628 ; NoVLX-NEXT: kmovw %k0, %eax
18629 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18630 ; NoVLX-NEXT: vzeroupper
18633 %0 = bitcast <4 x i64> %__a to <4 x i64>
18634 %1 = bitcast <4 x i64> %__b to <4 x i64>
18635 %2 = icmp ult <4 x i64> %0, %1
18636 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18637 %4 = bitcast <16 x i1> %3 to i16
; v4i1 -> v16i1 unsigned less-than test whose right-hand operand is a full
; <4 x i64> vector loaded from the pointer %__b.
; The widening shuffle keeps compare lanes 0-3 and fills lanes 4-15 from
; zeroinitializer (mask indices 4-7); the result is bitcast to i16.
; Expected codegen: with the VLX prefix the load folds into vpcmpltuq as a
; (%rdi) memory operand. With the NoVLX prefix the vector is loaded into %ymm1
; with vmovdqa, compared at zmm width, and the kshiftlw/kshiftrw $12 pair keeps
; only the low 4 mask bits.
18641 define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18642 ; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem:
18643 ; VLX: # %bb.0: # %entry
18644 ; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
18645 ; VLX-NEXT: kmovd %k0, %eax
18646 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18647 ; VLX-NEXT: vzeroupper
18650 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem:
18651 ; NoVLX: # %bb.0: # %entry
18652 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18653 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
18654 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18655 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18656 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18657 ; NoVLX-NEXT: kmovw %k0, %eax
18658 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18659 ; NoVLX-NEXT: vzeroupper
18662 %0 = bitcast <4 x i64> %__a to <4 x i64>
18663 %load = load <4 x i64>, <4 x i64>* %__b
18664 %1 = bitcast <4 x i64> %load to <4 x i64>
18665 %2 = icmp ult <4 x i64> %0, %1
18666 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18667 %4 = bitcast <16 x i1> %3 to i16
; Masked v4i1 -> v16i1 unsigned less-than test with register operands.
; %__u is bitcast to <8 x i1>, its lanes 0-3 are extracted and ANDed with the
; <4 x i64> compare result; the <4 x i1> value is widened to <16 x i1> (lanes
; 4-15 from zeroinitializer) and bitcast to i16.
; Expected codegen: the mask is moved into %k1 and applied as a {%k1} write-mask
; on vpcmpltuq. With the VLX prefix the compare stays at ymm width; with the
; NoVLX prefix it runs at zmm width and the kshiftlw/kshiftrw $12 pair keeps only
; the low 4 mask bits.
18671 define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18672 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask:
18673 ; VLX: # %bb.0: # %entry
18674 ; VLX-NEXT: kmovd %edi, %k1
18675 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
18676 ; VLX-NEXT: kmovd %k0, %eax
18677 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18678 ; VLX-NEXT: vzeroupper
18681 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask:
18682 ; NoVLX: # %bb.0: # %entry
18683 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18684 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18685 ; NoVLX-NEXT: kmovw %edi, %k1
18686 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18687 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18688 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18689 ; NoVLX-NEXT: kmovw %k0, %eax
18690 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18691 ; NoVLX-NEXT: vzeroupper
18694 %0 = bitcast <4 x i64> %__a to <4 x i64>
18695 %1 = bitcast <4 x i64> %__b to <4 x i64>
18696 %2 = icmp ult <4 x i64> %0, %1
18697 %3 = bitcast i8 %__u to <8 x i1>
18698 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18699 %4 = and <4 x i1> %2, %extract.i
18700 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18701 %6 = bitcast <16 x i1> %5 to i16
; Masked v4i1 -> v16i1 unsigned less-than test whose right-hand operand is a
; <4 x i64> vector loaded from %__b.
; Lanes 0-3 of %__u (viewed as <8 x i1>) are ANDed with the compare result,
; which is widened to <16 x i1> with zero lanes 4-15 and bitcast to i16.
; Expected codegen: with the VLX prefix the load folds into a write-masked
; vpcmpltuq (%rsi). With the NoVLX prefix the vector is loaded with vmovdqa, the
; compare runs at zmm width under {%k1}, and the kshiftlw/kshiftrw $12 pair keeps
; only the low 4 mask bits.
18705 define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18706 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem:
18707 ; VLX: # %bb.0: # %entry
18708 ; VLX-NEXT: kmovd %edi, %k1
18709 ; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
18710 ; VLX-NEXT: kmovd %k0, %eax
18711 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18712 ; VLX-NEXT: vzeroupper
18715 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem:
18716 ; NoVLX: # %bb.0: # %entry
18717 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18718 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
18719 ; NoVLX-NEXT: kmovw %edi, %k1
18720 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18721 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18722 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18723 ; NoVLX-NEXT: kmovw %k0, %eax
18724 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18725 ; NoVLX-NEXT: vzeroupper
18728 %0 = bitcast <4 x i64> %__a to <4 x i64>
18729 %load = load <4 x i64>, <4 x i64>* %__b
18730 %1 = bitcast <4 x i64> %load to <4 x i64>
18731 %2 = icmp ult <4 x i64> %0, %1
18732 %3 = bitcast i8 %__u to <8 x i1>
18733 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18734 %4 = and <4 x i1> %2, %extract.i
18735 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18736 %6 = bitcast <16 x i1> %5 to i16
; Broadcast-from-memory variant of the v4i1 -> v16i1 unsigned less-than test.
; A scalar i64 is loaded from %__b, inserted into lane 0 and splatted to all
; four lanes by a shuffle with an all-zero index mask, then compared against
; %__a. The <4 x i1> result is widened to <16 x i1> (lanes 4-15 from
; zeroinitializer) and bitcast to i16.
; Expected codegen: with the VLX prefix the splat folds into the compare as a
; (%rdi){1to4} embedded-broadcast operand. With the NoVLX prefix vpbroadcastq
; fills %ymm1, the compare runs at zmm width, and the kshiftlw/kshiftrw $12 pair
; keeps only the low 4 mask bits.
18741 define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
18742 ; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b:
18743 ; VLX: # %bb.0: # %entry
18744 ; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
18745 ; VLX-NEXT: kmovd %k0, %eax
18746 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18747 ; VLX-NEXT: vzeroupper
18750 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b:
18751 ; NoVLX: # %bb.0: # %entry
18752 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18753 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
18754 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18755 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18756 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18757 ; NoVLX-NEXT: kmovw %k0, %eax
18758 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18759 ; NoVLX-NEXT: vzeroupper
18762 %0 = bitcast <4 x i64> %__a to <4 x i64>
18763 %load = load i64, i64* %__b
18764 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18765 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18766 %2 = icmp ult <4 x i64> %0, %1
18767 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18768 %4 = bitcast <16 x i1> %3 to i16
; Masked broadcast-from-memory variant of the v4i1 -> v16i1 unsigned less-than
; test. A scalar i64 loaded from %__b is splatted to <4 x i64> and compared
; against %__a; lanes 0-3 of %__u (viewed as <8 x i1>) are ANDed with the result
; (mask on the left-hand side of the and here), which is widened to <16 x i1>
; with zero lanes 4-15 and bitcast to i16.
; Expected codegen: with the VLX prefix a write-masked vpcmpltuq uses a
; (%rsi){1to4} embedded broadcast. With the NoVLX prefix vpbroadcastq fills
; %ymm1, the compare runs at zmm width under {%k1}, and the kshiftlw/kshiftrw $12
; pair keeps only the low 4 mask bits.
18772 define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
18773 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b:
18774 ; VLX: # %bb.0: # %entry
18775 ; VLX-NEXT: kmovd %edi, %k1
18776 ; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
18777 ; VLX-NEXT: kmovd %k0, %eax
18778 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
18779 ; VLX-NEXT: vzeroupper
18782 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b:
18783 ; NoVLX: # %bb.0: # %entry
18784 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18785 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
18786 ; NoVLX-NEXT: kmovw %edi, %k1
18787 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18788 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18789 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18790 ; NoVLX-NEXT: kmovw %k0, %eax
18791 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
18792 ; NoVLX-NEXT: vzeroupper
18795 %0 = bitcast <4 x i64> %__a to <4 x i64>
18796 %load = load i64, i64* %__b
18797 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18798 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18799 %2 = icmp ult <4 x i64> %0, %1
18800 %3 = bitcast i8 %__u to <8 x i1>
18801 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18802 %4 = and <4 x i1> %extract.i, %2
18803 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18804 %6 = bitcast <16 x i1> %5 to i16
; Unsigned less-than compare of two <4 x i64> register operands; the <4 x i1>
; result is widened to <32 x i1> and bitcast to an i32 bitmask.
; The widening shuffle takes lanes 0-3 from the compare; every remaining lane
; uses a mask index in 4-7, which selects from the zeroinitializer operand.
; Expected codegen: with the VLX prefix a single ymm vpcmpltuq writes %k0 and
; kmovd moves it to %eax. With the NoVLX prefix the operands are used as zmm
; registers and the kshiftlw/kshiftrw $12 pair keeps only the low 4 mask bits.
18809 define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18810 ; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask:
18811 ; VLX: # %bb.0: # %entry
18812 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
18813 ; VLX-NEXT: kmovd %k0, %eax
18814 ; VLX-NEXT: vzeroupper
18817 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask:
18818 ; NoVLX: # %bb.0: # %entry
18819 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18820 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18821 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18822 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18823 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18824 ; NoVLX-NEXT: kmovw %k0, %eax
18825 ; NoVLX-NEXT: vzeroupper
18828 %0 = bitcast <4 x i64> %__a to <4 x i64>
18829 %1 = bitcast <4 x i64> %__b to <4 x i64>
18830 %2 = icmp ult <4 x i64> %0, %1
18831 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18832 %4 = bitcast <32 x i1> %3 to i32
; v4i1 -> v32i1 unsigned less-than test whose right-hand operand is a full
; <4 x i64> vector loaded from the pointer %__b.
; The widening shuffle keeps compare lanes 0-3 and fills lanes 4-31 from
; zeroinitializer (mask indices 4-7); the result is bitcast to i32.
; Expected codegen: with the VLX prefix the load folds into vpcmpltuq as a
; (%rdi) memory operand. With the NoVLX prefix the vector is loaded into %ymm1
; with vmovdqa, compared at zmm width, and the kshiftlw/kshiftrw $12 pair keeps
; only the low 4 mask bits.
18836 define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18837 ; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem:
18838 ; VLX: # %bb.0: # %entry
18839 ; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
18840 ; VLX-NEXT: kmovd %k0, %eax
18841 ; VLX-NEXT: vzeroupper
18844 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem:
18845 ; NoVLX: # %bb.0: # %entry
18846 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18847 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
18848 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18849 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18850 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18851 ; NoVLX-NEXT: kmovw %k0, %eax
18852 ; NoVLX-NEXT: vzeroupper
18855 %0 = bitcast <4 x i64> %__a to <4 x i64>
18856 %load = load <4 x i64>, <4 x i64>* %__b
18857 %1 = bitcast <4 x i64> %load to <4 x i64>
18858 %2 = icmp ult <4 x i64> %0, %1
18859 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18860 %4 = bitcast <32 x i1> %3 to i32
; Masked v4i1 -> v32i1 unsigned less-than test with register operands.
; %__u is bitcast to <8 x i1>, its lanes 0-3 are extracted and ANDed with the
; <4 x i64> compare result; the <4 x i1> value is widened to <32 x i1> (lanes
; 4-31 from zeroinitializer) and bitcast to i32.
; Expected codegen: the mask is moved into %k1 and applied as a {%k1} write-mask
; on vpcmpltuq. With the VLX prefix the compare stays at ymm width; with the
; NoVLX prefix it runs at zmm width and the kshiftlw/kshiftrw $12 pair keeps only
; the low 4 mask bits.
18864 define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18865 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask:
18866 ; VLX: # %bb.0: # %entry
18867 ; VLX-NEXT: kmovd %edi, %k1
18868 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
18869 ; VLX-NEXT: kmovd %k0, %eax
18870 ; VLX-NEXT: vzeroupper
18873 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask:
18874 ; NoVLX: # %bb.0: # %entry
18875 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
18876 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18877 ; NoVLX-NEXT: kmovw %edi, %k1
18878 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18879 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18880 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18881 ; NoVLX-NEXT: kmovw %k0, %eax
18882 ; NoVLX-NEXT: vzeroupper
18885 %0 = bitcast <4 x i64> %__a to <4 x i64>
18886 %1 = bitcast <4 x i64> %__b to <4 x i64>
18887 %2 = icmp ult <4 x i64> %0, %1
18888 %3 = bitcast i8 %__u to <8 x i1>
18889 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18890 %4 = and <4 x i1> %2, %extract.i
18891 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18892 %6 = bitcast <32 x i1> %5 to i32
; Masked v4i1 -> v32i1 unsigned less-than test whose right-hand operand is a
; <4 x i64> vector loaded from %__b.
; Lanes 0-3 of %__u (viewed as <8 x i1>) are ANDed with the compare result,
; which is widened to <32 x i1> with zero lanes 4-31 and bitcast to i32.
; Expected codegen: with the VLX prefix the load folds into a write-masked
; vpcmpltuq (%rsi). With the NoVLX prefix the vector is loaded with vmovdqa, the
; compare runs at zmm width under {%k1}, and the kshiftlw/kshiftrw $12 pair keeps
; only the low 4 mask bits.
18896 define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
18897 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem:
18898 ; VLX: # %bb.0: # %entry
18899 ; VLX-NEXT: kmovd %edi, %k1
18900 ; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
18901 ; VLX-NEXT: kmovd %k0, %eax
18902 ; VLX-NEXT: vzeroupper
18905 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem:
18906 ; NoVLX: # %bb.0: # %entry
18907 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18908 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
18909 ; NoVLX-NEXT: kmovw %edi, %k1
18910 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18911 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18912 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18913 ; NoVLX-NEXT: kmovw %k0, %eax
18914 ; NoVLX-NEXT: vzeroupper
18917 %0 = bitcast <4 x i64> %__a to <4 x i64>
18918 %load = load <4 x i64>, <4 x i64>* %__b
18919 %1 = bitcast <4 x i64> %load to <4 x i64>
18920 %2 = icmp ult <4 x i64> %0, %1
18921 %3 = bitcast i8 %__u to <8 x i1>
18922 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18923 %4 = and <4 x i1> %2, %extract.i
18924 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18925 %6 = bitcast <32 x i1> %5 to i32
; Broadcast-from-memory variant of the v4i1 -> v32i1 unsigned less-than test.
; A scalar i64 is loaded from %__b, inserted into lane 0 and splatted to all
; four lanes by a shuffle with an all-zero index mask, then compared against
; %__a. The <4 x i1> result is widened to <32 x i1> (lanes 4-31 from
; zeroinitializer) and bitcast to i32.
; Expected codegen: with the VLX prefix the splat folds into the compare as a
; (%rdi){1to4} embedded-broadcast operand. With the NoVLX prefix vpbroadcastq
; fills %ymm1, the compare runs at zmm width, and the kshiftlw/kshiftrw $12 pair
; keeps only the low 4 mask bits.
18930 define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
18931 ; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b:
18932 ; VLX: # %bb.0: # %entry
18933 ; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
18934 ; VLX-NEXT: kmovd %k0, %eax
18935 ; VLX-NEXT: vzeroupper
18938 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b:
18939 ; NoVLX: # %bb.0: # %entry
18940 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18941 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
18942 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
18943 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18944 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18945 ; NoVLX-NEXT: kmovw %k0, %eax
18946 ; NoVLX-NEXT: vzeroupper
18949 %0 = bitcast <4 x i64> %__a to <4 x i64>
18950 %load = load i64, i64* %__b
18951 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18952 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18953 %2 = icmp ult <4 x i64> %0, %1
18954 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18955 %4 = bitcast <32 x i1> %3 to i32
; Masked broadcast-from-memory variant of the v4i1 -> v32i1 unsigned less-than
; test. A scalar i64 loaded from %__b is splatted to <4 x i64> and compared
; against %__a; lanes 0-3 of %__u (viewed as <8 x i1>) are ANDed with the result
; (mask on the left-hand side of the and here), which is widened to <32 x i1>
; with zero lanes 4-31 and bitcast to i32.
; Expected codegen: with the VLX prefix a write-masked vpcmpltuq uses a
; (%rsi){1to4} embedded broadcast. With the NoVLX prefix vpbroadcastq fills
; %ymm1, the compare runs at zmm width under {%k1}, and the kshiftlw/kshiftrw $12
; pair keeps only the low 4 mask bits.
18959 define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
18960 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b:
18961 ; VLX: # %bb.0: # %entry
18962 ; VLX-NEXT: kmovd %edi, %k1
18963 ; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
18964 ; VLX-NEXT: kmovd %k0, %eax
18965 ; VLX-NEXT: vzeroupper
18968 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b:
18969 ; NoVLX: # %bb.0: # %entry
18970 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
18971 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
18972 ; NoVLX-NEXT: kmovw %edi, %k1
18973 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
18974 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
18975 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
18976 ; NoVLX-NEXT: kmovw %k0, %eax
18977 ; NoVLX-NEXT: vzeroupper
18980 %0 = bitcast <4 x i64> %__a to <4 x i64>
18981 %load = load i64, i64* %__b
18982 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
18983 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
18984 %2 = icmp ult <4 x i64> %0, %1
18985 %3 = bitcast i8 %__u to <8 x i1>
18986 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
18987 %4 = and <4 x i1> %extract.i, %2
18988 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
18989 %6 = bitcast <32 x i1> %5 to i32
; Unmasked register-register icmp ult on <4 x i64>; the <4 x i1> result is
; widened to <64 x i1> (lanes 4 and up come from zeroinitializer) and bitcast
; to i64. The VLX run expects a ymm compare read back with kmovq; the NoVLX
; run expects a zmm compare, a kshiftlw/kshiftrw $12 pair, and a movzwl.
18994 define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
18995 ; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask:
18996 ; VLX: # %bb.0: # %entry
18997 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
18998 ; VLX-NEXT: kmovq %k0, %rax
18999 ; VLX-NEXT: vzeroupper
19002 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask:
19003 ; NoVLX: # %bb.0: # %entry
19004 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
19005 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
19006 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
19007 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19008 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19009 ; NoVLX-NEXT: kmovw %k0, %eax
19010 ; NoVLX-NEXT: movzwl %ax, %eax
19011 ; NoVLX-NEXT: vzeroupper
19014 %0 = bitcast <4 x i64> %__a to <4 x i64>
19015 %1 = bitcast <4 x i64> %__b to <4 x i64>
19016 %2 = icmp ult <4 x i64> %0, %1
19017 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19018 %4 = bitcast <64 x i1> %3 to i64
; Unmasked icmp ult of <4 x i64> %__a against a full <4 x i64> vector loaded
; from %__b; the <4 x i1> result is widened to <64 x i1> with zero upper
; lanes and bitcast to i64. The VLX run expects the load folded into the
; compare; the NoVLX run expects a separate vmovdqa before a zmm compare.
19022 define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
19023 ; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem:
19024 ; VLX: # %bb.0: # %entry
19025 ; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
19026 ; VLX-NEXT: kmovq %k0, %rax
19027 ; VLX-NEXT: vzeroupper
19030 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem:
19031 ; NoVLX: # %bb.0: # %entry
19032 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
19033 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
19034 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
19035 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19036 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19037 ; NoVLX-NEXT: kmovw %k0, %eax
19038 ; NoVLX-NEXT: movzwl %ax, %eax
19039 ; NoVLX-NEXT: vzeroupper
19042 %0 = bitcast <4 x i64> %__a to <4 x i64>
19043 %load = load <4 x i64>, <4 x i64>* %__b
19044 %1 = bitcast <4 x i64> %load to <4 x i64>
19045 %2 = icmp ult <4 x i64> %0, %1
19046 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19047 %4 = bitcast <64 x i1> %3 to i64
; Masked register-register icmp ult on <4 x i64>: the compare result is ANDed
; with the low four bits of %__u, widened to <64 x i1> with zero upper lanes,
; and bitcast to i64. Both runs expect %__u in %k1 as the compare's write
; mask; the NoVLX run additionally expects the kshift pair and a movzwl.
19051 define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
19052 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask:
19053 ; VLX: # %bb.0: # %entry
19054 ; VLX-NEXT: kmovd %edi, %k1
19055 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
19056 ; VLX-NEXT: kmovq %k0, %rax
19057 ; VLX-NEXT: vzeroupper
19060 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask:
19061 ; NoVLX: # %bb.0: # %entry
19062 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
19063 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
19064 ; NoVLX-NEXT: kmovw %edi, %k1
19065 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19066 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19067 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19068 ; NoVLX-NEXT: kmovw %k0, %eax
19069 ; NoVLX-NEXT: movzwl %ax, %eax
19070 ; NoVLX-NEXT: vzeroupper
19073 %0 = bitcast <4 x i64> %__a to <4 x i64>
19074 %1 = bitcast <4 x i64> %__b to <4 x i64>
19075 %2 = icmp ult <4 x i64> %0, %1
19076 %3 = bitcast i8 %__u to <8 x i1>
19077 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
19078 %4 = and <4 x i1> %2, %extract.i
19079 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19080 %6 = bitcast <64 x i1> %5 to i64
; Masked icmp ult of <4 x i64> %__a against a full <4 x i64> vector loaded
; from %__b. The result is ANDed with the low four bits of %__u, widened to
; <64 x i1> with zero upper lanes, and bitcast to i64. The VLX run expects
; the load folded into the masked compare; the NoVLX run expects a separate
; vmovdqa and a zmm-width masked compare.
19084 define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
19085 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem:
19086 ; VLX: # %bb.0: # %entry
19087 ; VLX-NEXT: kmovd %edi, %k1
19088 ; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
19089 ; VLX-NEXT: kmovq %k0, %rax
19090 ; VLX-NEXT: vzeroupper
19093 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem:
19094 ; NoVLX: # %bb.0: # %entry
19095 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
19096 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
19097 ; NoVLX-NEXT: kmovw %edi, %k1
19098 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19099 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19100 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19101 ; NoVLX-NEXT: kmovw %k0, %eax
19102 ; NoVLX-NEXT: movzwl %ax, %eax
19103 ; NoVLX-NEXT: vzeroupper
19106 %0 = bitcast <4 x i64> %__a to <4 x i64>
19107 %load = load <4 x i64>, <4 x i64>* %__b
19108 %1 = bitcast <4 x i64> %load to <4 x i64>
19109 %2 = icmp ult <4 x i64> %0, %1
19110 %3 = bitcast i8 %__u to <8 x i1>
19111 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
19112 %4 = and <4 x i1> %2, %extract.i
19113 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19114 %6 = bitcast <64 x i1> %5 to i64
; Unmasked icmp ult of <4 x i64> %__a against an i64 loaded from %__b and
; splatted to all four lanes; the <4 x i1> result is widened to <64 x i1>
; with zero upper lanes and bitcast to i64. The VLX run expects the broadcast
; folded as {1to4}; the NoVLX run expects an explicit vpbroadcastq, a zmm
; compare, the kshift pair, and a movzwl.
19119 define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
19120 ; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
19121 ; VLX: # %bb.0: # %entry
19122 ; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
19123 ; VLX-NEXT: kmovq %k0, %rax
19124 ; VLX-NEXT: vzeroupper
19127 ; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
19128 ; NoVLX: # %bb.0: # %entry
19129 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
19130 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
19131 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
19132 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19133 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19134 ; NoVLX-NEXT: kmovw %k0, %eax
19135 ; NoVLX-NEXT: movzwl %ax, %eax
19136 ; NoVLX-NEXT: vzeroupper
19139 %0 = bitcast <4 x i64> %__a to <4 x i64>
19140 %load = load i64, i64* %__b
19141 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
19142 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19143 %2 = icmp ult <4 x i64> %0, %1
19144 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19145 %4 = bitcast <64 x i1> %3 to i64
; Masked splat-from-memory compare: icmp ult of <4 x i64> %__a against the
; i64 at %__b broadcast to four lanes, ANDed with the low four bits of %__u,
; widened to <64 x i1> with zero upper lanes, and bitcast to i64. Both runs
; expect %__u in %k1 as the write mask of the compare.
19149 define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
19150 ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
19151 ; VLX: # %bb.0: # %entry
19152 ; VLX-NEXT: kmovd %edi, %k1
19153 ; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
19154 ; VLX-NEXT: kmovq %k0, %rax
19155 ; VLX-NEXT: vzeroupper
19158 ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
19159 ; NoVLX: # %bb.0: # %entry
19160 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
19161 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
19162 ; NoVLX-NEXT: kmovw %edi, %k1
19163 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19164 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19165 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19166 ; NoVLX-NEXT: kmovw %k0, %eax
19167 ; NoVLX-NEXT: movzwl %ax, %eax
19168 ; NoVLX-NEXT: vzeroupper
19171 %0 = bitcast <4 x i64> %__a to <4 x i64>
19172 %load = load i64, i64* %__b
19173 %vec = insertelement <4 x i64> undef, i64 %load, i32 0
19174 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19175 %2 = icmp ult <4 x i64> %0, %1
19176 %3 = bitcast i8 %__u to <8 x i1>
19177 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
19178 %4 = and <4 x i1> %extract.i, %2
19179 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19180 %6 = bitcast <64 x i1> %5 to i64
; Unmasked register-register icmp ult on <8 x i64>; the <8 x i1> result is
; widened to <16 x i1> (lanes 8-15 come from zeroinitializer) and bitcast to
; i16. Both runs expect a single zmm vpcmpltuq with no extra mask cleanup;
; they differ only in the kmovd versus kmovw read-back.
19185 define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19186 ; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
19187 ; VLX: # %bb.0: # %entry
19188 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
19189 ; VLX-NEXT: kmovd %k0, %eax
19190 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19191 ; VLX-NEXT: vzeroupper
19194 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
19195 ; NoVLX: # %bb.0: # %entry
19196 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
19197 ; NoVLX-NEXT: kmovw %k0, %eax
19198 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19199 ; NoVLX-NEXT: vzeroupper
19202 %0 = bitcast <8 x i64> %__a to <8 x i64>
19203 %1 = bitcast <8 x i64> %__b to <8 x i64>
19204 %2 = icmp ult <8 x i64> %0, %1
19205 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19206 %4 = bitcast <16 x i1> %3 to i16
; Unmasked icmp ult of <8 x i64> %__a against a full <8 x i64> vector loaded
; from %__b; the <8 x i1> result is widened to <16 x i1> with zero upper
; lanes and bitcast to i16. Both runs expect the load folded into the zmm
; compare as a memory operand.
19210 define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
19211 ; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
19212 ; VLX: # %bb.0: # %entry
19213 ; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
19214 ; VLX-NEXT: kmovd %k0, %eax
19215 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19216 ; VLX-NEXT: vzeroupper
19219 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
19220 ; NoVLX: # %bb.0: # %entry
19221 ; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
19222 ; NoVLX-NEXT: kmovw %k0, %eax
19223 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19224 ; NoVLX-NEXT: vzeroupper
19227 %0 = bitcast <8 x i64> %__a to <8 x i64>
19228 %load = load <8 x i64>, <8 x i64>* %__b
19229 %1 = bitcast <8 x i64> %load to <8 x i64>
19230 %2 = icmp ult <8 x i64> %0, %1
19231 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19232 %4 = bitcast <16 x i1> %3 to i16
; Masked register-register icmp ult on <8 x i64>: the compare result is ANDed
; with all eight bits of %__u, widened to <16 x i1> with zero upper lanes,
; and bitcast to i16. Both runs expect %__u in %k1 as the compare's write
; mask.
19236 define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19237 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask:
19238 ; VLX: # %bb.0: # %entry
19239 ; VLX-NEXT: kmovd %edi, %k1
19240 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19241 ; VLX-NEXT: kmovd %k0, %eax
19242 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19243 ; VLX-NEXT: vzeroupper
19246 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask:
19247 ; NoVLX: # %bb.0: # %entry
19248 ; NoVLX-NEXT: kmovw %edi, %k1
19249 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19250 ; NoVLX-NEXT: kmovw %k0, %eax
19251 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19252 ; NoVLX-NEXT: vzeroupper
19255 %0 = bitcast <8 x i64> %__a to <8 x i64>
19256 %1 = bitcast <8 x i64> %__b to <8 x i64>
19257 %2 = icmp ult <8 x i64> %0, %1
19258 %3 = bitcast i8 %__u to <8 x i1>
19259 %4 = and <8 x i1> %2, %3
19260 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19261 %6 = bitcast <16 x i1> %5 to i16
; Masked icmp ult of <8 x i64> %__a against a full <8 x i64> vector loaded
; from %__b. The result is ANDed with %__u (bitcast to <8 x i1>), widened to
; <16 x i1> with zero upper lanes, and bitcast to i16. Both runs expect the
; load folded into the masked zmm compare.
19265 define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
19266 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
19267 ; VLX: # %bb.0: # %entry
19268 ; VLX-NEXT: kmovd %edi, %k1
19269 ; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19270 ; VLX-NEXT: kmovd %k0, %eax
19271 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19272 ; VLX-NEXT: vzeroupper
19275 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
19276 ; NoVLX: # %bb.0: # %entry
19277 ; NoVLX-NEXT: kmovw %edi, %k1
19278 ; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19279 ; NoVLX-NEXT: kmovw %k0, %eax
19280 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19281 ; NoVLX-NEXT: vzeroupper
19284 %0 = bitcast <8 x i64> %__a to <8 x i64>
19285 %load = load <8 x i64>, <8 x i64>* %__b
19286 %1 = bitcast <8 x i64> %load to <8 x i64>
19287 %2 = icmp ult <8 x i64> %0, %1
19288 %3 = bitcast i8 %__u to <8 x i1>
19289 %4 = and <8 x i1> %2, %3
19290 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19291 %6 = bitcast <16 x i1> %5 to i16
; Unmasked icmp ult of <8 x i64> %__a against an i64 loaded from %__b and
; splatted to all eight lanes; the <8 x i1> result is widened to <16 x i1>
; with zero upper lanes and bitcast to i16. Both runs expect the scalar load
; folded into the compare as a {1to8} broadcast operand.
19296 define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
19297 ; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b:
19298 ; VLX: # %bb.0: # %entry
19299 ; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19300 ; VLX-NEXT: kmovd %k0, %eax
19301 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19302 ; VLX-NEXT: vzeroupper
19305 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b:
19306 ; NoVLX: # %bb.0: # %entry
19307 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19308 ; NoVLX-NEXT: kmovw %k0, %eax
19309 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19310 ; NoVLX-NEXT: vzeroupper
19313 %0 = bitcast <8 x i64> %__a to <8 x i64>
19314 %load = load i64, i64* %__b
19315 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19316 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19317 %2 = icmp ult <8 x i64> %0, %1
19318 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19319 %4 = bitcast <16 x i1> %3 to i16
; Masked splat-from-memory compare: icmp ult of <8 x i64> %__a against the
; i64 at %__b broadcast to eight lanes, ANDed with %__u (bitcast to
; <8 x i1>), widened to <16 x i1> with zero upper lanes, and bitcast to i16.
; Both runs expect a masked {1to8} broadcast compare.
19323 define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
19324 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
19325 ; VLX: # %bb.0: # %entry
19326 ; VLX-NEXT: kmovd %edi, %k1
19327 ; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19328 ; VLX-NEXT: kmovd %k0, %eax
19329 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19330 ; VLX-NEXT: vzeroupper
19333 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
19334 ; NoVLX: # %bb.0: # %entry
19335 ; NoVLX-NEXT: kmovw %edi, %k1
19336 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19337 ; NoVLX-NEXT: kmovw %k0, %eax
19338 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19339 ; NoVLX-NEXT: vzeroupper
19342 %0 = bitcast <8 x i64> %__a to <8 x i64>
19343 %load = load i64, i64* %__b
19344 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19345 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19346 %2 = icmp ult <8 x i64> %0, %1
19347 %3 = bitcast i8 %__u to <8 x i1>
19348 %4 = and <8 x i1> %3, %2
19349 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19350 %6 = bitcast <16 x i1> %5 to i16
; Unmasked register-register icmp ult on <8 x i64>; the <8 x i1> result is
; widened to <32 x i1> (every lane from 8 up selects from zeroinitializer)
; and bitcast to i32. Both runs expect one zmm vpcmpltuq and a mask move to
; %eax, with no extra zero-extension instruction.
19355 define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19356 ; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
19357 ; VLX: # %bb.0: # %entry
19358 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
19359 ; VLX-NEXT: kmovd %k0, %eax
19360 ; VLX-NEXT: vzeroupper
19363 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
19364 ; NoVLX: # %bb.0: # %entry
19365 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
19366 ; NoVLX-NEXT: kmovw %k0, %eax
19367 ; NoVLX-NEXT: vzeroupper
19370 %0 = bitcast <8 x i64> %__a to <8 x i64>
19371 %1 = bitcast <8 x i64> %__b to <8 x i64>
19372 %2 = icmp ult <8 x i64> %0, %1
19373 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19374 %4 = bitcast <32 x i1> %3 to i32
; Unmasked icmp ult of <8 x i64> %__a against a full <8 x i64> vector loaded
; from %__b; the <8 x i1> result is widened to <32 x i1> with zero upper
; lanes and bitcast to i32. Both runs expect the load folded into the zmm
; compare.
19378 define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
19379 ; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
19380 ; VLX: # %bb.0: # %entry
19381 ; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
19382 ; VLX-NEXT: kmovd %k0, %eax
19383 ; VLX-NEXT: vzeroupper
19386 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
19387 ; NoVLX: # %bb.0: # %entry
19388 ; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
19389 ; NoVLX-NEXT: kmovw %k0, %eax
19390 ; NoVLX-NEXT: vzeroupper
19393 %0 = bitcast <8 x i64> %__a to <8 x i64>
19394 %load = load <8 x i64>, <8 x i64>* %__b
19395 %1 = bitcast <8 x i64> %load to <8 x i64>
19396 %2 = icmp ult <8 x i64> %0, %1
19397 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19398 %4 = bitcast <32 x i1> %3 to i32
; Masked register-register icmp ult on <8 x i64>: the compare result is ANDed
; with %__u (bitcast to <8 x i1>), widened to <32 x i1> with zero upper
; lanes, and bitcast to i32. Both runs expect %__u in %k1 as the compare's
; write mask.
19402 define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19403 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
19404 ; VLX: # %bb.0: # %entry
19405 ; VLX-NEXT: kmovd %edi, %k1
19406 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19407 ; VLX-NEXT: kmovd %k0, %eax
19408 ; VLX-NEXT: vzeroupper
19411 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
19412 ; NoVLX: # %bb.0: # %entry
19413 ; NoVLX-NEXT: kmovw %edi, %k1
19414 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19415 ; NoVLX-NEXT: kmovw %k0, %eax
19416 ; NoVLX-NEXT: vzeroupper
19419 %0 = bitcast <8 x i64> %__a to <8 x i64>
19420 %1 = bitcast <8 x i64> %__b to <8 x i64>
19421 %2 = icmp ult <8 x i64> %0, %1
19422 %3 = bitcast i8 %__u to <8 x i1>
19423 %4 = and <8 x i1> %2, %3
19424 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19425 %6 = bitcast <32 x i1> %5 to i32
; Masked icmp ult of <8 x i64> %__a against a full <8 x i64> vector loaded
; from %__b. The result is ANDed with %__u (bitcast to <8 x i1>), widened to
; <32 x i1> with zero upper lanes, and bitcast to i32. Both runs expect the
; load folded into the masked zmm compare.
19429 define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
19430 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
19431 ; VLX: # %bb.0: # %entry
19432 ; VLX-NEXT: kmovd %edi, %k1
19433 ; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19434 ; VLX-NEXT: kmovd %k0, %eax
19435 ; VLX-NEXT: vzeroupper
19438 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
19439 ; NoVLX: # %bb.0: # %entry
19440 ; NoVLX-NEXT: kmovw %edi, %k1
19441 ; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19442 ; NoVLX-NEXT: kmovw %k0, %eax
19443 ; NoVLX-NEXT: vzeroupper
19446 %0 = bitcast <8 x i64> %__a to <8 x i64>
19447 %load = load <8 x i64>, <8 x i64>* %__b
19448 %1 = bitcast <8 x i64> %load to <8 x i64>
19449 %2 = icmp ult <8 x i64> %0, %1
19450 %3 = bitcast i8 %__u to <8 x i1>
19451 %4 = and <8 x i1> %2, %3
19452 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19453 %6 = bitcast <32 x i1> %5 to i32
; Unmasked icmp ult of <8 x i64> %__a against an i64 loaded from %__b and
; splatted to all eight lanes; the <8 x i1> result is widened to <32 x i1>
; with zero upper lanes and bitcast to i32. Both runs expect the scalar load
; folded as a {1to8} broadcast operand.
19458 define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
19459 ; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
19460 ; VLX: # %bb.0: # %entry
19461 ; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19462 ; VLX-NEXT: kmovd %k0, %eax
19463 ; VLX-NEXT: vzeroupper
19466 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
19467 ; NoVLX: # %bb.0: # %entry
19468 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19469 ; NoVLX-NEXT: kmovw %k0, %eax
19470 ; NoVLX-NEXT: vzeroupper
19473 %0 = bitcast <8 x i64> %__a to <8 x i64>
19474 %load = load i64, i64* %__b
19475 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19476 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19477 %2 = icmp ult <8 x i64> %0, %1
19478 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19479 %4 = bitcast <32 x i1> %3 to i32
; Masked splat-from-memory compare: icmp ult of <8 x i64> %__a against the
; i64 at %__b broadcast to eight lanes, ANDed with %__u (bitcast to
; <8 x i1>), widened to <32 x i1> with zero upper lanes, and bitcast to i32.
; Both runs expect a masked {1to8} broadcast compare.
19483 define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
19484 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
19485 ; VLX: # %bb.0: # %entry
19486 ; VLX-NEXT: kmovd %edi, %k1
19487 ; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19488 ; VLX-NEXT: kmovd %k0, %eax
19489 ; VLX-NEXT: vzeroupper
19492 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
19493 ; NoVLX: # %bb.0: # %entry
19494 ; NoVLX-NEXT: kmovw %edi, %k1
19495 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19496 ; NoVLX-NEXT: kmovw %k0, %eax
19497 ; NoVLX-NEXT: vzeroupper
19500 %0 = bitcast <8 x i64> %__a to <8 x i64>
19501 %load = load i64, i64* %__b
19502 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19503 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19504 %2 = icmp ult <8 x i64> %0, %1
19505 %3 = bitcast i8 %__u to <8 x i1>
19506 %4 = and <8 x i1> %3, %2
19507 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19508 %6 = bitcast <32 x i1> %5 to i32
; Unmasked register-register icmp ult on <8 x i64>; the <8 x i1> result is
; widened to <64 x i1> (every lane from 8 up selects from zeroinitializer)
; and bitcast to i64. The VLX run expects a kmovq read-back; the NoVLX run
; expects kmovw followed by movzwl.
19513 define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19514 ; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
19515 ; VLX: # %bb.0: # %entry
19516 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
19517 ; VLX-NEXT: kmovq %k0, %rax
19518 ; VLX-NEXT: vzeroupper
19521 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
19522 ; NoVLX: # %bb.0: # %entry
19523 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
19524 ; NoVLX-NEXT: kmovw %k0, %eax
19525 ; NoVLX-NEXT: movzwl %ax, %eax
19526 ; NoVLX-NEXT: vzeroupper
19529 %0 = bitcast <8 x i64> %__a to <8 x i64>
19530 %1 = bitcast <8 x i64> %__b to <8 x i64>
19531 %2 = icmp ult <8 x i64> %0, %1
19532 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19533 %4 = bitcast <64 x i1> %3 to i64
; Unmasked icmp ult of <8 x i64> %__a against a full <8 x i64> vector loaded
; from %__b; the <8 x i1> result is widened to <64 x i1> with zero upper
; lanes and bitcast to i64. Both runs expect the load folded into the zmm
; compare; the NoVLX run adds a movzwl after kmovw.
19537 define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
19538 ; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
19539 ; VLX: # %bb.0: # %entry
19540 ; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
19541 ; VLX-NEXT: kmovq %k0, %rax
19542 ; VLX-NEXT: vzeroupper
19545 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
19546 ; NoVLX: # %bb.0: # %entry
19547 ; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
19548 ; NoVLX-NEXT: kmovw %k0, %eax
19549 ; NoVLX-NEXT: movzwl %ax, %eax
19550 ; NoVLX-NEXT: vzeroupper
19553 %0 = bitcast <8 x i64> %__a to <8 x i64>
19554 %load = load <8 x i64>, <8 x i64>* %__b
19555 %1 = bitcast <8 x i64> %load to <8 x i64>
19556 %2 = icmp ult <8 x i64> %0, %1
19557 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19558 %4 = bitcast <64 x i1> %3 to i64
; Masked register-register icmp ult on <8 x i64>: the compare result is ANDed
; with %__u (bitcast to <8 x i1>), widened to <64 x i1> with zero upper
; lanes, and bitcast to i64. Both runs expect %__u in %k1 as the compare's
; write mask; the NoVLX run adds a movzwl after kmovw.
19562 define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
19563 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
19564 ; VLX: # %bb.0: # %entry
19565 ; VLX-NEXT: kmovd %edi, %k1
19566 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19567 ; VLX-NEXT: kmovq %k0, %rax
19568 ; VLX-NEXT: vzeroupper
19571 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
19572 ; NoVLX: # %bb.0: # %entry
19573 ; NoVLX-NEXT: kmovw %edi, %k1
19574 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
19575 ; NoVLX-NEXT: kmovw %k0, %eax
19576 ; NoVLX-NEXT: movzwl %ax, %eax
19577 ; NoVLX-NEXT: vzeroupper
19580 %0 = bitcast <8 x i64> %__a to <8 x i64>
19581 %1 = bitcast <8 x i64> %__b to <8 x i64>
19582 %2 = icmp ult <8 x i64> %0, %1
19583 %3 = bitcast i8 %__u to <8 x i1>
19584 %4 = and <8 x i1> %2, %3
19585 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19586 %6 = bitcast <64 x i1> %5 to i64
; Masked icmp ult of <8 x i64> %__a against a full <8 x i64> vector loaded
; from %__b. The result is ANDed with %__u (bitcast to <8 x i1>), widened to
; <64 x i1> with zero upper lanes, and bitcast to i64. Both runs expect the
; load folded into the masked zmm compare.
19590 define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
19591 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
19592 ; VLX: # %bb.0: # %entry
19593 ; VLX-NEXT: kmovd %edi, %k1
19594 ; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19595 ; VLX-NEXT: kmovq %k0, %rax
19596 ; VLX-NEXT: vzeroupper
19599 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
19600 ; NoVLX: # %bb.0: # %entry
19601 ; NoVLX-NEXT: kmovw %edi, %k1
19602 ; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
19603 ; NoVLX-NEXT: kmovw %k0, %eax
19604 ; NoVLX-NEXT: movzwl %ax, %eax
19605 ; NoVLX-NEXT: vzeroupper
19608 %0 = bitcast <8 x i64> %__a to <8 x i64>
19609 %load = load <8 x i64>, <8 x i64>* %__b
19610 %1 = bitcast <8 x i64> %load to <8 x i64>
19611 %2 = icmp ult <8 x i64> %0, %1
19612 %3 = bitcast i8 %__u to <8 x i1>
19613 %4 = and <8 x i1> %2, %3
19614 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19615 %6 = bitcast <64 x i1> %5 to i64
; Unmasked icmp ult of <8 x i64> %__a against an i64 loaded from %__b and
; splatted to all eight lanes; the <8 x i1> result is widened to <64 x i1>
; with zero upper lanes and bitcast to i64. Both runs expect the scalar load
; folded as a {1to8} broadcast operand.
19620 define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
19621 ; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
19622 ; VLX: # %bb.0: # %entry
19623 ; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19624 ; VLX-NEXT: kmovq %k0, %rax
19625 ; VLX-NEXT: vzeroupper
19628 ; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
19629 ; NoVLX: # %bb.0: # %entry
19630 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
19631 ; NoVLX-NEXT: kmovw %k0, %eax
19632 ; NoVLX-NEXT: movzwl %ax, %eax
19633 ; NoVLX-NEXT: vzeroupper
19636 %0 = bitcast <8 x i64> %__a to <8 x i64>
19637 %load = load i64, i64* %__b
19638 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19639 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19640 %2 = icmp ult <8 x i64> %0, %1
19641 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19642 %4 = bitcast <64 x i1> %3 to i64
; Masked unsigned-less-than compare of %__a against an i64 loaded from %__b and
; splatted to all 8 lanes. The compare result is ANDed with %__u (bitcast to
; <8 x i1>), zero-padded to <64 x i1> and bitcast to i64.
; Both runs expect %__u to be moved into %k1 and used as the write-mask of a
; vpcmpltuq with a {1to8} broadcast memory operand.
19646 define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
19647 ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
19648 ; VLX: # %bb.0: # %entry
19649 ; VLX-NEXT: kmovd %edi, %k1
19650 ; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19651 ; VLX-NEXT: kmovq %k0, %rax
19652 ; VLX-NEXT: vzeroupper
19655 ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
19656 ; NoVLX: # %bb.0: # %entry
19657 ; NoVLX-NEXT: kmovw %edi, %k1
19658 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
19659 ; NoVLX-NEXT: kmovw %k0, %eax
19660 ; NoVLX-NEXT: movzwl %ax, %eax
19661 ; NoVLX-NEXT: vzeroupper
19664 %0 = bitcast <8 x i64> %__a to <8 x i64>
19665 %load = load i64, i64* %__b
19666 %vec = insertelement <8 x i64> undef, i64 %load, i32 0
19667 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19668 %2 = icmp ult <8 x i64> %0, %1
19669 %3 = bitcast i8 %__u to <8 x i1>
19670 %4 = and <8 x i1> %3, %2
19671 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
19672 %6 = bitcast <64 x i1> %5 to i64
19677 declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)
; Unmasked `fcmp oeq` of %__a and %__b, both viewed as <4 x float>. The <4 x i1>
; result is zero-padded to <8 x i1> (shuffle indices 4-7 select zeroinitializer
; lanes) and bitcast to i8.
; The VLX run expects a 128-bit vcmpeqps into a mask register; the NoVLX run
; expects a 512-bit compare followed by a kshiftlw/kshiftrw $12 pair.
19678 define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19679 ; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
19680 ; VLX: # %bb.0: # %entry
19681 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
19682 ; VLX-NEXT: kmovd %k0, %eax
19683 ; VLX-NEXT: # kill: def $al killed $al killed $eax
19686 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
19687 ; NoVLX: # %bb.0: # %entry
19688 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
19689 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19690 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19691 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19692 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19693 ; NoVLX-NEXT: kmovw %k0, %eax
19694 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
19695 ; NoVLX-NEXT: vzeroupper
19698 %0 = bitcast <2 x i64> %__a to <4 x float>
19699 %1 = bitcast <2 x i64> %__b to <4 x float>
19700 %2 = fcmp oeq <4 x float> %0, %1
19701 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19702 %4 = bitcast <8 x i1> %3 to i8
; Unmasked `fcmp oeq` of %__a against a <2 x i64> loaded from %__b, both viewed
; as <4 x float>. The <4 x i1> result is zero-padded to <8 x i1> and bitcast to i8.
; The VLX run expects the load folded into vcmpeqps; the NoVLX run expects a
; separate vmovaps load, a 512-bit compare and a kshiftlw/kshiftrw $12 pair.
19706 define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
19707 ; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem:
19708 ; VLX: # %bb.0: # %entry
19709 ; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
19710 ; VLX-NEXT: kmovd %k0, %eax
19711 ; VLX-NEXT: # kill: def $al killed $al killed $eax
19714 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem:
19715 ; NoVLX: # %bb.0: # %entry
19716 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19717 ; NoVLX-NEXT: vmovaps (%rdi), %xmm1
19718 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19719 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19720 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19721 ; NoVLX-NEXT: kmovw %k0, %eax
19722 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
19723 ; NoVLX-NEXT: vzeroupper
19726 %0 = bitcast <2 x i64> %__a to <4 x float>
19727 %load = load <2 x i64>, <2 x i64>* %__b
19728 %1 = bitcast <2 x i64> %load to <4 x float>
19729 %2 = fcmp oeq <4 x float> %0, %1
19730 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19731 %4 = bitcast <8 x i1> %3 to i8
; Unmasked `fcmp oeq` of %__a (viewed as <4 x float>) against a float loaded
; from %__b and splatted to all 4 lanes. The <4 x i1> result is zero-padded to
; <8 x i1> and bitcast to i8.
; The VLX run expects a {1to4} broadcast memory operand on vcmpeqps; the NoVLX
; run expects vbroadcastss, a 512-bit compare and a kshiftlw/kshiftrw $12 pair.
19735 define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
19736 ; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b:
19737 ; VLX: # %bb.0: # %entry
19738 ; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
19739 ; VLX-NEXT: kmovd %k0, %eax
19740 ; VLX-NEXT: # kill: def $al killed $al killed $eax
19743 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b:
19744 ; NoVLX: # %bb.0: # %entry
19745 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19746 ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
19747 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19748 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19749 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19750 ; NoVLX-NEXT: kmovw %k0, %eax
19751 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
19752 ; NoVLX-NEXT: vzeroupper
19755 %0 = bitcast <2 x i64> %__a to <4 x float>
19756 %load = load float, float* %__b
19757 %vec = insertelement <4 x float> undef, float %load, i32 0
19758 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19759 %2 = fcmp oeq <4 x float> %0, %1
19760 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19761 %4 = bitcast <8 x i1> %3 to i8
; Masked `fcmp oeq` of %__a and %__b, both viewed as <4 x float>. The compare
; result is ANDed with %__u (i4 bitcast to <4 x i1>), zero-padded to <8 x i1>
; and bitcast to i8.
; Both runs expect %__u to be moved into %k1 and used as the compare's
; write-mask; the NoVLX run additionally widens to zmm and emits a
; kshiftlw/kshiftrw $12 pair.
19765 define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19766 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask:
19767 ; VLX: # %bb.0: # %entry
19768 ; VLX-NEXT: kmovd %edi, %k1
19769 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
19770 ; VLX-NEXT: kmovd %k0, %eax
19771 ; VLX-NEXT: # kill: def $al killed $al killed $eax
19774 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask:
19775 ; NoVLX: # %bb.0: # %entry
19776 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
19777 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19778 ; NoVLX-NEXT: kmovw %edi, %k1
19779 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19780 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19781 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19782 ; NoVLX-NEXT: kmovw %k0, %eax
19783 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
19784 ; NoVLX-NEXT: vzeroupper
19787 %0 = bitcast <2 x i64> %__a to <4 x float>
19788 %1 = bitcast <2 x i64> %__b to <4 x float>
19789 %2 = fcmp oeq <4 x float> %0, %1
19790 %3 = bitcast i4 %__u to <4 x i1>
19791 %4 = and <4 x i1> %2, %3
19792 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19793 %6 = bitcast <8 x i1> %5 to i8
; Masked `fcmp oeq` of %__a against a <2 x i64> loaded from %__b, both viewed as
; <4 x float>. The compare result is ANDed with %__u (i4 bitcast to <4 x i1>),
; zero-padded to <8 x i1> and bitcast to i8.
; The VLX run expects the load folded into a write-masked vcmpeqps; the NoVLX
; run expects a separate vmovaps load, a write-masked 512-bit compare and a
; kshiftlw/kshiftrw $12 pair.
19797 define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
19798 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem:
19799 ; VLX: # %bb.0: # %entry
19800 ; VLX-NEXT: kmovd %edi, %k1
19801 ; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
19802 ; VLX-NEXT: kmovd %k0, %eax
19803 ; VLX-NEXT: # kill: def $al killed $al killed $eax
19806 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem:
19807 ; NoVLX: # %bb.0: # %entry
19808 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19809 ; NoVLX-NEXT: kmovw %edi, %k1
19810 ; NoVLX-NEXT: vmovaps (%rsi), %xmm1
19811 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19812 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19813 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19814 ; NoVLX-NEXT: kmovw %k0, %eax
19815 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
19816 ; NoVLX-NEXT: vzeroupper
19819 %0 = bitcast <2 x i64> %__a to <4 x float>
19820 %load = load <2 x i64>, <2 x i64>* %__b
19821 %1 = bitcast <2 x i64> %load to <4 x float>
19822 %2 = fcmp oeq <4 x float> %0, %1
19823 %3 = bitcast i4 %__u to <4 x i1>
19824 %4 = and <4 x i1> %2, %3
19825 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19826 %6 = bitcast <8 x i1> %5 to i8
; Masked `fcmp oeq` of %__a (viewed as <4 x float>) against a float loaded from
; %__b and splatted to all 4 lanes. The compare result is ANDed with %__u
; (i4 bitcast to <4 x i1>), zero-padded to <8 x i1> and bitcast to i8.
; The VLX run expects a write-masked vcmpeqps with a {1to4} broadcast operand;
; the NoVLX run expects vbroadcastss, a write-masked 512-bit compare and a
; kshiftlw/kshiftrw $12 pair.
19830 define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
19831 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b:
19832 ; VLX: # %bb.0: # %entry
19833 ; VLX-NEXT: kmovd %edi, %k1
19834 ; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
19835 ; VLX-NEXT: kmovd %k0, %eax
19836 ; VLX-NEXT: # kill: def $al killed $al killed $eax
19839 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b:
19840 ; NoVLX: # %bb.0: # %entry
19841 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19842 ; NoVLX-NEXT: kmovw %edi, %k1
19843 ; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1
19844 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19845 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19846 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19847 ; NoVLX-NEXT: kmovw %k0, %eax
19848 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
19849 ; NoVLX-NEXT: vzeroupper
19852 %0 = bitcast <2 x i64> %__a to <4 x float>
19853 %load = load float, float* %__b
19854 %vec = insertelement <4 x float> undef, float %load, i32 0
19855 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19856 %2 = fcmp oeq <4 x float> %0, %1
19857 %3 = bitcast i4 %__u to <4 x i1>
19858 %4 = and <4 x i1> %2, %3
19859 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19860 %6 = bitcast <8 x i1> %5 to i8
; Unmasked `fcmp oeq` of %__a and %__b, both viewed as <4 x float>. The <4 x i1>
; result is zero-padded to <16 x i1> (every shuffle index >= 4 selects a
; zeroinitializer lane) and bitcast to i16.
; The VLX run expects a 128-bit vcmpeqps into a mask register; the NoVLX run
; expects a 512-bit compare followed by a kshiftlw/kshiftrw $12 pair.
19866 define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19867 ; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask:
19868 ; VLX: # %bb.0: # %entry
19869 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
19870 ; VLX-NEXT: kmovd %k0, %eax
19871 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19874 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask:
19875 ; NoVLX: # %bb.0: # %entry
19876 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
19877 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19878 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19879 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19880 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19881 ; NoVLX-NEXT: kmovw %k0, %eax
19882 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19883 ; NoVLX-NEXT: vzeroupper
19886 %0 = bitcast <2 x i64> %__a to <4 x float>
19887 %1 = bitcast <2 x i64> %__b to <4 x float>
19888 %2 = fcmp oeq <4 x float> %0, %1
19889 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19890 %4 = bitcast <16 x i1> %3 to i16
; Unmasked `fcmp oeq` of %__a against a <2 x i64> loaded from %__b, both viewed
; as <4 x float>. The <4 x i1> result is zero-padded to <16 x i1> and bitcast
; to i16.
; The VLX run expects the load folded into vcmpeqps; the NoVLX run expects a
; separate vmovaps load, a 512-bit compare and a kshiftlw/kshiftrw $12 pair.
19894 define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
19895 ; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem:
19896 ; VLX: # %bb.0: # %entry
19897 ; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
19898 ; VLX-NEXT: kmovd %k0, %eax
19899 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19902 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem:
19903 ; NoVLX: # %bb.0: # %entry
19904 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19905 ; NoVLX-NEXT: vmovaps (%rdi), %xmm1
19906 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19907 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19908 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19909 ; NoVLX-NEXT: kmovw %k0, %eax
19910 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19911 ; NoVLX-NEXT: vzeroupper
19914 %0 = bitcast <2 x i64> %__a to <4 x float>
19915 %load = load <2 x i64>, <2 x i64>* %__b
19916 %1 = bitcast <2 x i64> %load to <4 x float>
19917 %2 = fcmp oeq <4 x float> %0, %1
19918 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19919 %4 = bitcast <16 x i1> %3 to i16
; Unmasked `fcmp oeq` of %__a (viewed as <4 x float>) against a float loaded
; from %__b and splatted to all 4 lanes. The <4 x i1> result is zero-padded to
; <16 x i1> and bitcast to i16.
; The VLX run expects a {1to4} broadcast memory operand on vcmpeqps; the NoVLX
; run expects vbroadcastss, a 512-bit compare and a kshiftlw/kshiftrw $12 pair.
19923 define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
19924 ; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b:
19925 ; VLX: # %bb.0: # %entry
19926 ; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
19927 ; VLX-NEXT: kmovd %k0, %eax
19928 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19931 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b:
19932 ; NoVLX: # %bb.0: # %entry
19933 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19934 ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
19935 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
19936 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19937 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19938 ; NoVLX-NEXT: kmovw %k0, %eax
19939 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19940 ; NoVLX-NEXT: vzeroupper
19943 %0 = bitcast <2 x i64> %__a to <4 x float>
19944 %load = load float, float* %__b
19945 %vec = insertelement <4 x float> undef, float %load, i32 0
19946 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
19947 %2 = fcmp oeq <4 x float> %0, %1
19948 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19949 %4 = bitcast <16 x i1> %3 to i16
; Masked `fcmp oeq` of %__a and %__b, both viewed as <4 x float>. The compare
; result is ANDed with %__u (i4 bitcast to <4 x i1>), zero-padded to <16 x i1>
; and bitcast to i16.
; Both runs expect %__u to be moved into %k1 and used as the compare's
; write-mask; the NoVLX run additionally widens to zmm and emits a
; kshiftlw/kshiftrw $12 pair.
19953 define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
19954 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask:
19955 ; VLX: # %bb.0: # %entry
19956 ; VLX-NEXT: kmovd %edi, %k1
19957 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
19958 ; VLX-NEXT: kmovd %k0, %eax
19959 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19962 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask:
19963 ; NoVLX: # %bb.0: # %entry
19964 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
19965 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19966 ; NoVLX-NEXT: kmovw %edi, %k1
19967 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
19968 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
19969 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
19970 ; NoVLX-NEXT: kmovw %k0, %eax
19971 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
19972 ; NoVLX-NEXT: vzeroupper
19975 %0 = bitcast <2 x i64> %__a to <4 x float>
19976 %1 = bitcast <2 x i64> %__b to <4 x float>
19977 %2 = fcmp oeq <4 x float> %0, %1
19978 %3 = bitcast i4 %__u to <4 x i1>
19979 %4 = and <4 x i1> %2, %3
19980 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
19981 %6 = bitcast <16 x i1> %5 to i16
; Masked `fcmp oeq` of %__a against a <2 x i64> loaded from %__b, both viewed as
; <4 x float>. The compare result is ANDed with %__u (i4 bitcast to <4 x i1>),
; zero-padded to <16 x i1> and bitcast to i16.
; The VLX run expects the load folded into a write-masked vcmpeqps; the NoVLX
; run expects a separate vmovaps load, a write-masked 512-bit compare and a
; kshiftlw/kshiftrw $12 pair.
19985 define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
19986 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem:
19987 ; VLX: # %bb.0: # %entry
19988 ; VLX-NEXT: kmovd %edi, %k1
19989 ; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
19990 ; VLX-NEXT: kmovd %k0, %eax
19991 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
19994 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem:
19995 ; NoVLX: # %bb.0: # %entry
19996 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19997 ; NoVLX-NEXT: kmovw %edi, %k1
19998 ; NoVLX-NEXT: vmovaps (%rsi), %xmm1
19999 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20000 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20001 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20002 ; NoVLX-NEXT: kmovw %k0, %eax
20003 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20004 ; NoVLX-NEXT: vzeroupper
20007 %0 = bitcast <2 x i64> %__a to <4 x float>
20008 %load = load <2 x i64>, <2 x i64>* %__b
20009 %1 = bitcast <2 x i64> %load to <4 x float>
20010 %2 = fcmp oeq <4 x float> %0, %1
20011 %3 = bitcast i4 %__u to <4 x i1>
20012 %4 = and <4 x i1> %2, %3
20013 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20014 %6 = bitcast <16 x i1> %5 to i16
; Masked `fcmp oeq` of %__a (viewed as <4 x float>) against a float loaded from
; %__b and splatted to all 4 lanes. The compare result is ANDed with %__u
; (i4 bitcast to <4 x i1>), zero-padded to <16 x i1> and bitcast to i16.
; The VLX run expects a write-masked vcmpeqps with a {1to4} broadcast operand;
; the NoVLX run expects vbroadcastss, a write-masked 512-bit compare and a
; kshiftlw/kshiftrw $12 pair.
20018 define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
20019 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b:
20020 ; VLX: # %bb.0: # %entry
20021 ; VLX-NEXT: kmovd %edi, %k1
20022 ; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
20023 ; VLX-NEXT: kmovd %k0, %eax
20024 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
20027 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b:
20028 ; NoVLX: # %bb.0: # %entry
20029 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20030 ; NoVLX-NEXT: kmovw %edi, %k1
20031 ; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1
20032 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20033 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20034 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20035 ; NoVLX-NEXT: kmovw %k0, %eax
20036 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20037 ; NoVLX-NEXT: vzeroupper
20040 %0 = bitcast <2 x i64> %__a to <4 x float>
20041 %load = load float, float* %__b
20042 %vec = insertelement <4 x float> undef, float %load, i32 0
20043 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
20044 %2 = fcmp oeq <4 x float> %0, %1
20045 %3 = bitcast i4 %__u to <4 x i1>
20046 %4 = and <4 x i1> %2, %3
20047 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20048 %6 = bitcast <16 x i1> %5 to i16
; Unmasked `fcmp oeq` of %__a and %__b, both viewed as <4 x float>. The <4 x i1>
; result is zero-padded to <32 x i1> (every shuffle index >= 4 selects a
; zeroinitializer lane) and bitcast to i32.
; The VLX run expects a 128-bit vcmpeqps into a mask register; the NoVLX run
; expects a 512-bit compare followed by a kshiftlw/kshiftrw $12 pair.
20054 define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
20055 ; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask:
20056 ; VLX: # %bb.0: # %entry
20057 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
20058 ; VLX-NEXT: kmovd %k0, %eax
20061 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask:
20062 ; NoVLX: # %bb.0: # %entry
20063 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
20064 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20065 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20066 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20067 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20068 ; NoVLX-NEXT: kmovw %k0, %eax
20069 ; NoVLX-NEXT: vzeroupper
20072 %0 = bitcast <2 x i64> %__a to <4 x float>
20073 %1 = bitcast <2 x i64> %__b to <4 x float>
20074 %2 = fcmp oeq <4 x float> %0, %1
20075 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20076 %4 = bitcast <32 x i1> %3 to i32
; Unmasked `fcmp oeq` of %__a against a <2 x i64> loaded from %__b, both viewed
; as <4 x float>. The <4 x i1> result is zero-padded to <32 x i1> and bitcast
; to i32.
; The VLX run expects the load folded into vcmpeqps; the NoVLX run expects a
; separate vmovaps load, a 512-bit compare and a kshiftlw/kshiftrw $12 pair.
20080 define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
20081 ; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem:
20082 ; VLX: # %bb.0: # %entry
20083 ; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
20084 ; VLX-NEXT: kmovd %k0, %eax
20087 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem:
20088 ; NoVLX: # %bb.0: # %entry
20089 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20090 ; NoVLX-NEXT: vmovaps (%rdi), %xmm1
20091 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20092 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20093 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20094 ; NoVLX-NEXT: kmovw %k0, %eax
20095 ; NoVLX-NEXT: vzeroupper
20098 %0 = bitcast <2 x i64> %__a to <4 x float>
20099 %load = load <2 x i64>, <2 x i64>* %__b
20100 %1 = bitcast <2 x i64> %load to <4 x float>
20101 %2 = fcmp oeq <4 x float> %0, %1
20102 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20103 %4 = bitcast <32 x i1> %3 to i32
; Unmasked `fcmp oeq` of %__a (viewed as <4 x float>) against a float loaded
; from %__b and splatted to all 4 lanes. The <4 x i1> result is zero-padded to
; <32 x i1> and bitcast to i32.
; The VLX run expects a {1to4} broadcast memory operand on vcmpeqps; the NoVLX
; run expects vbroadcastss, a 512-bit compare and a kshiftlw/kshiftrw $12 pair.
20107 define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
20108 ; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b:
20109 ; VLX: # %bb.0: # %entry
20110 ; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
20111 ; VLX-NEXT: kmovd %k0, %eax
20114 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b:
20115 ; NoVLX: # %bb.0: # %entry
20116 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20117 ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
20118 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20119 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20120 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20121 ; NoVLX-NEXT: kmovw %k0, %eax
20122 ; NoVLX-NEXT: vzeroupper
20125 %0 = bitcast <2 x i64> %__a to <4 x float>
20126 %load = load float, float* %__b
20127 %vec = insertelement <4 x float> undef, float %load, i32 0
20128 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
20129 %2 = fcmp oeq <4 x float> %0, %1
20130 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20131 %4 = bitcast <32 x i1> %3 to i32
; Masked `fcmp oeq` of %__a and %__b, both viewed as <4 x float>. The compare
; result is ANDed with %__u (i4 bitcast to <4 x i1>), zero-padded to <32 x i1>
; and bitcast to i32.
; Both runs expect %__u to be moved into %k1 and used as the compare's
; write-mask; the NoVLX run additionally widens to zmm and emits a
; kshiftlw/kshiftrw $12 pair.
20135 define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
20136 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask:
20137 ; VLX: # %bb.0: # %entry
20138 ; VLX-NEXT: kmovd %edi, %k1
20139 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
20140 ; VLX-NEXT: kmovd %k0, %eax
20143 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask:
20144 ; NoVLX: # %bb.0: # %entry
20145 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
20146 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20147 ; NoVLX-NEXT: kmovw %edi, %k1
20148 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20149 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20150 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20151 ; NoVLX-NEXT: kmovw %k0, %eax
20152 ; NoVLX-NEXT: vzeroupper
20155 %0 = bitcast <2 x i64> %__a to <4 x float>
20156 %1 = bitcast <2 x i64> %__b to <4 x float>
20157 %2 = fcmp oeq <4 x float> %0, %1
20158 %3 = bitcast i4 %__u to <4 x i1>
20159 %4 = and <4 x i1> %2, %3
20160 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20161 %6 = bitcast <32 x i1> %5 to i32
; Masked `fcmp oeq` of %__a against a <2 x i64> loaded from %__b, both viewed as
; <4 x float>. The compare result is ANDed with %__u (i4 bitcast to <4 x i1>),
; zero-padded to <32 x i1> and bitcast to i32.
; The VLX run expects the load folded into a write-masked vcmpeqps; the NoVLX
; run expects a separate vmovaps load, a write-masked 512-bit compare and a
; kshiftlw/kshiftrw $12 pair.
20165 define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
20166 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem:
20167 ; VLX: # %bb.0: # %entry
20168 ; VLX-NEXT: kmovd %edi, %k1
20169 ; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
20170 ; VLX-NEXT: kmovd %k0, %eax
20173 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem:
20174 ; NoVLX: # %bb.0: # %entry
20175 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20176 ; NoVLX-NEXT: kmovw %edi, %k1
20177 ; NoVLX-NEXT: vmovaps (%rsi), %xmm1
20178 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20179 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20180 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20181 ; NoVLX-NEXT: kmovw %k0, %eax
20182 ; NoVLX-NEXT: vzeroupper
20185 %0 = bitcast <2 x i64> %__a to <4 x float>
20186 %load = load <2 x i64>, <2 x i64>* %__b
20187 %1 = bitcast <2 x i64> %load to <4 x float>
20188 %2 = fcmp oeq <4 x float> %0, %1
20189 %3 = bitcast i4 %__u to <4 x i1>
20190 %4 = and <4 x i1> %2, %3
20191 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20192 %6 = bitcast <32 x i1> %5 to i32
; Masked `fcmp oeq` of %__a (viewed as <4 x float>) against a float loaded from
; %__b and splatted to all 4 lanes. The compare result is ANDed with %__u
; (i4 bitcast to <4 x i1>), zero-padded to <32 x i1> and bitcast to i32.
; The VLX run expects a write-masked vcmpeqps with a {1to4} broadcast operand;
; the NoVLX run expects vbroadcastss, a write-masked 512-bit compare and a
; kshiftlw/kshiftrw $12 pair.
20196 define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
20197 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b:
20198 ; VLX: # %bb.0: # %entry
20199 ; VLX-NEXT: kmovd %edi, %k1
20200 ; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
20201 ; VLX-NEXT: kmovd %k0, %eax
20204 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b:
20205 ; NoVLX: # %bb.0: # %entry
20206 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20207 ; NoVLX-NEXT: kmovw %edi, %k1
20208 ; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1
20209 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20210 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20211 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20212 ; NoVLX-NEXT: kmovw %k0, %eax
20213 ; NoVLX-NEXT: vzeroupper
20216 %0 = bitcast <2 x i64> %__a to <4 x float>
20217 %load = load float, float* %__b
20218 %vec = insertelement <4 x float> undef, float %load, i32 0
20219 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
20220 %2 = fcmp oeq <4 x float> %0, %1
20221 %3 = bitcast i4 %__u to <4 x i1>
20222 %4 = and <4 x i1> %2, %3
20223 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20224 %6 = bitcast <32 x i1> %5 to i32
; Unmasked `fcmp oeq` of %__a and %__b, both viewed as <4 x float>. The <4 x i1>
; result is zero-padded to <64 x i1> (every shuffle index >= 4 selects a
; zeroinitializer lane) and bitcast to i64.
; The VLX run expects a 128-bit vcmpeqps read out with kmovq; the NoVLX run
; expects a 512-bit compare, a kshiftlw/kshiftrw $12 pair, and a kmovw + movzwl
; sequence to produce the result.
20230 define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
20231 ; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask:
20232 ; VLX: # %bb.0: # %entry
20233 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
20234 ; VLX-NEXT: kmovq %k0, %rax
20237 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask:
20238 ; NoVLX: # %bb.0: # %entry
20239 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
20240 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20241 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20242 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20243 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20244 ; NoVLX-NEXT: kmovw %k0, %eax
20245 ; NoVLX-NEXT: movzwl %ax, %eax
20246 ; NoVLX-NEXT: vzeroupper
20249 %0 = bitcast <2 x i64> %__a to <4 x float>
20250 %1 = bitcast <2 x i64> %__b to <4 x float>
20251 %2 = fcmp oeq <4 x float> %0, %1
20252 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20253 %4 = bitcast <64 x i1> %3 to i64
; Unmasked `fcmp oeq` of %__a against a <2 x i64> loaded from %__b, both viewed
; as <4 x float>. The <4 x i1> result is zero-padded to <64 x i1> and bitcast
; to i64.
; The VLX run expects the load folded into vcmpeqps and a kmovq readout; the
; NoVLX run expects a separate vmovaps load, a 512-bit compare, a
; kshiftlw/kshiftrw $12 pair, and kmovw + movzwl.
20257 define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
20258 ; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem:
20259 ; VLX: # %bb.0: # %entry
20260 ; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
20261 ; VLX-NEXT: kmovq %k0, %rax
20264 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem:
20265 ; NoVLX: # %bb.0: # %entry
20266 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20267 ; NoVLX-NEXT: vmovaps (%rdi), %xmm1
20268 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20269 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20270 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20271 ; NoVLX-NEXT: kmovw %k0, %eax
20272 ; NoVLX-NEXT: movzwl %ax, %eax
20273 ; NoVLX-NEXT: vzeroupper
20276 %0 = bitcast <2 x i64> %__a to <4 x float>
20277 %load = load <2 x i64>, <2 x i64>* %__b
20278 %1 = bitcast <2 x i64> %load to <4 x float>
20279 %2 = fcmp oeq <4 x float> %0, %1
20280 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20281 %4 = bitcast <64 x i1> %3 to i64
; Unmasked `fcmp oeq` of %__a (viewed as <4 x float>) against a float loaded
; from %__b and splatted to all 4 lanes. The <4 x i1> result is zero-padded to
; <64 x i1> and bitcast to i64.
; The VLX run expects a {1to4} broadcast memory operand on vcmpeqps and a kmovq
; readout; the NoVLX run expects vbroadcastss, a 512-bit compare, a
; kshiftlw/kshiftrw $12 pair, and kmovw + movzwl.
20285 define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
20286 ; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b:
20287 ; VLX: # %bb.0: # %entry
20288 ; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
20289 ; VLX-NEXT: kmovq %k0, %rax
20292 ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b:
20293 ; NoVLX: # %bb.0: # %entry
20294 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20295 ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
20296 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20297 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20298 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20299 ; NoVLX-NEXT: kmovw %k0, %eax
20300 ; NoVLX-NEXT: movzwl %ax, %eax
20301 ; NoVLX-NEXT: vzeroupper
20304 %0 = bitcast <2 x i64> %__a to <4 x float>
20305 %load = load float, float* %__b
20306 %vec = insertelement <4 x float> undef, float %load, i32 0
20307 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
20308 %2 = fcmp oeq <4 x float> %0, %1
20309 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20310 %4 = bitcast <64 x i1> %3 to i64
; Masked `fcmp oeq` of %__a and %__b, both viewed as <4 x float>. The compare
; result is ANDed with %__u (i4 bitcast to <4 x i1>), zero-padded to <64 x i1>
; and bitcast to i64.
; Both runs expect %__u to be moved into %k1 and used as the compare's
; write-mask; the NoVLX run additionally widens to zmm, emits a
; kshiftlw/kshiftrw $12 pair, and reads the mask out with kmovw + movzwl.
20314 define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
20315 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask:
20316 ; VLX: # %bb.0: # %entry
20317 ; VLX-NEXT: kmovd %edi, %k1
20318 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
20319 ; VLX-NEXT: kmovq %k0, %rax
20322 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask:
20323 ; NoVLX: # %bb.0: # %entry
20324 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
20325 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20326 ; NoVLX-NEXT: kmovw %edi, %k1
20327 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20328 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20329 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20330 ; NoVLX-NEXT: kmovw %k0, %eax
20331 ; NoVLX-NEXT: movzwl %ax, %eax
20332 ; NoVLX-NEXT: vzeroupper
20335 %0 = bitcast <2 x i64> %__a to <4 x float>
20336 %1 = bitcast <2 x i64> %__b to <4 x float>
20337 %2 = fcmp oeq <4 x float> %0, %1
20338 %3 = bitcast i4 %__u to <4 x i1>
20339 %4 = and <4 x i1> %2, %3
20340 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20341 %6 = bitcast <64 x i1> %5 to i64
; Masked `fcmp oeq` of %__a against a <2 x i64> loaded from %__b, both viewed as
; <4 x float>. The compare result is ANDed with %__u (i4 bitcast to <4 x i1>),
; zero-padded to <64 x i1> and bitcast to i64.
; The VLX run expects the load folded into a write-masked vcmpeqps and a kmovq
; readout; the NoVLX run expects a separate vmovaps load, a write-masked
; 512-bit compare, a kshiftlw/kshiftrw $12 pair, and kmovw + movzwl.
20345 define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
20346 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem:
20347 ; VLX: # %bb.0: # %entry
20348 ; VLX-NEXT: kmovd %edi, %k1
20349 ; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
20350 ; VLX-NEXT: kmovq %k0, %rax
20353 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem:
20354 ; NoVLX: # %bb.0: # %entry
20355 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20356 ; NoVLX-NEXT: kmovw %edi, %k1
20357 ; NoVLX-NEXT: vmovaps (%rsi), %xmm1
20358 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20359 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20360 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20361 ; NoVLX-NEXT: kmovw %k0, %eax
20362 ; NoVLX-NEXT: movzwl %ax, %eax
20363 ; NoVLX-NEXT: vzeroupper
20366 %0 = bitcast <2 x i64> %__a to <4 x float>
20367 %load = load <2 x i64>, <2 x i64>* %__b
20368 %1 = bitcast <2 x i64> %load to <4 x float>
20369 %2 = fcmp oeq <4 x float> %0, %1
20370 %3 = bitcast i4 %__u to <4 x i1>
20371 %4 = and <4 x i1> %2, %3
20372 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20373 %6 = bitcast <64 x i1> %5 to i64
; Masked `fcmp oeq` on <4 x float> against a scalar float loaded from %__b and
; splatted to all four lanes (insertelement + all-zero shuffle mask).
; The result is ANDed with the i4 mask %__u and widened to 64 lanes; shuffle
; indices 4-7 select the zeroinitializer operand, so the upper lanes are zero.
; The VLX run expects the splat folded into the compare as a {1to4} broadcast
; operand; the NoVLX run expects a separate vbroadcastss, a zmm compare, and
; kshiftlw/kshiftrw $12 to clear the mask bits above the low four.
20377 define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
20378 ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b:
20379 ; VLX: # %bb.0: # %entry
20380 ; VLX-NEXT: kmovd %edi, %k1
20381 ; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
20382 ; VLX-NEXT: kmovq %k0, %rax
20385 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b:
20386 ; NoVLX: # %bb.0: # %entry
20387 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20388 ; NoVLX-NEXT: kmovw %edi, %k1
20389 ; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1
20390 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20391 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
20392 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
20393 ; NoVLX-NEXT: kmovw %k0, %eax
20394 ; NoVLX-NEXT: movzwl %ax, %eax
20395 ; NoVLX-NEXT: vzeroupper
20398 %0 = bitcast <2 x i64> %__a to <4 x float>
20399 %load = load float, float* %__b
20400 %vec = insertelement <4 x float> undef, float %load, i32 0
20401 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
20402 %2 = fcmp oeq <4 x float> %0, %1
20403 %3 = bitcast i4 %__u to <4 x i1>
20404 %4 = and <4 x i1> %2, %3
20405 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
20406 %6 = bitcast <64 x i1> %5 to i64
; Unmasked `fcmp oeq` on <8 x float> with both operands in registers.
; The <8 x i1> result is widened to 16 lanes; shuffle indices 8-15 select the
; zeroinitializer operand, so the upper eight bits of the i16 bitcast are zero.
; The VLX run expects a single ymm compare into a mask register; the NoVLX run
; expects a zmm compare followed by kshiftlw/kshiftrw $8 to clear the mask bits
; above the low eight.
20412 define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20413 ; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
20414 ; VLX: # %bb.0: # %entry
20415 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
20416 ; VLX-NEXT: kmovd %k0, %eax
20417 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
20418 ; VLX-NEXT: vzeroupper
20421 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
20422 ; NoVLX: # %bb.0: # %entry
20423 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
20424 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20425 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20426 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20427 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20428 ; NoVLX-NEXT: kmovw %k0, %eax
20429 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20430 ; NoVLX-NEXT: vzeroupper
20433 %0 = bitcast <4 x i64> %__a to <8 x float>
20434 %1 = bitcast <4 x i64> %__b to <8 x float>
20435 %2 = fcmp oeq <8 x float> %0, %1
20436 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20437 %4 = bitcast <16 x i1> %3 to i16
; Unmasked `fcmp oeq` on <8 x float> whose second operand is loaded from %__b.
; The <8 x i1> result is widened to 16 lanes; shuffle indices 8-15 select the
; zeroinitializer operand, so the upper eight bits of the i16 bitcast are zero.
; The VLX run expects the load folded into a ymm compare; the NoVLX run expects
; a separate vmovaps, a zmm compare, and kshiftlw/kshiftrw $8 to clear the mask
; bits above the low eight.
20441 define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
20442 ; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
20443 ; VLX: # %bb.0: # %entry
20444 ; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0
20445 ; VLX-NEXT: kmovd %k0, %eax
20446 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
20447 ; VLX-NEXT: vzeroupper
20450 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
20451 ; NoVLX: # %bb.0: # %entry
20452 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20453 ; NoVLX-NEXT: vmovaps (%rdi), %ymm1
20454 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20455 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20456 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20457 ; NoVLX-NEXT: kmovw %k0, %eax
20458 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20459 ; NoVLX-NEXT: vzeroupper
20462 %0 = bitcast <4 x i64> %__a to <8 x float>
20463 %load = load <4 x i64>, <4 x i64>* %__b
20464 %1 = bitcast <4 x i64> %load to <8 x float>
20465 %2 = fcmp oeq <8 x float> %0, %1
20466 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20467 %4 = bitcast <16 x i1> %3 to i16
; Unmasked `fcmp oeq` on <8 x float> against a scalar float loaded from %__b
; and splatted to all eight lanes (insertelement + all-zero shuffle mask).
; The <8 x i1> result is widened to 16 lanes; shuffle indices 8-15 select the
; zeroinitializer operand, so the upper eight bits of the i16 bitcast are zero.
; The VLX run expects the splat folded into the compare as a {1to8} broadcast
; operand; the NoVLX run expects a separate vbroadcastss, a zmm compare, and
; kshiftlw/kshiftrw $8 to clear the mask bits above the low eight.
20471 define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
20472 ; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
20473 ; VLX: # %bb.0: # %entry
20474 ; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
20475 ; VLX-NEXT: kmovd %k0, %eax
20476 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
20477 ; VLX-NEXT: vzeroupper
20480 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
20481 ; NoVLX: # %bb.0: # %entry
20482 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20483 ; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1
20484 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20485 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20486 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20487 ; NoVLX-NEXT: kmovw %k0, %eax
20488 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20489 ; NoVLX-NEXT: vzeroupper
20492 %0 = bitcast <4 x i64> %__a to <8 x float>
20493 %load = load float, float* %__b
20494 %vec = insertelement <8 x float> undef, float %load, i32 0
20495 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20496 %2 = fcmp oeq <8 x float> %0, %1
20497 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20498 %4 = bitcast <16 x i1> %3 to i16
; Masked `fcmp oeq` on <8 x float> with both operands in registers.
; The <8 x i1> result is ANDed with the i8 mask %__u and widened to 16 lanes;
; shuffle indices 8-15 select the zeroinitializer operand, so the upper eight
; bits of the i16 bitcast are zero.
; The VLX run expects one ymm compare predicated on %k1; the NoVLX run expects
; a predicated zmm compare followed by kshiftlw/kshiftrw $8 to clear the mask
; bits above the low eight.
20502 define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20503 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask:
20504 ; VLX: # %bb.0: # %entry
20505 ; VLX-NEXT: kmovd %edi, %k1
20506 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1}
20507 ; VLX-NEXT: kmovd %k0, %eax
20508 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
20509 ; VLX-NEXT: vzeroupper
20512 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask:
20513 ; NoVLX: # %bb.0: # %entry
20514 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
20515 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20516 ; NoVLX-NEXT: kmovw %edi, %k1
20517 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20518 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20519 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20520 ; NoVLX-NEXT: kmovw %k0, %eax
20521 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20522 ; NoVLX-NEXT: vzeroupper
20525 %0 = bitcast <4 x i64> %__a to <8 x float>
20526 %1 = bitcast <4 x i64> %__b to <8 x float>
20527 %2 = fcmp oeq <8 x float> %0, %1
20528 %3 = bitcast i8 %__u to <8 x i1>
20529 %4 = and <8 x i1> %2, %3
20530 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20531 %6 = bitcast <16 x i1> %5 to i16
; Masked `fcmp oeq` on <8 x float> whose second operand is loaded from %__b.
; The <8 x i1> result is ANDed with the i8 mask %__u and widened to 16 lanes;
; shuffle indices 8-15 select the zeroinitializer operand, so the upper eight
; bits of the i16 bitcast are zero.
; The VLX run expects the load folded into a predicated ymm compare; the NoVLX
; run expects a separate vmovaps, a predicated zmm compare, and
; kshiftlw/kshiftrw $8 to clear the mask bits above the low eight.
20535 define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
20536 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem:
20537 ; VLX: # %bb.0: # %entry
20538 ; VLX-NEXT: kmovd %edi, %k1
20539 ; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1}
20540 ; VLX-NEXT: kmovd %k0, %eax
20541 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
20542 ; VLX-NEXT: vzeroupper
20545 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem:
20546 ; NoVLX: # %bb.0: # %entry
20547 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20548 ; NoVLX-NEXT: vmovaps (%rsi), %ymm1
20549 ; NoVLX-NEXT: kmovw %edi, %k1
20550 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20551 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20552 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20553 ; NoVLX-NEXT: kmovw %k0, %eax
20554 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20555 ; NoVLX-NEXT: vzeroupper
20558 %0 = bitcast <4 x i64> %__a to <8 x float>
20559 %load = load <4 x i64>, <4 x i64>* %__b
20560 %1 = bitcast <4 x i64> %load to <8 x float>
20561 %2 = fcmp oeq <8 x float> %0, %1
20562 %3 = bitcast i8 %__u to <8 x i1>
20563 %4 = and <8 x i1> %2, %3
20564 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20565 %6 = bitcast <16 x i1> %5 to i16
; Masked `fcmp oeq` on <8 x float> against a scalar float loaded from %__b and
; splatted to all eight lanes (insertelement + all-zero shuffle mask).
; The <8 x i1> result is ANDed with the i8 mask %__u and widened to 16 lanes;
; shuffle indices 8-15 select the zeroinitializer operand, so the upper eight
; bits of the i16 bitcast are zero.
; The VLX run expects the splat folded into the predicated compare as a {1to8}
; broadcast operand; the NoVLX run expects a separate vbroadcastss, a
; predicated zmm compare, and kshiftlw/kshiftrw $8 to clear the upper bits.
20569 define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr {
20570 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b:
20571 ; VLX: # %bb.0: # %entry
20572 ; VLX-NEXT: kmovd %edi, %k1
20573 ; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
20574 ; VLX-NEXT: kmovd %k0, %eax
20575 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
20576 ; VLX-NEXT: vzeroupper
20579 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b:
20580 ; NoVLX: # %bb.0: # %entry
20581 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20582 ; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1
20583 ; NoVLX-NEXT: kmovw %edi, %k1
20584 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20585 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20586 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20587 ; NoVLX-NEXT: kmovw %k0, %eax
20588 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
20589 ; NoVLX-NEXT: vzeroupper
20592 %0 = bitcast <4 x i64> %__a to <8 x float>
20593 %load = load float, float* %__b
20594 %vec = insertelement <8 x float> undef, float %load, i32 0
20595 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20596 %2 = fcmp oeq <8 x float> %0, %1
20597 %3 = bitcast i8 %__u to <8 x i1>
20598 %4 = and <8 x i1> %2, %3
20599 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20600 %6 = bitcast <16 x i1> %5 to i16
; Unmasked `fcmp oeq` on <8 x float> with both operands in registers.
; The <8 x i1> result is widened to 32 lanes; every shuffle index from 8 to 15
; selects the zeroinitializer operand, so the upper 24 bits of the i32 bitcast
; are zero.
; The VLX run expects a single ymm compare into a mask register; the NoVLX run
; expects a zmm compare followed by kshiftlw/kshiftrw $8 to clear the mask bits
; above the low eight.
20606 define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20607 ; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask:
20608 ; VLX: # %bb.0: # %entry
20609 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
20610 ; VLX-NEXT: kmovd %k0, %eax
20611 ; VLX-NEXT: vzeroupper
20614 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask:
20615 ; NoVLX: # %bb.0: # %entry
20616 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
20617 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20618 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20619 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20620 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20621 ; NoVLX-NEXT: kmovw %k0, %eax
20622 ; NoVLX-NEXT: vzeroupper
20625 %0 = bitcast <4 x i64> %__a to <8 x float>
20626 %1 = bitcast <4 x i64> %__b to <8 x float>
20627 %2 = fcmp oeq <8 x float> %0, %1
20628 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20629 %4 = bitcast <32 x i1> %3 to i32
; Unmasked `fcmp oeq` on <8 x float> whose second operand is loaded from %__b.
; The <8 x i1> result is widened to 32 lanes; every shuffle index from 8 to 15
; selects the zeroinitializer operand, so the upper 24 bits of the i32 bitcast
; are zero.
; The VLX run expects the load folded into a ymm compare; the NoVLX run expects
; a separate vmovaps, a zmm compare, and kshiftlw/kshiftrw $8 to clear the mask
; bits above the low eight.
20633 define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
20634 ; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem:
20635 ; VLX: # %bb.0: # %entry
20636 ; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0
20637 ; VLX-NEXT: kmovd %k0, %eax
20638 ; VLX-NEXT: vzeroupper
20641 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem:
20642 ; NoVLX: # %bb.0: # %entry
20643 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20644 ; NoVLX-NEXT: vmovaps (%rdi), %ymm1
20645 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20646 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20647 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20648 ; NoVLX-NEXT: kmovw %k0, %eax
20649 ; NoVLX-NEXT: vzeroupper
20652 %0 = bitcast <4 x i64> %__a to <8 x float>
20653 %load = load <4 x i64>, <4 x i64>* %__b
20654 %1 = bitcast <4 x i64> %load to <8 x float>
20655 %2 = fcmp oeq <8 x float> %0, %1
20656 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20657 %4 = bitcast <32 x i1> %3 to i32
; Unmasked `fcmp oeq` on <8 x float> against a scalar float loaded from %__b
; and splatted to all eight lanes (insertelement + all-zero shuffle mask).
; The <8 x i1> result is widened to 32 lanes; every shuffle index from 8 to 15
; selects the zeroinitializer operand, so the upper 24 bits of the i32 bitcast
; are zero.
; The VLX run expects the splat folded into the compare as a {1to8} broadcast
; operand; the NoVLX run expects a separate vbroadcastss, a zmm compare, and
; kshiftlw/kshiftrw $8 to clear the mask bits above the low eight.
20661 define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
20662 ; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b:
20663 ; VLX: # %bb.0: # %entry
20664 ; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
20665 ; VLX-NEXT: kmovd %k0, %eax
20666 ; VLX-NEXT: vzeroupper
20669 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b:
20670 ; NoVLX: # %bb.0: # %entry
20671 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20672 ; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1
20673 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20674 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20675 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20676 ; NoVLX-NEXT: kmovw %k0, %eax
20677 ; NoVLX-NEXT: vzeroupper
20680 %0 = bitcast <4 x i64> %__a to <8 x float>
20681 %load = load float, float* %__b
20682 %vec = insertelement <8 x float> undef, float %load, i32 0
20683 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20684 %2 = fcmp oeq <8 x float> %0, %1
20685 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20686 %4 = bitcast <32 x i1> %3 to i32
; Masked `fcmp oeq` on <8 x float> with both operands in registers.
; The <8 x i1> result is ANDed with the i8 mask %__u and widened to 32 lanes;
; every shuffle index from 8 to 15 selects the zeroinitializer operand, so the
; upper 24 bits of the i32 bitcast are zero.
; The VLX run expects one ymm compare predicated on %k1; the NoVLX run expects
; a predicated zmm compare followed by kshiftlw/kshiftrw $8 to clear the mask
; bits above the low eight.
20690 define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20691 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask:
20692 ; VLX: # %bb.0: # %entry
20693 ; VLX-NEXT: kmovd %edi, %k1
20694 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1}
20695 ; VLX-NEXT: kmovd %k0, %eax
20696 ; VLX-NEXT: vzeroupper
20699 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask:
20700 ; NoVLX: # %bb.0: # %entry
20701 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
20702 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20703 ; NoVLX-NEXT: kmovw %edi, %k1
20704 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20705 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20706 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20707 ; NoVLX-NEXT: kmovw %k0, %eax
20708 ; NoVLX-NEXT: vzeroupper
20711 %0 = bitcast <4 x i64> %__a to <8 x float>
20712 %1 = bitcast <4 x i64> %__b to <8 x float>
20713 %2 = fcmp oeq <8 x float> %0, %1
20714 %3 = bitcast i8 %__u to <8 x i1>
20715 %4 = and <8 x i1> %2, %3
20716 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20717 %6 = bitcast <32 x i1> %5 to i32
; Masked `fcmp oeq` on <8 x float> whose second operand is loaded from %__b.
; The <8 x i1> result is ANDed with the i8 mask %__u and widened to 32 lanes;
; every shuffle index from 8 to 15 selects the zeroinitializer operand, so the
; upper 24 bits of the i32 bitcast are zero.
; The VLX run expects the load folded into a predicated ymm compare; the NoVLX
; run expects a separate vmovaps, a predicated zmm compare, and
; kshiftlw/kshiftrw $8 to clear the mask bits above the low eight.
20721 define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
20722 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem:
20723 ; VLX: # %bb.0: # %entry
20724 ; VLX-NEXT: kmovd %edi, %k1
20725 ; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1}
20726 ; VLX-NEXT: kmovd %k0, %eax
20727 ; VLX-NEXT: vzeroupper
20730 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem:
20731 ; NoVLX: # %bb.0: # %entry
20732 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20733 ; NoVLX-NEXT: vmovaps (%rsi), %ymm1
20734 ; NoVLX-NEXT: kmovw %edi, %k1
20735 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20736 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20737 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20738 ; NoVLX-NEXT: kmovw %k0, %eax
20739 ; NoVLX-NEXT: vzeroupper
20742 %0 = bitcast <4 x i64> %__a to <8 x float>
20743 %load = load <4 x i64>, <4 x i64>* %__b
20744 %1 = bitcast <4 x i64> %load to <8 x float>
20745 %2 = fcmp oeq <8 x float> %0, %1
20746 %3 = bitcast i8 %__u to <8 x i1>
20747 %4 = and <8 x i1> %2, %3
20748 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20749 %6 = bitcast <32 x i1> %5 to i32
; Masked `fcmp oeq` on <8 x float> against a scalar float loaded from %__b and
; splatted to all eight lanes (insertelement + all-zero shuffle mask).
; The <8 x i1> result is ANDed with the i8 mask %__u and widened to 32 lanes;
; every shuffle index from 8 to 15 selects the zeroinitializer operand, so the
; upper 24 bits of the i32 bitcast are zero.
; The VLX run expects the splat folded into the predicated compare as a {1to8}
; broadcast operand; the NoVLX run expects a separate vbroadcastss, a
; predicated zmm compare, and kshiftlw/kshiftrw $8 to clear the upper bits.
20753 define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr {
20754 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b:
20755 ; VLX: # %bb.0: # %entry
20756 ; VLX-NEXT: kmovd %edi, %k1
20757 ; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
20758 ; VLX-NEXT: kmovd %k0, %eax
20759 ; VLX-NEXT: vzeroupper
20762 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b:
20763 ; NoVLX: # %bb.0: # %entry
20764 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20765 ; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1
20766 ; NoVLX-NEXT: kmovw %edi, %k1
20767 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20768 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20769 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20770 ; NoVLX-NEXT: kmovw %k0, %eax
20771 ; NoVLX-NEXT: vzeroupper
20774 %0 = bitcast <4 x i64> %__a to <8 x float>
20775 %load = load float, float* %__b
20776 %vec = insertelement <8 x float> undef, float %load, i32 0
20777 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20778 %2 = fcmp oeq <8 x float> %0, %1
20779 %3 = bitcast i8 %__u to <8 x i1>
20780 %4 = and <8 x i1> %2, %3
20781 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20782 %6 = bitcast <32 x i1> %5 to i32
; Unmasked `fcmp oeq` on <8 x float> with both operands in registers.
; The <8 x i1> result is widened to 64 lanes; every shuffle index from 8 to 15
; selects the zeroinitializer operand, so the upper 56 bits of the i64 bitcast
; are zero.
; The VLX run expects a single ymm compare read back with kmovq; the NoVLX run
; expects a zmm compare, kshiftlw/kshiftrw $8 to clear the mask bits above the
; low eight, and a movzwl on the extracted value.
20788 define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20789 ; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask:
20790 ; VLX: # %bb.0: # %entry
20791 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
20792 ; VLX-NEXT: kmovq %k0, %rax
20793 ; VLX-NEXT: vzeroupper
20796 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask:
20797 ; NoVLX: # %bb.0: # %entry
20798 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
20799 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20800 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20801 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20802 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20803 ; NoVLX-NEXT: kmovw %k0, %eax
20804 ; NoVLX-NEXT: movzwl %ax, %eax
20805 ; NoVLX-NEXT: vzeroupper
20808 %0 = bitcast <4 x i64> %__a to <8 x float>
20809 %1 = bitcast <4 x i64> %__b to <8 x float>
20810 %2 = fcmp oeq <8 x float> %0, %1
20811 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20812 %4 = bitcast <64 x i1> %3 to i64
; Unmasked `fcmp oeq` on <8 x float> whose second operand is loaded from %__b.
; The <8 x i1> result is widened to 64 lanes; every shuffle index from 8 to 15
; selects the zeroinitializer operand, so the upper 56 bits of the i64 bitcast
; are zero.
; The VLX run expects the load folded into a ymm compare read back with kmovq;
; the NoVLX run expects a separate vmovaps, a zmm compare, kshiftlw/kshiftrw $8
; to clear the upper mask bits, and a movzwl on the extracted value.
20816 define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
20817 ; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem:
20818 ; VLX: # %bb.0: # %entry
20819 ; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0
20820 ; VLX-NEXT: kmovq %k0, %rax
20821 ; VLX-NEXT: vzeroupper
20824 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem:
20825 ; NoVLX: # %bb.0: # %entry
20826 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20827 ; NoVLX-NEXT: vmovaps (%rdi), %ymm1
20828 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20829 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20830 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20831 ; NoVLX-NEXT: kmovw %k0, %eax
20832 ; NoVLX-NEXT: movzwl %ax, %eax
20833 ; NoVLX-NEXT: vzeroupper
20836 %0 = bitcast <4 x i64> %__a to <8 x float>
20837 %load = load <4 x i64>, <4 x i64>* %__b
20838 %1 = bitcast <4 x i64> %load to <8 x float>
20839 %2 = fcmp oeq <8 x float> %0, %1
20840 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20841 %4 = bitcast <64 x i1> %3 to i64
; Unmasked `fcmp oeq` on <8 x float> against a scalar float loaded from %__b
; and splatted to all eight lanes (insertelement + all-zero shuffle mask).
; The <8 x i1> result is widened to 64 lanes; every shuffle index from 8 to 15
; selects the zeroinitializer operand, so the upper 56 bits of the i64 bitcast
; are zero.
; The VLX run expects the splat folded into the compare as a {1to8} broadcast
; operand; the NoVLX run expects a separate vbroadcastss, a zmm compare,
; kshiftlw/kshiftrw $8 to clear the upper mask bits, and a movzwl.
20845 define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
20846 ; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b:
20847 ; VLX: # %bb.0: # %entry
20848 ; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
20849 ; VLX-NEXT: kmovq %k0, %rax
20850 ; VLX-NEXT: vzeroupper
20853 ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b:
20854 ; NoVLX: # %bb.0: # %entry
20855 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20856 ; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1
20857 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20858 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20859 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20860 ; NoVLX-NEXT: kmovw %k0, %eax
20861 ; NoVLX-NEXT: movzwl %ax, %eax
20862 ; NoVLX-NEXT: vzeroupper
20865 %0 = bitcast <4 x i64> %__a to <8 x float>
20866 %load = load float, float* %__b
20867 %vec = insertelement <8 x float> undef, float %load, i32 0
20868 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20869 %2 = fcmp oeq <8 x float> %0, %1
20870 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20871 %4 = bitcast <64 x i1> %3 to i64
; Masked `fcmp oeq` on <8 x float> with both operands in registers.
; The <8 x i1> result is ANDed with the i8 mask %__u and widened to 64 lanes;
; every shuffle index from 8 to 15 selects the zeroinitializer operand, so the
; upper 56 bits of the i64 bitcast are zero.
; The VLX run expects one ymm compare predicated on %k1 and read back with
; kmovq; the NoVLX run expects a predicated zmm compare, kshiftlw/kshiftrw $8
; to clear the upper mask bits, and a movzwl on the extracted value.
20875 define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
20876 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask:
20877 ; VLX: # %bb.0: # %entry
20878 ; VLX-NEXT: kmovd %edi, %k1
20879 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1}
20880 ; VLX-NEXT: kmovq %k0, %rax
20881 ; VLX-NEXT: vzeroupper
20884 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask:
20885 ; NoVLX: # %bb.0: # %entry
20886 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
20887 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20888 ; NoVLX-NEXT: kmovw %edi, %k1
20889 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20890 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20891 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20892 ; NoVLX-NEXT: kmovw %k0, %eax
20893 ; NoVLX-NEXT: movzwl %ax, %eax
20894 ; NoVLX-NEXT: vzeroupper
20897 %0 = bitcast <4 x i64> %__a to <8 x float>
20898 %1 = bitcast <4 x i64> %__b to <8 x float>
20899 %2 = fcmp oeq <8 x float> %0, %1
20900 %3 = bitcast i8 %__u to <8 x i1>
20901 %4 = and <8 x i1> %2, %3
20902 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20903 %6 = bitcast <64 x i1> %5 to i64
; Masked `fcmp oeq` on <8 x float> whose second operand is loaded from %__b.
; The <8 x i1> result is ANDed with the i8 mask %__u and widened to 64 lanes;
; every shuffle index from 8 to 15 selects the zeroinitializer operand, so the
; upper 56 bits of the i64 bitcast are zero.
; The VLX run expects the load folded into a predicated ymm compare read back
; with kmovq; the NoVLX run expects a separate vmovaps, a predicated zmm
; compare, kshiftlw/kshiftrw $8 to clear the upper mask bits, and a movzwl.
20907 define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
20908 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem:
20909 ; VLX: # %bb.0: # %entry
20910 ; VLX-NEXT: kmovd %edi, %k1
20911 ; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1}
20912 ; VLX-NEXT: kmovq %k0, %rax
20913 ; VLX-NEXT: vzeroupper
20916 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem:
20917 ; NoVLX: # %bb.0: # %entry
20918 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20919 ; NoVLX-NEXT: vmovaps (%rsi), %ymm1
20920 ; NoVLX-NEXT: kmovw %edi, %k1
20921 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20922 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20923 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20924 ; NoVLX-NEXT: kmovw %k0, %eax
20925 ; NoVLX-NEXT: movzwl %ax, %eax
20926 ; NoVLX-NEXT: vzeroupper
20929 %0 = bitcast <4 x i64> %__a to <8 x float>
20930 %load = load <4 x i64>, <4 x i64>* %__b
20931 %1 = bitcast <4 x i64> %load to <8 x float>
20932 %2 = fcmp oeq <8 x float> %0, %1
20933 %3 = bitcast i8 %__u to <8 x i1>
20934 %4 = and <8 x i1> %2, %3
20935 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20936 %6 = bitcast <64 x i1> %5 to i64
; Masked `fcmp oeq` on <8 x float> against a scalar float loaded from %__b and
; splatted to all eight lanes (insertelement + all-zero shuffle mask).
; The <8 x i1> result is ANDed with the i8 mask %__u and widened to 64 lanes;
; every shuffle index from 8 to 15 selects the zeroinitializer operand, so the
; upper 56 bits of the i64 bitcast are zero.
; The VLX run expects the splat folded into the predicated compare as a {1to8}
; broadcast operand; the NoVLX run expects a separate vbroadcastss, a
; predicated zmm compare, kshiftlw/kshiftrw $8, and a movzwl.
20940 define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr {
20941 ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b:
20942 ; VLX: # %bb.0: # %entry
20943 ; VLX-NEXT: kmovd %edi, %k1
20944 ; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
20945 ; VLX-NEXT: kmovq %k0, %rax
20946 ; VLX-NEXT: vzeroupper
20949 ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b:
20950 ; NoVLX: # %bb.0: # %entry
20951 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
20952 ; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1
20953 ; NoVLX-NEXT: kmovw %edi, %k1
20954 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
20955 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0
20956 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
20957 ; NoVLX-NEXT: kmovw %k0, %eax
20958 ; NoVLX-NEXT: movzwl %ax, %eax
20959 ; NoVLX-NEXT: vzeroupper
20962 %0 = bitcast <4 x i64> %__a to <8 x float>
20963 %load = load float, float* %__b
20964 %vec = insertelement <8 x float> undef, float %load, i32 0
20965 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
20966 %2 = fcmp oeq <8 x float> %0, %1
20967 %3 = bitcast i8 %__u to <8 x i1>
20968 %4 = and <8 x i1> %2, %3
20969 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
20970 %6 = bitcast <64 x i1> %5 to i64
; Unmasked `fcmp oeq` on <16 x float> with both operands in registers.
; The <16 x i1> result is widened to 32 lanes; shuffle indices 16-31 select the
; zeroinitializer operand, so the upper 16 bits of the i32 bitcast are zero.
; Both runs expect a single zmm compare with no mask-clearing shifts; they
; differ only in the mask read-back (kmovd in the VLX run, kmovw in the NoVLX
; run).
20976 define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
20977 ; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask:
20978 ; VLX: # %bb.0: # %entry
20979 ; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20980 ; VLX-NEXT: kmovd %k0, %eax
20981 ; VLX-NEXT: vzeroupper
20984 ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask:
20985 ; NoVLX: # %bb.0: # %entry
20986 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
20987 ; NoVLX-NEXT: kmovw %k0, %eax
20988 ; NoVLX-NEXT: vzeroupper
20991 %0 = bitcast <8 x i64> %__a to <16 x float>
20992 %1 = bitcast <8 x i64> %__b to <16 x float>
20993 %2 = fcmp oeq <16 x float> %0, %1
20994 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
20995 %4 = bitcast <32 x i1> %3 to i32
; Unmasked `fcmp oeq` on <16 x float> whose second operand is loaded from %__b.
; The <16 x i1> result is widened to 32 lanes; shuffle indices 16-31 select the
; zeroinitializer operand, so the upper 16 bits of the i32 bitcast are zero.
; Both runs expect the load folded into a single zmm compare; they differ only
; in the mask read-back (kmovd in the VLX run, kmovw in the NoVLX run).
20999 define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
21000 ; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem:
21001 ; VLX: # %bb.0: # %entry
21002 ; VLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
21003 ; VLX-NEXT: kmovd %k0, %eax
21004 ; VLX-NEXT: vzeroupper
21007 ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem:
21008 ; NoVLX: # %bb.0: # %entry
21009 ; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
21010 ; NoVLX-NEXT: kmovw %k0, %eax
21011 ; NoVLX-NEXT: vzeroupper
21014 %0 = bitcast <8 x i64> %__a to <16 x float>
21015 %load = load <8 x i64>, <8 x i64>* %__b
21016 %1 = bitcast <8 x i64> %load to <16 x float>
21017 %2 = fcmp oeq <16 x float> %0, %1
21018 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
21019 %4 = bitcast <32 x i1> %3 to i32
; Unmasked `fcmp oeq` on <16 x float> against a scalar float loaded from %__b
; and splatted to all sixteen lanes (insertelement + all-zero shuffle mask).
; The <16 x i1> result is widened to 32 lanes; shuffle indices 16-31 select the
; zeroinitializer operand, so the upper 16 bits of the i32 bitcast are zero.
; Both runs expect the splat folded into the zmm compare as a {1to16} broadcast
; operand; they differ only in the mask read-back (kmovd vs. kmovw).
21023 define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr {
21024 ; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b:
21025 ; VLX: # %bb.0: # %entry
21026 ; VLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
21027 ; VLX-NEXT: kmovd %k0, %eax
21028 ; VLX-NEXT: vzeroupper
21031 ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b:
21032 ; NoVLX: # %bb.0: # %entry
21033 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
21034 ; NoVLX-NEXT: kmovw %k0, %eax
21035 ; NoVLX-NEXT: vzeroupper
21038 %0 = bitcast <8 x i64> %__a to <16 x float>
21039 %load = load float, float* %__b
21040 %vec = insertelement <16 x float> undef, float %load, i32 0
21041 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
21042 %2 = fcmp oeq <16 x float> %0, %1
21043 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
21044 %4 = bitcast <32 x i1> %3 to i32
; Masked variant: the ordered-equal compare of two 16 x float vectors is
; ANDed with %__u (bitcast to <16 x i1>), then zero-widened to 32 lanes and
; returned as an i32 bitmask.
define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x float>
  %1 = bitcast <8 x i64> %__b to <16 x float>
  %2 = fcmp oeq <16 x float> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
; Masked variant with a memory operand: 16 floats are compared (ordered-equal)
; against a 512-bit vector loaded from %__b, the result is ANDed with %__u,
; zero-widened to 32 lanes and returned as an i32 bitmask.
define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vcmpeqps (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vcmpeqps (%rsi), %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x float>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <16 x float>
  %2 = fcmp oeq <16 x float> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
; Masked variant with a broadcast operand: 16 floats are compared
; (ordered-equal) against a scalar loaded from %__b and splatted to all lanes.
; The result is ANDed with %__u, zero-widened to 32 lanes and returned as i32.
define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vcmpeqps (%rsi){1to16}, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x float>
  %load = load float, float* %__b
  %vec = insertelement <16 x float> undef, float %load, i32 0
  %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = fcmp oeq <16 x float> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
; Calls the 512-bit packed-single compare intrinsic with immediates 2 and 8
; and zero-extends the 16-bit result to i32. Note that, despite "oeq" in the
; test name, the expected instruction for these immediates is vcmpleps {sae}.
define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; CHECK-LABEL: test_vcmpoeqps_v16i1_v32i1_sae_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x float>
  %1 = bitcast <8 x i64> %__b to <16 x float>
  %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
  %3 = bitcast <16 x i1> %2 to i16
  %4 = zext i16 %3 to i32
  ret i32 %4
}
; Masked form of the intrinsic-based compare (immediates 2 and 8): the
; <16 x i1> result is ANDed with %__u and zero-extended from i16 to i32.
; Both configurations are expected to apply the mask with a scalar andl.
define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_sae_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_sae_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    andl %edi, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_sae_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x float>
  %1 = bitcast <8 x i64> %__b to <16 x float>
  %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = bitcast <16 x i1> %4 to i16
  %6 = zext i16 %5 to i32
  ret i32 %6
}
; Ordered-equal compare of two 16 x float vectors. The <16 x i1> result is
; widened to <64 x i1>; every lane past 15 selects from zeroinitializer, so
; the upper 48 bits of the returned i64 bitmask are zero.
define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x float>
  %1 = bitcast <8 x i64> %__b to <16 x float>
  %2 = fcmp oeq <16 x float> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
; Ordered-equal compare of 16 floats against a 512-bit vector loaded from
; %__b. The <16 x i1> result is zero-widened to <64 x i1> and returned as an
; i64 bitmask.
define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vcmpeqps (%rdi), %zmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vcmpeqps (%rdi), %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x float>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <16 x float>
  %2 = fcmp oeq <16 x float> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
; Ordered-equal compare of 16 floats against a scalar float loaded from %__b
; and splatted to all lanes (expected to fold into a {1to16} broadcast
; operand). The result is zero-widened to 64 lanes and returned as i64.
define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vcmpeqps (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vcmpeqps (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x float>
  %load = load float, float* %__b
  %vec = insertelement <16 x float> undef, float %load, i32 0
  %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = fcmp oeq <16 x float> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
; Masked variant: the ordered-equal compare of two 16 x float vectors is
; ANDed with %__u (bitcast to <16 x i1>), then zero-widened to 64 lanes and
; returned as an i64 bitmask.
define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x float>
  %1 = bitcast <8 x i64> %__b to <16 x float>
  %2 = fcmp oeq <16 x float> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
; Masked variant with a memory operand: 16 floats are compared (ordered-equal)
; against a 512-bit vector loaded from %__b, the result is ANDed with %__u,
; zero-widened to 64 lanes and returned as an i64 bitmask.
define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vcmpeqps (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vcmpeqps (%rsi), %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x float>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <16 x float>
  %2 = fcmp oeq <16 x float> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
; Masked variant with a broadcast operand: 16 floats are compared
; (ordered-equal) against a scalar loaded from %__b and splatted to all lanes.
; The result is ANDed with %__u, zero-widened to 64 lanes and returned as i64.
define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vcmpeqps (%rsi){1to16}, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x float>
  %load = load float, float* %__b
  %vec = insertelement <16 x float> undef, float %load, i32 0
  %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = fcmp oeq <16 x float> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
; Calls the 512-bit packed-single compare intrinsic with immediates 2 and 8
; and zero-extends the 16-bit result to i64. Note that, despite "oeq" in the
; test name, the expected instruction for these immediates is vcmpleps {sae}.
define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_sae_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    movzwl %ax, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_sae_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x float>
  %1 = bitcast <8 x i64> %__b to <16 x float>
  %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
  %3 = bitcast <16 x i1> %2 to i16
  %4 = zext i16 %3 to i64
  ret i64 %4
}
; Masked form of the intrinsic-based compare (immediates 2 and 8): the
; <16 x i1> result is ANDed with %__u and zero-extended from i16 to i64.
; Both configurations are expected to apply the mask with a scalar andl.
define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_sae_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_sae_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    andl %edi, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_sae_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x float>
  %1 = bitcast <8 x i64> %__b to <16 x float>
  %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8)
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = bitcast <16 x i1> %4 to i16
  %6 = zext i16 %5 to i64
  ret i64 %6
}
; Declaration of the 512-bit packed-double compare intrinsic: two <8 x double>
; operands plus two i32 immediates, returning an <8 x i1> mask.
declare <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
; Ordered-equal compare of two 2 x double vectors. The <2 x i1> result is
; widened to <4 x i1> by shuffling against zeroinitializer (lanes 2-3 are
; zero) and returned as an i4 bitmask.
define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovb %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl $3, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %1 = bitcast <2 x i64> %__b to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = bitcast <4 x i1> %3 to i4
  ret i4 %4
}
; Ordered-equal compare of 2 doubles against a 128-bit vector loaded from
; %__b. The <2 x i1> result is zero-widened to <4 x i1> and returned as an
; i4 bitmask.
define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vcmpeqpd (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovb %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vmovapd (%rdi), %xmm1
; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl $3, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = bitcast <4 x i1> %3 to i4
  ret i4 %4
}
; Ordered-equal compare of 2 doubles against a scalar double loaded from
; %__b and splatted to both lanes. The <2 x i1> result is zero-widened to
; <4 x i1> and returned as an i4 bitmask.
define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vcmpeqpd (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT:    kmovb %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl $3, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load double, double* %__b
  %vec = insertelement <2 x double> undef, double %load, i32 0
  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = bitcast <4 x i1> %3 to i4
  ret i4 %4
}
; Masked variant: the ordered-equal compare of two 2 x double vectors is
; ANDed with %__u (bitcast to <2 x i1>), then zero-widened to <4 x i1> and
; returned as an i4 bitmask.
define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovb %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl $3, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %1 = bitcast <2 x i64> %__b to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = bitcast i2 %__u to <2 x i1>
  %4 = and <2 x i1> %2, %3
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = bitcast <4 x i1> %5 to i4
  ret i4 %6
}
; Masked variant with a memory operand: 2 doubles are compared (ordered-equal)
; against a 128-bit vector loaded from %__b, the result is ANDed with %__u,
; zero-widened to <4 x i1> and returned as an i4 bitmask.
define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovb %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vmovapd (%rsi), %xmm1
; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl $3, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = bitcast i2 %__u to <2 x i1>
  %4 = and <2 x i1> %2, %3
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = bitcast <4 x i1> %5 to i4
  ret i4 %6
}
; Masked variant with a broadcast operand: 2 doubles are compared
; (ordered-equal) against a scalar loaded from %__b and splatted to both
; lanes. The result is ANDed with %__u, zero-widened to <4 x i1>, returned as i4.
define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovb %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl $3, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load double, double* %__b
  %vec = insertelement <2 x double> undef, double %load, i32 0
  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = bitcast i2 %__u to <2 x i1>
  %4 = and <2 x i1> %2, %3
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = bitcast <4 x i1> %5 to i4
  ret i4 %6
}
; Ordered-equal compare of two 2 x double vectors. The <2 x i1> result is
; widened to <8 x i1>; lanes 2-7 all select from zeroinitializer, so only the
; low two bits of the returned i8 bitmask can be set.
define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %1 = bitcast <2 x i64> %__b to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}
; Ordered-equal compare of 2 doubles against a 128-bit vector loaded from
; %__b. The <2 x i1> result is zero-widened to <8 x i1> and returned as an
; i8 bitmask.
define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vcmpeqpd (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vmovapd (%rdi), %xmm1
; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}
; Ordered-equal compare of 2 doubles against a scalar double loaded from
; %__b and splatted to both lanes. The <2 x i1> result is zero-widened to
; <8 x i1> and returned as an i8 bitmask.
define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vcmpeqpd (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load double, double* %__b
  %vec = insertelement <2 x double> undef, double %load, i32 0
  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}
; Masked variant: the ordered-equal compare of two 2 x double vectors is
; ANDed with %__u (bitcast to <2 x i1>), then zero-widened to <8 x i1> and
; returned as an i8 bitmask.
define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %1 = bitcast <2 x i64> %__b to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = bitcast i2 %__u to <2 x i1>
  %4 = and <2 x i1> %2, %3
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}
; Masked variant with a memory operand: 2 doubles are compared (ordered-equal)
; against a 128-bit vector loaded from %__b, the result is ANDed with %__u,
; zero-widened to <8 x i1> and returned as an i8 bitmask.
define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vmovapd (%rsi), %xmm1
; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = bitcast i2 %__u to <2 x i1>
  %4 = and <2 x i1> %2, %3
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}
; Masked variant with a broadcast operand: 2 doubles are compared
; (ordered-equal) against a scalar loaded from %__b and splatted to both
; lanes. The result is ANDed with %__u, zero-widened to <8 x i1>, returned as i8.
define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load double, double* %__b
  %vec = insertelement <2 x double> undef, double %load, i32 0
  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = bitcast i2 %__u to <2 x i1>
  %4 = and <2 x i1> %2, %3
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}
; Ordered-equal compare of two 2 x double vectors. The <2 x i1> result is
; widened to <16 x i1>; lanes 2-15 all select from zeroinitializer, so only
; the low two bits of the returned i16 bitmask can be set.
define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %1 = bitcast <2 x i64> %__b to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
; Ordered-equal compare of 2 doubles against a 128-bit vector loaded from
; %__b. The <2 x i1> result is zero-widened to <16 x i1> and returned as an
; i16 bitmask.
define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vcmpeqpd (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vmovapd (%rdi), %xmm1
; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
; Broadcast variant: a single double loaded from %__b is splatted to both
; lanes (insertelement + shufflevector <0, 0>) and compared oeq against %__a;
; the <2 x i1> result is zero-padded to <16 x i1> and bitcast to i16.
; With AVX512VL the splat becomes a {1to2} broadcast memory operand of
; vcmpeqpd; with only AVX512F it is materialised by vmovddup first.
21822 define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
21823 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
21824 ; VLX: # %bb.0: # %entry
21825 ; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
21826 ; VLX-NEXT: kmovd %k0, %eax
21827 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
21830 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
21831 ; NoVLX: # %bb.0: # %entry
21832 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21833 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
21834 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21835 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21836 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21837 ; NoVLX-NEXT: kmovw %k0, %eax
21838 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
21839 ; NoVLX-NEXT: vzeroupper
21842 %0 = bitcast <2 x i64> %__a to <2 x double>
21843 %load = load double, double* %__b
21844 %vec = insertelement <2 x double> undef, double %load, i32 0
21845 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21846 %2 = fcmp oeq <2 x double> %0, %1
21847 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21848 %4 = bitcast <16 x i1> %3 to i16
; Masked variant: the <2 x i1> result of the oeq compare is ANDed with the i2
; argument %__u (bitcast to <2 x i1>) before being zero-padded to <16 x i1>
; and bitcast to i16.
; Both configurations are expected to move %edi into %k1 and use it as the
; write-mask of vcmpeqpd instead of emitting a separate mask AND.
21852 define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21853 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask:
21854 ; VLX: # %bb.0: # %entry
21855 ; VLX-NEXT: kmovd %edi, %k1
21856 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
21857 ; VLX-NEXT: kmovd %k0, %eax
21858 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
21861 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask:
21862 ; NoVLX: # %bb.0: # %entry
21863 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21864 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21865 ; NoVLX-NEXT: kmovw %edi, %k1
21866 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21867 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21868 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21869 ; NoVLX-NEXT: kmovw %k0, %eax
21870 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
21871 ; NoVLX-NEXT: vzeroupper
21874 %0 = bitcast <2 x i64> %__a to <2 x double>
21875 %1 = bitcast <2 x i64> %__b to <2 x double>
21876 %2 = fcmp oeq <2 x double> %0, %1
21877 %3 = bitcast i2 %__u to <2 x i1>
21878 %4 = and <2 x i1> %2, %3
21879 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21880 %6 = bitcast <16 x i1> %5 to i16
; Masked compare with a memory operand: %__b is loaded as <2 x i64>, compared
; oeq against %__a as <2 x double>, ANDed with the i2 mask %__u, then
; zero-padded to <16 x i1> and bitcast to i16.
; The mask arrives in %edi, so the pointer is in %rsi; AVX512VL folds the load
; into the write-masked vcmpeqpd, AVX512F alone loads it with vmovapd first.
21884 define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21885 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem:
21886 ; VLX: # %bb.0: # %entry
21887 ; VLX-NEXT: kmovd %edi, %k1
21888 ; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
21889 ; VLX-NEXT: kmovd %k0, %eax
21890 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
21893 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem:
21894 ; NoVLX: # %bb.0: # %entry
21895 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21896 ; NoVLX-NEXT: kmovw %edi, %k1
21897 ; NoVLX-NEXT: vmovapd (%rsi), %xmm1
21898 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21899 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21900 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21901 ; NoVLX-NEXT: kmovw %k0, %eax
21902 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
21903 ; NoVLX-NEXT: vzeroupper
21906 %0 = bitcast <2 x i64> %__a to <2 x double>
21907 %load = load <2 x i64>, <2 x i64>* %__b
21908 %1 = bitcast <2 x i64> %load to <2 x double>
21909 %2 = fcmp oeq <2 x double> %0, %1
21910 %3 = bitcast i2 %__u to <2 x i1>
21911 %4 = and <2 x i1> %2, %3
21912 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21913 %6 = bitcast <16 x i1> %5 to i16
; Masked broadcast variant: a double loaded from %__b is splatted to both
; lanes, compared oeq against %__a, ANDed with the i2 mask %__u, then
; zero-padded to <16 x i1> and bitcast to i16.
; AVX512VL is expected to use the (%rsi){1to2} broadcast operand under the
; %k1 write-mask; AVX512F alone splats with vmovddup before the zmm compare.
21917 define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
21918 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
21919 ; VLX: # %bb.0: # %entry
21920 ; VLX-NEXT: kmovd %edi, %k1
21921 ; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
21922 ; VLX-NEXT: kmovd %k0, %eax
21923 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
21926 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
21927 ; NoVLX: # %bb.0: # %entry
21928 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21929 ; NoVLX-NEXT: kmovw %edi, %k1
21930 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
21931 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
21932 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21933 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21934 ; NoVLX-NEXT: kmovw %k0, %eax
21935 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
21936 ; NoVLX-NEXT: vzeroupper
21939 %0 = bitcast <2 x i64> %__a to <2 x double>
21940 %load = load double, double* %__b
21941 %vec = insertelement <2 x double> undef, double %load, i32 0
21942 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
21943 %2 = fcmp oeq <2 x double> %0, %1
21944 %3 = bitcast i2 %__u to <2 x i1>
21945 %4 = and <2 x i1> %2, %3
21946 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21947 %6 = bitcast <16 x i1> %5 to i16
; Ordered-equal compare of two <2 x double> values whose <2 x i1> result is
; zero-padded to <32 x i1> and bitcast to i32.
; With AVX512VL the mask is read with kmovd straight into %eax; with only
; AVX512F the zmm compare is followed by kshiftlw/kshiftrw $14 and kmovw.
21953 define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
21954 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
21955 ; VLX: # %bb.0: # %entry
21956 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
21957 ; VLX-NEXT: kmovd %k0, %eax
21960 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
21961 ; NoVLX: # %bb.0: # %entry
21962 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
21963 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21964 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21965 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21966 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21967 ; NoVLX-NEXT: kmovw %k0, %eax
21968 ; NoVLX-NEXT: vzeroupper
21971 %0 = bitcast <2 x i64> %__a to <2 x double>
21972 %1 = bitcast <2 x i64> %__b to <2 x double>
21973 %2 = fcmp oeq <2 x double> %0, %1
21974 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
21975 %4 = bitcast <32 x i1> %3 to i32
; <2 x double> oeq compare against a vector loaded through %__b; the
; <2 x i1> result is zero-padded to <32 x i1> and bitcast to i32.
; AVX512VL folds the load into vcmpeqpd (%rdi); AVX512F alone loads it with
; vmovapd and compares on zmm registers.
21979 define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
21980 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
21981 ; VLX: # %bb.0: # %entry
21982 ; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
21983 ; VLX-NEXT: kmovd %k0, %eax
21986 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
21987 ; NoVLX: # %bb.0: # %entry
21988 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21989 ; NoVLX-NEXT: vmovapd (%rdi), %xmm1
21990 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
21991 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
21992 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
21993 ; NoVLX-NEXT: kmovw %k0, %eax
21994 ; NoVLX-NEXT: vzeroupper
21997 %0 = bitcast <2 x i64> %__a to <2 x double>
21998 %load = load <2 x i64>, <2 x i64>* %__b
21999 %1 = bitcast <2 x i64> %load to <2 x double>
22000 %2 = fcmp oeq <2 x double> %0, %1
22001 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22002 %4 = bitcast <32 x i1> %3 to i32
; Broadcast variant producing an i32 mask: a double loaded from %__b is
; splatted to both lanes and compared oeq against %__a; the <2 x i1> result
; is zero-padded to <32 x i1> and bitcast to i32.
; AVX512VL uses the (%rdi){1to2} broadcast operand; AVX512F alone splats
; with vmovddup before the zmm compare.
22006 define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
22007 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
22008 ; VLX: # %bb.0: # %entry
22009 ; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
22010 ; VLX-NEXT: kmovd %k0, %eax
22013 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
22014 ; NoVLX: # %bb.0: # %entry
22015 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22016 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
22017 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22018 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
22019 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
22020 ; NoVLX-NEXT: kmovw %k0, %eax
22021 ; NoVLX-NEXT: vzeroupper
22024 %0 = bitcast <2 x i64> %__a to <2 x double>
22025 %load = load double, double* %__b
22026 %vec = insertelement <2 x double> undef, double %load, i32 0
22027 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
22028 %2 = fcmp oeq <2 x double> %0, %1
22029 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22030 %4 = bitcast <32 x i1> %3 to i32
; Masked <2 x double> oeq compare: the <2 x i1> result is ANDed with the i2
; argument %__u, zero-padded to <32 x i1> and bitcast to i32.
; Both configurations are expected to load %edi into %k1 and apply it as the
; write-mask of vcmpeqpd.
22034 define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
22035 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask:
22036 ; VLX: # %bb.0: # %entry
22037 ; VLX-NEXT: kmovd %edi, %k1
22038 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
22039 ; VLX-NEXT: kmovd %k0, %eax
22042 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask:
22043 ; NoVLX: # %bb.0: # %entry
22044 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
22045 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22046 ; NoVLX-NEXT: kmovw %edi, %k1
22047 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22048 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
22049 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
22050 ; NoVLX-NEXT: kmovw %k0, %eax
22051 ; NoVLX-NEXT: vzeroupper
22054 %0 = bitcast <2 x i64> %__a to <2 x double>
22055 %1 = bitcast <2 x i64> %__b to <2 x double>
22056 %2 = fcmp oeq <2 x double> %0, %1
22057 %3 = bitcast i2 %__u to <2 x i1>
22058 %4 = and <2 x i1> %2, %3
22059 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22060 %6 = bitcast <32 x i1> %5 to i32
; Masked compare with a memory operand: %__b is loaded as <2 x i64>, compared
; oeq against %__a as <2 x double>, ANDed with the i2 mask %__u, zero-padded
; to <32 x i1> and bitcast to i32.
; AVX512VL folds the (%rsi) load into the write-masked vcmpeqpd; AVX512F
; alone loads it with vmovapd before the zmm compare.
22064 define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
22065 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem:
22066 ; VLX: # %bb.0: # %entry
22067 ; VLX-NEXT: kmovd %edi, %k1
22068 ; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
22069 ; VLX-NEXT: kmovd %k0, %eax
22072 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem:
22073 ; NoVLX: # %bb.0: # %entry
22074 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22075 ; NoVLX-NEXT: kmovw %edi, %k1
22076 ; NoVLX-NEXT: vmovapd (%rsi), %xmm1
22077 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22078 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
22079 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
22080 ; NoVLX-NEXT: kmovw %k0, %eax
22081 ; NoVLX-NEXT: vzeroupper
22084 %0 = bitcast <2 x i64> %__a to <2 x double>
22085 %load = load <2 x i64>, <2 x i64>* %__b
22086 %1 = bitcast <2 x i64> %load to <2 x double>
22087 %2 = fcmp oeq <2 x double> %0, %1
22088 %3 = bitcast i2 %__u to <2 x i1>
22089 %4 = and <2 x i1> %2, %3
22090 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22091 %6 = bitcast <32 x i1> %5 to i32
; Masked broadcast variant producing an i32 mask: a double loaded from %__b
; is splatted to both lanes, compared oeq against %__a, ANDed with the i2
; mask %__u, zero-padded to <32 x i1> and bitcast to i32.
; AVX512VL uses the (%rsi){1to2} broadcast operand under %k1; AVX512F alone
; splats with vmovddup before the write-masked zmm compare.
22095 define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
22096 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
22097 ; VLX: # %bb.0: # %entry
22098 ; VLX-NEXT: kmovd %edi, %k1
22099 ; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
22100 ; VLX-NEXT: kmovd %k0, %eax
22103 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
22104 ; NoVLX: # %bb.0: # %entry
22105 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22106 ; NoVLX-NEXT: kmovw %edi, %k1
22107 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
22108 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22109 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
22110 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
22111 ; NoVLX-NEXT: kmovw %k0, %eax
22112 ; NoVLX-NEXT: vzeroupper
22115 %0 = bitcast <2 x i64> %__a to <2 x double>
22116 %load = load double, double* %__b
22117 %vec = insertelement <2 x double> undef, double %load, i32 0
22118 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
22119 %2 = fcmp oeq <2 x double> %0, %1
22120 %3 = bitcast i2 %__u to <2 x i1>
22121 %4 = and <2 x i1> %2, %3
22122 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22123 %6 = bitcast <32 x i1> %5 to i32
22129 define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
22130 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
22131 ; VLX: # %bb.0: # %entry
22132 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
22133 ; VLX-NEXT: kmovq %k0, %rax
22136 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
22137 ; NoVLX: # %bb.0: # %entry
22138 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
22139 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22140 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22141 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
22142 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
22143 ; NoVLX-NEXT: kmovw %k0, %eax
22144 ; NoVLX-NEXT: movzwl %ax, %eax
22145 ; NoVLX-NEXT: vzeroupper
22148 %0 = bitcast <2 x i64> %__a to <2 x double>
22149 %1 = bitcast <2 x i64> %__b to <2 x double>
22150 %2 = fcmp oeq <2 x double> %0, %1
22151 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22152 %4 = bitcast <64 x i1> %3 to i64
22156 define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
22157 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
22158 ; VLX: # %bb.0: # %entry
22159 ; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
22160 ; VLX-NEXT: kmovq %k0, %rax
22163 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
22164 ; NoVLX: # %bb.0: # %entry
22165 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22166 ; NoVLX-NEXT: vmovapd (%rdi), %xmm1
22167 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22168 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
22169 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
22170 ; NoVLX-NEXT: kmovw %k0, %eax
22171 ; NoVLX-NEXT: movzwl %ax, %eax
22172 ; NoVLX-NEXT: vzeroupper
22175 %0 = bitcast <2 x i64> %__a to <2 x double>
22176 %load = load <2 x i64>, <2 x i64>* %__b
22177 %1 = bitcast <2 x i64> %load to <2 x double>
22178 %2 = fcmp oeq <2 x double> %0, %1
22179 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22180 %4 = bitcast <64 x i1> %3 to i64
22184 define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
22185 ; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
22186 ; VLX: # %bb.0: # %entry
22187 ; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
22188 ; VLX-NEXT: kmovq %k0, %rax
22191 ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
22192 ; NoVLX: # %bb.0: # %entry
22193 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22194 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
22195 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22196 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
22197 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
22198 ; NoVLX-NEXT: kmovw %k0, %eax
22199 ; NoVLX-NEXT: movzwl %ax, %eax
22200 ; NoVLX-NEXT: vzeroupper
22203 %0 = bitcast <2 x i64> %__a to <2 x double>
22204 %load = load double, double* %__b
22205 %vec = insertelement <2 x double> undef, double %load, i32 0
22206 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
22207 %2 = fcmp oeq <2 x double> %0, %1
22208 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22209 %4 = bitcast <64 x i1> %3 to i64
22213 define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
22214 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask:
22215 ; VLX: # %bb.0: # %entry
22216 ; VLX-NEXT: kmovd %edi, %k1
22217 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
22218 ; VLX-NEXT: kmovq %k0, %rax
22221 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask:
22222 ; NoVLX: # %bb.0: # %entry
22223 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
22224 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22225 ; NoVLX-NEXT: kmovw %edi, %k1
22226 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22227 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
22228 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
22229 ; NoVLX-NEXT: kmovw %k0, %eax
22230 ; NoVLX-NEXT: movzwl %ax, %eax
22231 ; NoVLX-NEXT: vzeroupper
22234 %0 = bitcast <2 x i64> %__a to <2 x double>
22235 %1 = bitcast <2 x i64> %__b to <2 x double>
22236 %2 = fcmp oeq <2 x double> %0, %1
22237 %3 = bitcast i2 %__u to <2 x i1>
22238 %4 = and <2 x i1> %2, %3
22239 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22240 %6 = bitcast <64 x i1> %5 to i64
22244 define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
22245 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem:
22246 ; VLX: # %bb.0: # %entry
22247 ; VLX-NEXT: kmovd %edi, %k1
22248 ; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
22249 ; VLX-NEXT: kmovq %k0, %rax
22252 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem:
22253 ; NoVLX: # %bb.0: # %entry
22254 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22255 ; NoVLX-NEXT: kmovw %edi, %k1
22256 ; NoVLX-NEXT: vmovapd (%rsi), %xmm1
22257 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22258 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
22259 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
22260 ; NoVLX-NEXT: kmovw %k0, %eax
22261 ; NoVLX-NEXT: movzwl %ax, %eax
22262 ; NoVLX-NEXT: vzeroupper
22265 %0 = bitcast <2 x i64> %__a to <2 x double>
22266 %load = load <2 x i64>, <2 x i64>* %__b
22267 %1 = bitcast <2 x i64> %load to <2 x double>
22268 %2 = fcmp oeq <2 x double> %0, %1
22269 %3 = bitcast i2 %__u to <2 x i1>
22270 %4 = and <2 x i1> %2, %3
22271 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22272 %6 = bitcast <64 x i1> %5 to i64
22276 define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
22277 ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
22278 ; VLX: # %bb.0: # %entry
22279 ; VLX-NEXT: kmovd %edi, %k1
22280 ; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
22281 ; VLX-NEXT: kmovq %k0, %rax
22284 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
22285 ; NoVLX: # %bb.0: # %entry
22286 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22287 ; NoVLX-NEXT: kmovw %edi, %k1
22288 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
22289 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22290 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0
22291 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0
22292 ; NoVLX-NEXT: kmovw %k0, %eax
22293 ; NoVLX-NEXT: movzwl %ax, %eax
22294 ; NoVLX-NEXT: vzeroupper
22297 %0 = bitcast <2 x i64> %__a to <2 x double>
22298 %load = load double, double* %__b
22299 %vec = insertelement <2 x double> undef, double %load, i32 0
22300 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
22301 %2 = fcmp oeq <2 x double> %0, %1
22302 %3 = bitcast i2 %__u to <2 x i1>
22303 %4 = and <2 x i1> %2, %3
22304 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
22305 %6 = bitcast <64 x i1> %5 to i64
; Ordered-equal compare of two <4 x double> values (bitcast from the
; <4 x i64> arguments). The <4 x i1> result is padded with four lanes from
; zeroinitializer to <8 x i1> and bitcast to i8.
; With AVX512VL a 256-bit vcmpeqpd writes %k0 directly; with only AVX512F the
; compare runs on zmm registers and kshiftlw/kshiftrw $12 keep just the low
; four mask bits.
22311 define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22312 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask:
22313 ; VLX: # %bb.0: # %entry
22314 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
22315 ; VLX-NEXT: kmovd %k0, %eax
22316 ; VLX-NEXT: # kill: def $al killed $al killed $eax
22317 ; VLX-NEXT: vzeroupper
22320 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask:
22321 ; NoVLX: # %bb.0: # %entry
22322 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22323 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22324 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22325 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22326 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22327 ; NoVLX-NEXT: kmovw %k0, %eax
22328 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
22329 ; NoVLX-NEXT: vzeroupper
22332 %0 = bitcast <4 x i64> %__a to <4 x double>
22333 %1 = bitcast <4 x i64> %__b to <4 x double>
22334 %2 = fcmp oeq <4 x double> %0, %1
22335 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22336 %4 = bitcast <8 x i1> %3 to i8
; <4 x double> oeq compare against a vector loaded through the <4 x i64>*
; argument %__b; the <4 x i1> result is zero-padded to <8 x i1> and bitcast
; to i8.
; AVX512VL folds the load into vcmpeqpd (%rdi) on ymm; AVX512F alone loads
; with vmovapd into %ymm1 and compares on zmm registers.
22340 define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22341 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem:
22342 ; VLX: # %bb.0: # %entry
22343 ; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
22344 ; VLX-NEXT: kmovd %k0, %eax
22345 ; VLX-NEXT: # kill: def $al killed $al killed $eax
22346 ; VLX-NEXT: vzeroupper
22349 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem:
22350 ; NoVLX: # %bb.0: # %entry
22351 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22352 ; NoVLX-NEXT: vmovapd (%rdi), %ymm1
22353 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22354 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22355 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22356 ; NoVLX-NEXT: kmovw %k0, %eax
22357 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
22358 ; NoVLX-NEXT: vzeroupper
22361 %0 = bitcast <4 x i64> %__a to <4 x double>
22362 %load = load <4 x i64>, <4 x i64>* %__b
22363 %1 = bitcast <4 x i64> %load to <4 x double>
22364 %2 = fcmp oeq <4 x double> %0, %1
22365 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22366 %4 = bitcast <8 x i1> %3 to i8
; Broadcast variant: a double loaded from %__b is splatted to all four lanes
; (insertelement + shufflevector <0, 0, 0, 0>) and compared oeq against
; %__a; the <4 x i1> result is zero-padded to <8 x i1> and bitcast to i8.
; AVX512VL uses the (%rdi){1to4} broadcast operand; AVX512F alone splats
; with vbroadcastsd into %ymm1 before the zmm compare.
22370 define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
22371 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
22372 ; VLX: # %bb.0: # %entry
22373 ; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
22374 ; VLX-NEXT: kmovd %k0, %eax
22375 ; VLX-NEXT: # kill: def $al killed $al killed $eax
22376 ; VLX-NEXT: vzeroupper
22379 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
22380 ; NoVLX: # %bb.0: # %entry
22381 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22382 ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
22383 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22384 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22385 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22386 ; NoVLX-NEXT: kmovw %k0, %eax
22387 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
22388 ; NoVLX-NEXT: vzeroupper
22391 %0 = bitcast <4 x i64> %__a to <4 x double>
22392 %load = load double, double* %__b
22393 %vec = insertelement <4 x double> undef, double %load, i32 0
22394 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22395 %2 = fcmp oeq <4 x double> %0, %1
22396 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22397 %4 = bitcast <8 x i1> %3 to i8
; Masked <4 x double> oeq compare: the <4 x i1> result is ANDed with the i4
; argument %__u (bitcast to <4 x i1>), zero-padded to <8 x i1> and bitcast
; to i8.
; Both configurations are expected to move %edi into %k1 and use it as the
; write-mask of vcmpeqpd instead of emitting a separate mask AND.
22401 define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22402 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask:
22403 ; VLX: # %bb.0: # %entry
22404 ; VLX-NEXT: kmovd %edi, %k1
22405 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
22406 ; VLX-NEXT: kmovd %k0, %eax
22407 ; VLX-NEXT: # kill: def $al killed $al killed $eax
22408 ; VLX-NEXT: vzeroupper
22411 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask:
22412 ; NoVLX: # %bb.0: # %entry
22413 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22414 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22415 ; NoVLX-NEXT: kmovw %edi, %k1
22416 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22417 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22418 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22419 ; NoVLX-NEXT: kmovw %k0, %eax
22420 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
22421 ; NoVLX-NEXT: vzeroupper
22424 %0 = bitcast <4 x i64> %__a to <4 x double>
22425 %1 = bitcast <4 x i64> %__b to <4 x double>
22426 %2 = fcmp oeq <4 x double> %0, %1
22427 %3 = bitcast i4 %__u to <4 x i1>
22428 %4 = and <4 x i1> %2, %3
22429 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22430 %6 = bitcast <8 x i1> %5 to i8
; Masked compare with a memory operand: %__b is loaded as <4 x i64>, compared
; oeq against %__a as <4 x double>, ANDed with the i4 mask %__u, zero-padded
; to <8 x i1> and bitcast to i8.
; AVX512VL folds the (%rsi) load into the write-masked ymm vcmpeqpd; AVX512F
; alone loads with vmovapd into %ymm1 before the zmm compare.
22434 define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22435 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem:
22436 ; VLX: # %bb.0: # %entry
22437 ; VLX-NEXT: kmovd %edi, %k1
22438 ; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
22439 ; VLX-NEXT: kmovd %k0, %eax
22440 ; VLX-NEXT: # kill: def $al killed $al killed $eax
22441 ; VLX-NEXT: vzeroupper
22444 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem:
22445 ; NoVLX: # %bb.0: # %entry
22446 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22447 ; NoVLX-NEXT: kmovw %edi, %k1
22448 ; NoVLX-NEXT: vmovapd (%rsi), %ymm1
22449 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22450 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22451 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22452 ; NoVLX-NEXT: kmovw %k0, %eax
22453 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
22454 ; NoVLX-NEXT: vzeroupper
22457 %0 = bitcast <4 x i64> %__a to <4 x double>
22458 %load = load <4 x i64>, <4 x i64>* %__b
22459 %1 = bitcast <4 x i64> %load to <4 x double>
22460 %2 = fcmp oeq <4 x double> %0, %1
22461 %3 = bitcast i4 %__u to <4 x i1>
22462 %4 = and <4 x i1> %2, %3
22463 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22464 %6 = bitcast <8 x i1> %5 to i8
; Masked broadcast variant: a double loaded from %__b is splatted to all four
; lanes, compared oeq against %__a, ANDed with the i4 mask %__u, zero-padded
; to <8 x i1> and bitcast to i8.
; AVX512VL uses the (%rsi){1to4} broadcast operand under %k1; AVX512F alone
; splats with vbroadcastsd before the write-masked zmm compare.
22468 define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
22469 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
22470 ; VLX: # %bb.0: # %entry
22471 ; VLX-NEXT: kmovd %edi, %k1
22472 ; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
22473 ; VLX-NEXT: kmovd %k0, %eax
22474 ; VLX-NEXT: # kill: def $al killed $al killed $eax
22475 ; VLX-NEXT: vzeroupper
22478 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
22479 ; NoVLX: # %bb.0: # %entry
22480 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22481 ; NoVLX-NEXT: kmovw %edi, %k1
22482 ; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1
22483 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22484 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22485 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22486 ; NoVLX-NEXT: kmovw %k0, %eax
22487 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
22488 ; NoVLX-NEXT: vzeroupper
22491 %0 = bitcast <4 x i64> %__a to <4 x double>
22492 %load = load double, double* %__b
22493 %vec = insertelement <4 x double> undef, double %load, i32 0
22494 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22495 %2 = fcmp oeq <4 x double> %0, %1
22496 %3 = bitcast i4 %__u to <4 x i1>
22497 %4 = and <4 x i1> %2, %3
22498 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22499 %6 = bitcast <8 x i1> %5 to i8
; Ordered-equal compare of two <4 x double> values whose <4 x i1> result is
; padded with lanes from zeroinitializer (shuffle indices 4-7) up to
; <16 x i1> and bitcast to i16.
; With AVX512VL a 256-bit vcmpeqpd writes %k0 directly; with only AVX512F the
; compare runs on zmm registers and kshiftlw/kshiftrw $12 keep just the low
; four mask bits.
22505 define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22506 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask:
22507 ; VLX: # %bb.0: # %entry
22508 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
22509 ; VLX-NEXT: kmovd %k0, %eax
22510 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22511 ; VLX-NEXT: vzeroupper
22514 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask:
22515 ; NoVLX: # %bb.0: # %entry
22516 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22517 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22518 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22519 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22520 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22521 ; NoVLX-NEXT: kmovw %k0, %eax
22522 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22523 ; NoVLX-NEXT: vzeroupper
22526 %0 = bitcast <4 x i64> %__a to <4 x double>
22527 %1 = bitcast <4 x i64> %__b to <4 x double>
22528 %2 = fcmp oeq <4 x double> %0, %1
22529 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22530 %4 = bitcast <16 x i1> %3 to i16
; Unmasked fcmp oeq of <4 x double> %__a against a full vector loaded from
; %__b. The <4 x i1> result is widened to <16 x i1> with zero lanes (shuffle
; indices >= 4 select from the zeroinitializer operand) and bitcast to i16.
; With +avx512vl the load is folded as the (%rdi) memory operand of a ymm
; vcmpeqpd; with only +avx512f it is loaded separately with vmovapd, compared
; on zmm, and followed by a kshiftlw/kshiftrw $12 pair that clears mask bits
; 4-15.
22534 define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22535 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem:
22536 ; VLX: # %bb.0: # %entry
22537 ; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
22538 ; VLX-NEXT: kmovd %k0, %eax
22539 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22540 ; VLX-NEXT: vzeroupper
22543 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem:
22544 ; NoVLX: # %bb.0: # %entry
22545 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22546 ; NoVLX-NEXT: vmovapd (%rdi), %ymm1
22547 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22548 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22549 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22550 ; NoVLX-NEXT: kmovw %k0, %eax
22551 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22552 ; NoVLX-NEXT: vzeroupper
22555 %0 = bitcast <4 x i64> %__a to <4 x double>
22556 %load = load <4 x i64>, <4 x i64>* %__b
22557 %1 = bitcast <4 x i64> %load to <4 x double>
22558 %2 = fcmp oeq <4 x double> %0, %1
22559 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22560 %4 = bitcast <16 x i1> %3 to i16
; Unmasked fcmp oeq of <4 x double> %__a against a scalar double loaded from
; %__b and splatted to all four lanes. The <4 x i1> result is widened to
; <16 x i1> with zero lanes (shuffle indices >= 4 select from the
; zeroinitializer operand) and bitcast to i16.
; With +avx512vl the splat is folded as a {1to4} broadcast memory operand;
; with only +avx512f it becomes a separate vbroadcastsd, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
22564 define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
22565 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
22566 ; VLX: # %bb.0: # %entry
22567 ; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
22568 ; VLX-NEXT: kmovd %k0, %eax
22569 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22570 ; VLX-NEXT: vzeroupper
22573 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
22574 ; NoVLX: # %bb.0: # %entry
22575 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22576 ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
22577 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22578 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22579 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22580 ; NoVLX-NEXT: kmovw %k0, %eax
22581 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22582 ; NoVLX-NEXT: vzeroupper
22585 %0 = bitcast <4 x i64> %__a to <4 x double>
22586 %load = load double, double* %__b
22587 %vec = insertelement <4 x double> undef, double %load, i32 0
22588 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22589 %2 = fcmp oeq <4 x double> %0, %1
22590 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22591 %4 = bitcast <16 x i1> %3 to i16
; Masked fcmp oeq of two <4 x double> values (bitcast from the <4 x i64>
; arguments). The i4 mask %__u is ANDed into the compare result, which is
; widened to <16 x i1> with zero lanes (shuffle indices >= 4 select from the
; zeroinitializer operand) and bitcast to i16.
; Both runs move %edi into %k1 and use it as the write mask of vcmpeqpd. With
; +avx512vl the compare is on ymm; with only +avx512f it is on zmm and is
; followed by a kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
22595 define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22596 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask:
22597 ; VLX: # %bb.0: # %entry
22598 ; VLX-NEXT: kmovd %edi, %k1
22599 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
22600 ; VLX-NEXT: kmovd %k0, %eax
22601 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22602 ; VLX-NEXT: vzeroupper
22605 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask:
22606 ; NoVLX: # %bb.0: # %entry
22607 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22608 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22609 ; NoVLX-NEXT: kmovw %edi, %k1
22610 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22611 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22612 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22613 ; NoVLX-NEXT: kmovw %k0, %eax
22614 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22615 ; NoVLX-NEXT: vzeroupper
22618 %0 = bitcast <4 x i64> %__a to <4 x double>
22619 %1 = bitcast <4 x i64> %__b to <4 x double>
22620 %2 = fcmp oeq <4 x double> %0, %1
22621 %3 = bitcast i4 %__u to <4 x i1>
22622 %4 = and <4 x i1> %2, %3
22623 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22624 %6 = bitcast <16 x i1> %5 to i16
; Masked fcmp oeq of <4 x double> %__a against a full vector loaded from
; %__b. The i4 mask %__u is ANDed into the compare result, which is widened
; to <16 x i1> with zero lanes (shuffle indices >= 4 select from the
; zeroinitializer operand) and bitcast to i16.
; With +avx512vl the load is folded as the (%rsi) memory operand of a masked
; ymm vcmpeqpd; with only +avx512f it is loaded with vmovapd, compared on zmm
; under %k1, and followed by a kshiftlw/kshiftrw $12 pair that clears mask
; bits 4-15.
22628 define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22629 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem:
22630 ; VLX: # %bb.0: # %entry
22631 ; VLX-NEXT: kmovd %edi, %k1
22632 ; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
22633 ; VLX-NEXT: kmovd %k0, %eax
22634 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22635 ; VLX-NEXT: vzeroupper
22638 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem:
22639 ; NoVLX: # %bb.0: # %entry
22640 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22641 ; NoVLX-NEXT: kmovw %edi, %k1
22642 ; NoVLX-NEXT: vmovapd (%rsi), %ymm1
22643 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22644 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22645 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22646 ; NoVLX-NEXT: kmovw %k0, %eax
22647 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22648 ; NoVLX-NEXT: vzeroupper
22651 %0 = bitcast <4 x i64> %__a to <4 x double>
22652 %load = load <4 x i64>, <4 x i64>* %__b
22653 %1 = bitcast <4 x i64> %load to <4 x double>
22654 %2 = fcmp oeq <4 x double> %0, %1
22655 %3 = bitcast i4 %__u to <4 x i1>
22656 %4 = and <4 x i1> %2, %3
22657 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22658 %6 = bitcast <16 x i1> %5 to i16
; Masked fcmp oeq of <4 x double> %__a against a scalar double loaded from
; %__b and splatted to all four lanes. The i4 mask %__u is ANDed into the
; compare result, which is widened to <16 x i1> with zero lanes (shuffle
; indices >= 4 select from the zeroinitializer operand) and bitcast to i16.
; With +avx512vl the splat is folded as a {1to4} broadcast into a masked ymm
; vcmpeqpd; with only +avx512f it becomes a vbroadcastsd, a masked zmm
; compare, and a kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
22662 define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
22663 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
22664 ; VLX: # %bb.0: # %entry
22665 ; VLX-NEXT: kmovd %edi, %k1
22666 ; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
22667 ; VLX-NEXT: kmovd %k0, %eax
22668 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
22669 ; VLX-NEXT: vzeroupper
22672 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
22673 ; NoVLX: # %bb.0: # %entry
22674 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22675 ; NoVLX-NEXT: kmovw %edi, %k1
22676 ; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1
22677 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22678 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22679 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22680 ; NoVLX-NEXT: kmovw %k0, %eax
22681 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
22682 ; NoVLX-NEXT: vzeroupper
22685 %0 = bitcast <4 x i64> %__a to <4 x double>
22686 %load = load double, double* %__b
22687 %vec = insertelement <4 x double> undef, double %load, i32 0
22688 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22689 %2 = fcmp oeq <4 x double> %0, %1
22690 %3 = bitcast i4 %__u to <4 x i1>
22691 %4 = and <4 x i1> %2, %3
22692 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22693 %6 = bitcast <16 x i1> %5 to i16
; Unmasked fcmp oeq of two <4 x double> values (bitcast from the <4 x i64>
; arguments). The <4 x i1> result is widened to <32 x i1> with zero lanes
; (shuffle indices >= 4 select from the zeroinitializer operand) and bitcast
; to i32.
; With +avx512vl a ymm vcmpeqpd writes %k0, which is read with kmovd; with
; only +avx512f the compare is on zmm and is followed by a kshiftlw/kshiftrw
; $12 pair that clears mask bits 4-15 before the kmovw.
22699 define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22700 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask:
22701 ; VLX: # %bb.0: # %entry
22702 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
22703 ; VLX-NEXT: kmovd %k0, %eax
22704 ; VLX-NEXT: vzeroupper
22707 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask:
22708 ; NoVLX: # %bb.0: # %entry
22709 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22710 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22711 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22712 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22713 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22714 ; NoVLX-NEXT: kmovw %k0, %eax
22715 ; NoVLX-NEXT: vzeroupper
22718 %0 = bitcast <4 x i64> %__a to <4 x double>
22719 %1 = bitcast <4 x i64> %__b to <4 x double>
22720 %2 = fcmp oeq <4 x double> %0, %1
22721 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22722 %4 = bitcast <32 x i1> %3 to i32
; Unmasked fcmp oeq of <4 x double> %__a against a full vector loaded from
; %__b. The <4 x i1> result is widened to <32 x i1> with zero lanes (shuffle
; indices >= 4 select from the zeroinitializer operand) and bitcast to i32.
; With +avx512vl the load is folded as the (%rdi) memory operand of a ymm
; vcmpeqpd; with only +avx512f it is loaded with vmovapd, compared on zmm,
; and followed by a kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
22726 define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22727 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem:
22728 ; VLX: # %bb.0: # %entry
22729 ; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
22730 ; VLX-NEXT: kmovd %k0, %eax
22731 ; VLX-NEXT: vzeroupper
22734 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem:
22735 ; NoVLX: # %bb.0: # %entry
22736 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22737 ; NoVLX-NEXT: vmovapd (%rdi), %ymm1
22738 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22739 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22740 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22741 ; NoVLX-NEXT: kmovw %k0, %eax
22742 ; NoVLX-NEXT: vzeroupper
22745 %0 = bitcast <4 x i64> %__a to <4 x double>
22746 %load = load <4 x i64>, <4 x i64>* %__b
22747 %1 = bitcast <4 x i64> %load to <4 x double>
22748 %2 = fcmp oeq <4 x double> %0, %1
22749 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22750 %4 = bitcast <32 x i1> %3 to i32
; Unmasked fcmp oeq of <4 x double> %__a against a scalar double loaded from
; %__b and splatted to all four lanes. The <4 x i1> result is widened to
; <32 x i1> with zero lanes (shuffle indices >= 4 select from the
; zeroinitializer operand) and bitcast to i32.
; With +avx512vl the splat is folded as a {1to4} broadcast memory operand;
; with only +avx512f it becomes a vbroadcastsd, a zmm compare, and a
; kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
22754 define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
22755 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
22756 ; VLX: # %bb.0: # %entry
22757 ; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
22758 ; VLX-NEXT: kmovd %k0, %eax
22759 ; VLX-NEXT: vzeroupper
22762 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
22763 ; NoVLX: # %bb.0: # %entry
22764 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22765 ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
22766 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22767 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22768 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22769 ; NoVLX-NEXT: kmovw %k0, %eax
22770 ; NoVLX-NEXT: vzeroupper
22773 %0 = bitcast <4 x i64> %__a to <4 x double>
22774 %load = load double, double* %__b
22775 %vec = insertelement <4 x double> undef, double %load, i32 0
22776 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22777 %2 = fcmp oeq <4 x double> %0, %1
22778 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22779 %4 = bitcast <32 x i1> %3 to i32
; Masked fcmp oeq of two <4 x double> values (bitcast from the <4 x i64>
; arguments). The i4 mask %__u is ANDed into the compare result, which is
; widened to <32 x i1> with zero lanes (shuffle indices >= 4 select from the
; zeroinitializer operand) and bitcast to i32.
; Both runs move %edi into %k1 and use it as the write mask of vcmpeqpd. With
; +avx512vl the compare is on ymm; with only +avx512f it is on zmm and is
; followed by a kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
22783 define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22784 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask:
22785 ; VLX: # %bb.0: # %entry
22786 ; VLX-NEXT: kmovd %edi, %k1
22787 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
22788 ; VLX-NEXT: kmovd %k0, %eax
22789 ; VLX-NEXT: vzeroupper
22792 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask:
22793 ; NoVLX: # %bb.0: # %entry
22794 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22795 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22796 ; NoVLX-NEXT: kmovw %edi, %k1
22797 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22798 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22799 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22800 ; NoVLX-NEXT: kmovw %k0, %eax
22801 ; NoVLX-NEXT: vzeroupper
22804 %0 = bitcast <4 x i64> %__a to <4 x double>
22805 %1 = bitcast <4 x i64> %__b to <4 x double>
22806 %2 = fcmp oeq <4 x double> %0, %1
22807 %3 = bitcast i4 %__u to <4 x i1>
22808 %4 = and <4 x i1> %2, %3
22809 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22810 %6 = bitcast <32 x i1> %5 to i32
; Masked fcmp oeq of <4 x double> %__a against a full vector loaded from
; %__b. The i4 mask %__u is ANDed into the compare result, which is widened
; to <32 x i1> with zero lanes (shuffle indices >= 4 select from the
; zeroinitializer operand) and bitcast to i32.
; With +avx512vl the load is folded as the (%rsi) memory operand of a masked
; ymm vcmpeqpd; with only +avx512f it is loaded with vmovapd, compared on zmm
; under %k1, and followed by a kshiftlw/kshiftrw $12 pair that clears mask
; bits 4-15.
22814 define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22815 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem:
22816 ; VLX: # %bb.0: # %entry
22817 ; VLX-NEXT: kmovd %edi, %k1
22818 ; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
22819 ; VLX-NEXT: kmovd %k0, %eax
22820 ; VLX-NEXT: vzeroupper
22823 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem:
22824 ; NoVLX: # %bb.0: # %entry
22825 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22826 ; NoVLX-NEXT: kmovw %edi, %k1
22827 ; NoVLX-NEXT: vmovapd (%rsi), %ymm1
22828 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22829 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22830 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22831 ; NoVLX-NEXT: kmovw %k0, %eax
22832 ; NoVLX-NEXT: vzeroupper
22835 %0 = bitcast <4 x i64> %__a to <4 x double>
22836 %load = load <4 x i64>, <4 x i64>* %__b
22837 %1 = bitcast <4 x i64> %load to <4 x double>
22838 %2 = fcmp oeq <4 x double> %0, %1
22839 %3 = bitcast i4 %__u to <4 x i1>
22840 %4 = and <4 x i1> %2, %3
22841 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22842 %6 = bitcast <32 x i1> %5 to i32
; Masked fcmp oeq of <4 x double> %__a against a scalar double loaded from
; %__b and splatted to all four lanes. The i4 mask %__u is ANDed into the
; compare result, which is widened to <32 x i1> with zero lanes (shuffle
; indices >= 4 select from the zeroinitializer operand) and bitcast to i32.
; With +avx512vl the splat is folded as a {1to4} broadcast into a masked ymm
; vcmpeqpd; with only +avx512f it becomes a vbroadcastsd, a masked zmm
; compare, and a kshiftlw/kshiftrw $12 pair that clears mask bits 4-15.
22846 define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
22847 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
22848 ; VLX: # %bb.0: # %entry
22849 ; VLX-NEXT: kmovd %edi, %k1
22850 ; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
22851 ; VLX-NEXT: kmovd %k0, %eax
22852 ; VLX-NEXT: vzeroupper
22855 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
22856 ; NoVLX: # %bb.0: # %entry
22857 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22858 ; NoVLX-NEXT: kmovw %edi, %k1
22859 ; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1
22860 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22861 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22862 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22863 ; NoVLX-NEXT: kmovw %k0, %eax
22864 ; NoVLX-NEXT: vzeroupper
22867 %0 = bitcast <4 x i64> %__a to <4 x double>
22868 %load = load double, double* %__b
22869 %vec = insertelement <4 x double> undef, double %load, i32 0
22870 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22871 %2 = fcmp oeq <4 x double> %0, %1
22872 %3 = bitcast i4 %__u to <4 x i1>
22873 %4 = and <4 x i1> %2, %3
22874 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22875 %6 = bitcast <32 x i1> %5 to i32
; Unmasked fcmp oeq of two <4 x double> values (bitcast from the <4 x i64>
; arguments). The <4 x i1> result is widened to <64 x i1> with zero lanes
; (shuffle indices >= 4 select from the zeroinitializer operand) and bitcast
; to i64.
; With +avx512vl a ymm vcmpeqpd writes %k0, which is read with kmovq; with
; only +avx512f the compare is on zmm and is followed by a kshiftlw/kshiftrw
; $12 pair that clears mask bits 4-15, then kmovw and a movzwl of %ax.
22881 define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22882 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
22883 ; VLX: # %bb.0: # %entry
22884 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
22885 ; VLX-NEXT: kmovq %k0, %rax
22886 ; VLX-NEXT: vzeroupper
22889 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
22890 ; NoVLX: # %bb.0: # %entry
22891 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22892 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22893 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22894 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22895 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22896 ; NoVLX-NEXT: kmovw %k0, %eax
22897 ; NoVLX-NEXT: movzwl %ax, %eax
22898 ; NoVLX-NEXT: vzeroupper
22901 %0 = bitcast <4 x i64> %__a to <4 x double>
22902 %1 = bitcast <4 x i64> %__b to <4 x double>
22903 %2 = fcmp oeq <4 x double> %0, %1
22904 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22905 %4 = bitcast <64 x i1> %3 to i64
; Unmasked fcmp oeq of <4 x double> %__a against a full vector loaded from
; %__b. The <4 x i1> result is widened to <64 x i1> with zero lanes (shuffle
; indices >= 4 select from the zeroinitializer operand) and bitcast to i64.
; With +avx512vl the load is folded as the (%rdi) memory operand of a ymm
; vcmpeqpd and the mask is read with kmovq; with only +avx512f it is loaded
; with vmovapd, compared on zmm, trimmed with a kshiftlw/kshiftrw $12 pair
; (clearing mask bits 4-15), and read with kmovw plus movzwl.
22909 define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
22910 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem:
22911 ; VLX: # %bb.0: # %entry
22912 ; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
22913 ; VLX-NEXT: kmovq %k0, %rax
22914 ; VLX-NEXT: vzeroupper
22917 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem:
22918 ; NoVLX: # %bb.0: # %entry
22919 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22920 ; NoVLX-NEXT: vmovapd (%rdi), %ymm1
22921 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22922 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22923 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22924 ; NoVLX-NEXT: kmovw %k0, %eax
22925 ; NoVLX-NEXT: movzwl %ax, %eax
22926 ; NoVLX-NEXT: vzeroupper
22929 %0 = bitcast <4 x i64> %__a to <4 x double>
22930 %load = load <4 x i64>, <4 x i64>* %__b
22931 %1 = bitcast <4 x i64> %load to <4 x double>
22932 %2 = fcmp oeq <4 x double> %0, %1
22933 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22934 %4 = bitcast <64 x i1> %3 to i64
; Unmasked fcmp oeq of <4 x double> %__a against a scalar double loaded from
; %__b and splatted to all four lanes. The <4 x i1> result is widened to
; <64 x i1> with zero lanes (shuffle indices >= 4 select from the
; zeroinitializer operand) and bitcast to i64.
; With +avx512vl the splat is folded as a {1to4} broadcast memory operand and
; the mask is read with kmovq; with only +avx512f it becomes a vbroadcastsd, a
; zmm compare, a kshiftlw/kshiftrw $12 pair (clearing mask bits 4-15), and a
; kmovw plus movzwl.
22938 define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
22939 ; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
22940 ; VLX: # %bb.0: # %entry
22941 ; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
22942 ; VLX-NEXT: kmovq %k0, %rax
22943 ; VLX-NEXT: vzeroupper
22946 ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
22947 ; NoVLX: # %bb.0: # %entry
22948 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22949 ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
22950 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
22951 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22952 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22953 ; NoVLX-NEXT: kmovw %k0, %eax
22954 ; NoVLX-NEXT: movzwl %ax, %eax
22955 ; NoVLX-NEXT: vzeroupper
22958 %0 = bitcast <4 x i64> %__a to <4 x double>
22959 %load = load double, double* %__b
22960 %vec = insertelement <4 x double> undef, double %load, i32 0
22961 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
22962 %2 = fcmp oeq <4 x double> %0, %1
22963 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22964 %4 = bitcast <64 x i1> %3 to i64
; Masked fcmp oeq of two <4 x double> values (bitcast from the <4 x i64>
; arguments). The i4 mask %__u is ANDed into the compare result, which is
; widened to <64 x i1> with zero lanes (shuffle indices >= 4 select from the
; zeroinitializer operand) and bitcast to i64.
; Both runs move %edi into %k1 and use it as the write mask of vcmpeqpd. With
; +avx512vl the compare is on ymm and the mask is read with kmovq; with only
; +avx512f it is on zmm, followed by a kshiftlw/kshiftrw $12 pair (clearing
; mask bits 4-15) and a kmovw plus movzwl.
22968 define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
22969 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask:
22970 ; VLX: # %bb.0: # %entry
22971 ; VLX-NEXT: kmovd %edi, %k1
22972 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
22973 ; VLX-NEXT: kmovq %k0, %rax
22974 ; VLX-NEXT: vzeroupper
22977 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask:
22978 ; NoVLX: # %bb.0: # %entry
22979 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
22980 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
22981 ; NoVLX-NEXT: kmovw %edi, %k1
22982 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
22983 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
22984 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
22985 ; NoVLX-NEXT: kmovw %k0, %eax
22986 ; NoVLX-NEXT: movzwl %ax, %eax
22987 ; NoVLX-NEXT: vzeroupper
22990 %0 = bitcast <4 x i64> %__a to <4 x double>
22991 %1 = bitcast <4 x i64> %__b to <4 x double>
22992 %2 = fcmp oeq <4 x double> %0, %1
22993 %3 = bitcast i4 %__u to <4 x i1>
22994 %4 = and <4 x i1> %2, %3
22995 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
22996 %6 = bitcast <64 x i1> %5 to i64
; Masked fcmp oeq of <4 x double> %__a against a full vector loaded from
; %__b. The i4 mask %__u is ANDed into the compare result, which is widened
; to <64 x i1> with zero lanes (shuffle indices >= 4 select from the
; zeroinitializer operand) and bitcast to i64.
; With +avx512vl the load is folded as the (%rsi) memory operand of a masked
; ymm vcmpeqpd and the mask is read with kmovq; with only +avx512f it is
; loaded with vmovapd, compared on zmm under %k1, trimmed with a
; kshiftlw/kshiftrw $12 pair (clearing mask bits 4-15), and read with kmovw
; plus movzwl.
23000 define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
23001 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem:
23002 ; VLX: # %bb.0: # %entry
23003 ; VLX-NEXT: kmovd %edi, %k1
23004 ; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
23005 ; VLX-NEXT: kmovq %k0, %rax
23006 ; VLX-NEXT: vzeroupper
23009 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem:
23010 ; NoVLX: # %bb.0: # %entry
23011 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
23012 ; NoVLX-NEXT: kmovw %edi, %k1
23013 ; NoVLX-NEXT: vmovapd (%rsi), %ymm1
23014 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23015 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
23016 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
23017 ; NoVLX-NEXT: kmovw %k0, %eax
23018 ; NoVLX-NEXT: movzwl %ax, %eax
23019 ; NoVLX-NEXT: vzeroupper
23022 %0 = bitcast <4 x i64> %__a to <4 x double>
23023 %load = load <4 x i64>, <4 x i64>* %__b
23024 %1 = bitcast <4 x i64> %load to <4 x double>
23025 %2 = fcmp oeq <4 x double> %0, %1
23026 %3 = bitcast i4 %__u to <4 x i1>
23027 %4 = and <4 x i1> %2, %3
23028 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
23029 %6 = bitcast <64 x i1> %5 to i64
; Masked fcmp oeq of <4 x double> %__a against a scalar double loaded from
; %__b and splatted to all four lanes. The i4 mask %__u is ANDed into the
; compare result, which is widened to <64 x i1> with zero lanes (shuffle
; indices >= 4 select from the zeroinitializer operand) and bitcast to i64.
; With +avx512vl the splat is folded as a {1to4} broadcast into a masked ymm
; vcmpeqpd and the mask is read with kmovq; with only +avx512f it becomes a
; vbroadcastsd, a masked zmm compare, a kshiftlw/kshiftrw $12 pair (clearing
; mask bits 4-15), and a kmovw plus movzwl.
23033 define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
23034 ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
23035 ; VLX: # %bb.0: # %entry
23036 ; VLX-NEXT: kmovd %edi, %k1
23037 ; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
23038 ; VLX-NEXT: kmovq %k0, %rax
23039 ; VLX-NEXT: vzeroupper
23042 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
23043 ; NoVLX: # %bb.0: # %entry
23044 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
23045 ; NoVLX-NEXT: kmovw %edi, %k1
23046 ; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1
23047 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23048 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
23049 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
23050 ; NoVLX-NEXT: kmovw %k0, %eax
23051 ; NoVLX-NEXT: movzwl %ax, %eax
23052 ; NoVLX-NEXT: vzeroupper
23055 %0 = bitcast <4 x i64> %__a to <4 x double>
23056 %load = load double, double* %__b
23057 %vec = insertelement <4 x double> undef, double %load, i32 0
23058 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
23059 %2 = fcmp oeq <4 x double> %0, %1
23060 %3 = bitcast i4 %__u to <4 x i1>
23061 %4 = and <4 x i1> %2, %3
23062 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
23063 %6 = bitcast <64 x i1> %5 to i64
; Unmasked fcmp oeq of two <8 x double> values (bitcast from the <8 x i64>
; arguments). The <8 x i1> result is widened to <16 x i1> with zero lanes
; (shuffle indices 8-15 select from the zeroinitializer operand) and bitcast
; to i16.
; Both runs expect a single zmm vcmpeqpd with no mask shifting afterwards;
; they differ only in reading %k0 with kmovd versus kmovw.
23069 define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23070 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
23071 ; VLX: # %bb.0: # %entry
23072 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
23073 ; VLX-NEXT: kmovd %k0, %eax
23074 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
23075 ; VLX-NEXT: vzeroupper
23078 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
23079 ; NoVLX: # %bb.0: # %entry
23080 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
23081 ; NoVLX-NEXT: kmovw %k0, %eax
23082 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
23083 ; NoVLX-NEXT: vzeroupper
23086 %0 = bitcast <8 x i64> %__a to <8 x double>
23087 %1 = bitcast <8 x i64> %__b to <8 x double>
23088 %2 = fcmp oeq <8 x double> %0, %1
23089 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23090 %4 = bitcast <16 x i1> %3 to i16
; Unmasked fcmp oeq of <8 x double> %__a against a full vector loaded from
; %__b. The <8 x i1> result is widened to <16 x i1> with zero lanes (shuffle
; indices 8-15 select from the zeroinitializer operand) and bitcast to i16.
; Both runs expect the load folded as the (%rdi) memory operand of a zmm
; vcmpeqpd; they differ only in reading %k0 with kmovd versus kmovw.
23094 define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
23095 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
23096 ; VLX: # %bb.0: # %entry
23097 ; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
23098 ; VLX-NEXT: kmovd %k0, %eax
23099 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
23100 ; VLX-NEXT: vzeroupper
23103 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
23104 ; NoVLX: # %bb.0: # %entry
23105 ; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
23106 ; NoVLX-NEXT: kmovw %k0, %eax
23107 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
23108 ; NoVLX-NEXT: vzeroupper
23111 %0 = bitcast <8 x i64> %__a to <8 x double>
23112 %load = load <8 x i64>, <8 x i64>* %__b
23113 %1 = bitcast <8 x i64> %load to <8 x double>
23114 %2 = fcmp oeq <8 x double> %0, %1
23115 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23116 %4 = bitcast <16 x i1> %3 to i16
; Unmasked fcmp oeq of <8 x double> %__a against a scalar double loaded from
; %__b and splatted to all eight lanes. The <8 x i1> result is widened to
; <16 x i1> with zero lanes (shuffle indices 8-15 select from the
; zeroinitializer operand) and bitcast to i16.
; Both runs expect the splat folded as a {1to8} broadcast memory operand of a
; zmm vcmpeqpd; they differ only in reading %k0 with kmovd versus kmovw.
23120 define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
23121 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
23122 ; VLX: # %bb.0: # %entry
23123 ; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
23124 ; VLX-NEXT: kmovd %k0, %eax
23125 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
23126 ; VLX-NEXT: vzeroupper
23129 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
23130 ; NoVLX: # %bb.0: # %entry
23131 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
23132 ; NoVLX-NEXT: kmovw %k0, %eax
23133 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
23134 ; NoVLX-NEXT: vzeroupper
23137 %0 = bitcast <8 x i64> %__a to <8 x double>
23138 %load = load double, double* %__b
23139 %vec = insertelement <8 x double> undef, double %load, i32 0
23140 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
23141 %2 = fcmp oeq <8 x double> %0, %1
23142 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23143 %4 = bitcast <16 x i1> %3 to i16
; Masked fcmp oeq of two <8 x double> values (bitcast from the <8 x i64>
; arguments). The i8 mask %__u is ANDed into the compare result, which is
; widened to <16 x i1> with zero lanes (shuffle indices 8-15 select from the
; zeroinitializer operand) and bitcast to i16.
; Both runs expect %edi moved into %k1 and used as the write mask of a single
; zmm vcmpeqpd, with no mask shifting afterwards.
23147 define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23148 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask:
23149 ; VLX: # %bb.0: # %entry
23150 ; VLX-NEXT: kmovd %edi, %k1
23151 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23152 ; VLX-NEXT: kmovd %k0, %eax
23153 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
23154 ; VLX-NEXT: vzeroupper
23157 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask:
23158 ; NoVLX: # %bb.0: # %entry
23159 ; NoVLX-NEXT: kmovw %edi, %k1
23160 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23161 ; NoVLX-NEXT: kmovw %k0, %eax
23162 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
23163 ; NoVLX-NEXT: vzeroupper
23166 %0 = bitcast <8 x i64> %__a to <8 x double>
23167 %1 = bitcast <8 x i64> %__b to <8 x double>
23168 %2 = fcmp oeq <8 x double> %0, %1
23169 %3 = bitcast i8 %__u to <8 x i1>
23170 %4 = and <8 x i1> %2, %3
23171 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23172 %6 = bitcast <16 x i1> %5 to i16
; Masked fcmp oeq of <8 x double> %__a against a full vector loaded from
; %__b. The i8 mask %__u is ANDed into the compare result, which is widened
; to <16 x i1> with zero lanes (shuffle indices 8-15 select from the
; zeroinitializer operand) and bitcast to i16.
; Both runs expect %edi moved into %k1 and the load folded as the (%rsi)
; memory operand of a masked zmm vcmpeqpd.
23176 define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
23177 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem:
23178 ; VLX: # %bb.0: # %entry
23179 ; VLX-NEXT: kmovd %edi, %k1
23180 ; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23181 ; VLX-NEXT: kmovd %k0, %eax
23182 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
23183 ; VLX-NEXT: vzeroupper
23186 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem:
23187 ; NoVLX: # %bb.0: # %entry
23188 ; NoVLX-NEXT: kmovw %edi, %k1
23189 ; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23190 ; NoVLX-NEXT: kmovw %k0, %eax
23191 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
23192 ; NoVLX-NEXT: vzeroupper
23195 %0 = bitcast <8 x i64> %__a to <8 x double>
23196 %load = load <8 x i64>, <8 x i64>* %__b
23197 %1 = bitcast <8 x i64> %load to <8 x double>
23198 %2 = fcmp oeq <8 x double> %0, %1
23199 %3 = bitcast i8 %__u to <8 x i1>
23200 %4 = and <8 x i1> %2, %3
23201 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23202 %6 = bitcast <16 x i1> %5 to i16
; Masked fcmp oeq of <8 x double> %__a against a scalar double loaded from
; %__b and splatted to all eight lanes. The i8 mask %__u is ANDed into the
; compare result, which is widened to <16 x i1> with zero lanes (shuffle
; indices 8-15 select from the zeroinitializer operand) and bitcast to i16.
; Both runs expect %edi moved into %k1 and the splat folded as a {1to8}
; broadcast memory operand of a masked zmm vcmpeqpd.
23206 define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr {
23207 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
23208 ; VLX: # %bb.0: # %entry
23209 ; VLX-NEXT: kmovd %edi, %k1
23210 ; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23211 ; VLX-NEXT: kmovd %k0, %eax
23212 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
23213 ; VLX-NEXT: vzeroupper
23216 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
23217 ; NoVLX: # %bb.0: # %entry
23218 ; NoVLX-NEXT: kmovw %edi, %k1
23219 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23220 ; NoVLX-NEXT: kmovw %k0, %eax
23221 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
23222 ; NoVLX-NEXT: vzeroupper
23225 %0 = bitcast <8 x i64> %__a to <8 x double>
23226 %load = load double, double* %__b
23227 %vec = insertelement <8 x double> undef, double %load, i32 0
23228 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
23229 %2 = fcmp oeq <8 x double> %0, %1
23230 %3 = bitcast i8 %__u to <8 x i1>
23231 %4 = and <8 x i1> %2, %3
23232 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23233 %6 = bitcast <16 x i1> %5 to i16
; Unmasked 512-bit compare through the @llvm.x86.avx512.cmp.pd.512 intrinsic,
; result widened to i16.
; The intrinsic is called with immediates i32 2 and i32 8; its <8 x i1> result
; is bitcast to i8 and zero-extended to i16.
; Both run configurations expect `vcmplepd {sae}` followed by a mask move to
; %eax and an explicit movzbl of %al.
; NOTE(review): the function name says "oeq" but the checked instruction is
; vcmplepd; presumably immediate 2 selects the LE predicate and 8 selects
; SAE - confirm against the intrinsic definition.
23239 define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23240 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask:
23241 ; VLX: # %bb.0: # %entry
23242 ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23243 ; VLX-NEXT: kmovd %k0, %eax
23244 ; VLX-NEXT: movzbl %al, %eax
23245 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
23246 ; VLX-NEXT: vzeroupper
23249 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask:
23250 ; NoVLX: # %bb.0: # %entry
23251 ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23252 ; NoVLX-NEXT: kmovw %k0, %eax
23253 ; NoVLX-NEXT: movzbl %al, %eax
23254 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
23255 ; NoVLX-NEXT: vzeroupper
23258 %0 = bitcast <8 x i64> %__a to <8 x double>
23259 %1 = bitcast <8 x i64> %__b to <8 x double>
23260 %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
23261 %3 = bitcast <8 x i1> %2 to i8
23262 %4 = zext i8 %3 to i16
; Masked variant of the intrinsic-based compare, result widened to i16.
; @llvm.x86.avx512.cmp.pd.512 is called with immediates i32 2 and i32 8; its
; <8 x i1> result is ANDed with the bits of %__u, bitcast to i8 and
; zero-extended to i16.
; Both run configurations expect an unpredicated `vcmplepd {sae}`; the mask is
; applied afterwards in a general-purpose register (andb %dil, %al), followed
; by movzbl.
; NOTE(review): the function name says "oeq" but the checked instruction is
; vcmplepd; presumably immediate 2 selects the LE predicate and 8 selects
; SAE - confirm against the intrinsic definition.
23266 define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23267 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask:
23268 ; VLX: # %bb.0: # %entry
23269 ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23270 ; VLX-NEXT: kmovd %k0, %eax
23271 ; VLX-NEXT: andb %dil, %al
23272 ; VLX-NEXT: movzbl %al, %eax
23273 ; VLX-NEXT: # kill: def $ax killed $ax killed $eax
23274 ; VLX-NEXT: vzeroupper
23277 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask:
23278 ; NoVLX: # %bb.0: # %entry
23279 ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23280 ; NoVLX-NEXT: kmovw %k0, %eax
23281 ; NoVLX-NEXT: andb %dil, %al
23282 ; NoVLX-NEXT: movzbl %al, %eax
23283 ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
23284 ; NoVLX-NEXT: vzeroupper
23287 %0 = bitcast <8 x i64> %__a to <8 x double>
23288 %1 = bitcast <8 x i64> %__b to <8 x double>
23289 %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
23290 %3 = bitcast i8 %__u to <8 x i1>
23291 %4 = and <8 x i1> %2, %3
23292 %5 = bitcast <8 x i1> %4 to i8
23293 %6 = zext i8 %5 to i32
; Unmasked ordered-equal compare of two register operands, widened to i32.
; %__a and %__b (viewed as <8 x double>) are compared with fcmp oeq; the
; <8 x i1> result is padded to 32 lanes - every shuffle index above 7 is in
; the 8-15 range and therefore picks a lane of the zeroinitializer operand -
; and bitcast to i32.
; Both run configurations expect just vcmpeqpd plus one mask-to-GPR move, with
; no separate zero-extension instruction.
23299 define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23300 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
23301 ; VLX: # %bb.0: # %entry
23302 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
23303 ; VLX-NEXT: kmovd %k0, %eax
23304 ; VLX-NEXT: vzeroupper
23307 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
23308 ; NoVLX: # %bb.0: # %entry
23309 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
23310 ; NoVLX-NEXT: kmovw %k0, %eax
23311 ; NoVLX-NEXT: vzeroupper
23314 %0 = bitcast <8 x i64> %__a to <8 x double>
23315 %1 = bitcast <8 x i64> %__b to <8 x double>
23316 %2 = fcmp oeq <8 x double> %0, %1
23317 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23318 %4 = bitcast <32 x i1> %3 to i32
; Unmasked ordered-equal compare with a full-vector memory operand, widened to i32.
; %__a (viewed as <8 x double>) is compared with fcmp oeq against the
; <8 x i64> loaded from %__b; the <8 x i1> result is padded to 32 lanes with
; lanes of the zeroinitializer shuffle operand (indices 8-15) and bitcast to i32.
; Both run configurations expect the load to be folded into vcmpeqpd as a
; (%rdi) memory operand.
23322 define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
23323 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
23324 ; VLX: # %bb.0: # %entry
23325 ; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
23326 ; VLX-NEXT: kmovd %k0, %eax
23327 ; VLX-NEXT: vzeroupper
23330 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
23331 ; NoVLX: # %bb.0: # %entry
23332 ; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
23333 ; NoVLX-NEXT: kmovw %k0, %eax
23334 ; NoVLX-NEXT: vzeroupper
23337 %0 = bitcast <8 x i64> %__a to <8 x double>
23338 %load = load <8 x i64>, <8 x i64>* %__b
23339 %1 = bitcast <8 x i64> %load to <8 x double>
23340 %2 = fcmp oeq <8 x double> %0, %1
23341 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23342 %4 = bitcast <32 x i1> %3 to i32
; Unmasked ordered-equal compare against a broadcast scalar, widened to i32.
; A single double is loaded from %__b and splatted to <8 x double>
; (insertelement into lane 0, then an all-zero shuffle mask); %__a is compared
; with fcmp oeq against that splat, and the <8 x i1> result is padded to 32
; lanes with lanes of the zeroinitializer shuffle operand and bitcast to i32.
; Both run configurations expect the splat to become the embedded broadcast
; operand (%rdi){1to8} of vcmpeqpd.
23346 define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
23347 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
23348 ; VLX: # %bb.0: # %entry
23349 ; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
23350 ; VLX-NEXT: kmovd %k0, %eax
23351 ; VLX-NEXT: vzeroupper
23354 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
23355 ; NoVLX: # %bb.0: # %entry
23356 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
23357 ; NoVLX-NEXT: kmovw %k0, %eax
23358 ; NoVLX-NEXT: vzeroupper
23361 %0 = bitcast <8 x i64> %__a to <8 x double>
23362 %load = load double, double* %__b
23363 %vec = insertelement <8 x double> undef, double %load, i32 0
23364 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
23365 %2 = fcmp oeq <8 x double> %0, %1
23366 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23367 %4 = bitcast <32 x i1> %3 to i32
; Masked ordered-equal compare of two register operands, widened to i32.
; %__a and %__b (viewed as <8 x double>) are compared with fcmp oeq; the
; result is ANDed with the bits of %__u, padded to 32 lanes with lanes of the
; zeroinitializer shuffle operand (indices 8-15) and bitcast to i32.
; Both run configurations expect %__u to be moved into %k1 and used as the
; write-mask of vcmpeqpd instead of a separate AND.
23371 define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23372 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask:
23373 ; VLX: # %bb.0: # %entry
23374 ; VLX-NEXT: kmovd %edi, %k1
23375 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23376 ; VLX-NEXT: kmovd %k0, %eax
23377 ; VLX-NEXT: vzeroupper
23380 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask:
23381 ; NoVLX: # %bb.0: # %entry
23382 ; NoVLX-NEXT: kmovw %edi, %k1
23383 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23384 ; NoVLX-NEXT: kmovw %k0, %eax
23385 ; NoVLX-NEXT: vzeroupper
23388 %0 = bitcast <8 x i64> %__a to <8 x double>
23389 %1 = bitcast <8 x i64> %__b to <8 x double>
23390 %2 = fcmp oeq <8 x double> %0, %1
23391 %3 = bitcast i8 %__u to <8 x i1>
23392 %4 = and <8 x i1> %2, %3
23393 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23394 %6 = bitcast <32 x i1> %5 to i32
; Masked ordered-equal compare with a full-vector memory operand, widened to i32.
; %__a (viewed as <8 x double>) is compared with fcmp oeq against the
; <8 x i64> loaded from %__b; the result is ANDed with the bits of %__u,
; padded to 32 lanes with lanes of the zeroinitializer shuffle operand and
; bitcast to i32.
; Both run configurations expect a single vcmpeqpd on (%rsi) predicated by %k1.
23398 define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
23399 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem:
23400 ; VLX: # %bb.0: # %entry
23401 ; VLX-NEXT: kmovd %edi, %k1
23402 ; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23403 ; VLX-NEXT: kmovd %k0, %eax
23404 ; VLX-NEXT: vzeroupper
23407 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem:
23408 ; NoVLX: # %bb.0: # %entry
23409 ; NoVLX-NEXT: kmovw %edi, %k1
23410 ; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23411 ; NoVLX-NEXT: kmovw %k0, %eax
23412 ; NoVLX-NEXT: vzeroupper
23415 %0 = bitcast <8 x i64> %__a to <8 x double>
23416 %load = load <8 x i64>, <8 x i64>* %__b
23417 %1 = bitcast <8 x i64> %load to <8 x double>
23418 %2 = fcmp oeq <8 x double> %0, %1
23419 %3 = bitcast i8 %__u to <8 x i1>
23420 %4 = and <8 x i1> %2, %3
23421 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23422 %6 = bitcast <32 x i1> %5 to i32
; Masked ordered-equal compare against a broadcast scalar, widened to i32.
; A single double is loaded from %__b and splatted to <8 x double>
; (insertelement into lane 0, then an all-zero shuffle mask); %__a is compared
; with fcmp oeq against that splat, the result is ANDed with the bits of %__u,
; padded to 32 lanes with lanes of the zeroinitializer shuffle operand and
; bitcast to i32.
; Both run configurations expect vcmpeqpd with the embedded broadcast operand
; (%rsi){1to8}, predicated by %k1.
23426 define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr {
23427 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
23428 ; VLX: # %bb.0: # %entry
23429 ; VLX-NEXT: kmovd %edi, %k1
23430 ; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23431 ; VLX-NEXT: kmovd %k0, %eax
23432 ; VLX-NEXT: vzeroupper
23435 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
23436 ; NoVLX: # %bb.0: # %entry
23437 ; NoVLX-NEXT: kmovw %edi, %k1
23438 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23439 ; NoVLX-NEXT: kmovw %k0, %eax
23440 ; NoVLX-NEXT: vzeroupper
23443 %0 = bitcast <8 x i64> %__a to <8 x double>
23444 %load = load double, double* %__b
23445 %vec = insertelement <8 x double> undef, double %load, i32 0
23446 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
23447 %2 = fcmp oeq <8 x double> %0, %1
23448 %3 = bitcast i8 %__u to <8 x i1>
23449 %4 = and <8 x i1> %2, %3
23450 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23451 %6 = bitcast <32 x i1> %5 to i32
; Unmasked 512-bit compare through the @llvm.x86.avx512.cmp.pd.512 intrinsic,
; result widened to i32.
; The intrinsic is called with immediates i32 2 and i32 8; its <8 x i1> result
; is bitcast to i8 and zero-extended to i32.
; Expected code differs per run: the VLX run expects a lone kmovb after
; `vcmplepd {sae}`, while the NoVLX run expects kmovw followed by movzbl.
; NOTE(review): the function name says "oeq" but the checked instruction is
; vcmplepd; presumably immediate 2 selects the LE predicate and 8 selects
; SAE - confirm against the intrinsic definition.
23457 define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23458 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask:
23459 ; VLX: # %bb.0: # %entry
23460 ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23461 ; VLX-NEXT: kmovb %k0, %eax
23462 ; VLX-NEXT: vzeroupper
23465 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask:
23466 ; NoVLX: # %bb.0: # %entry
23467 ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23468 ; NoVLX-NEXT: kmovw %k0, %eax
23469 ; NoVLX-NEXT: movzbl %al, %eax
23470 ; NoVLX-NEXT: vzeroupper
23473 %0 = bitcast <8 x i64> %__a to <8 x double>
23474 %1 = bitcast <8 x i64> %__b to <8 x double>
23475 %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
23476 %3 = bitcast <8 x i1> %2 to i8
23477 %4 = zext i8 %3 to i32
; Masked variant of the intrinsic-based compare, result widened to i32.
; @llvm.x86.avx512.cmp.pd.512 is called with immediates i32 2 and i32 8; its
; <8 x i1> result is ANDed with the bits of %__u, bitcast to i8 and
; zero-extended to i32.
; Both run configurations expect an unpredicated `vcmplepd {sae}`; the mask is
; applied afterwards in a general-purpose register (andb %dil, %al), followed
; by movzbl.
; NOTE(review): the function name says "oeq" but the checked instruction is
; vcmplepd; presumably immediate 2 selects the LE predicate and 8 selects
; SAE - confirm against the intrinsic definition.
23481 define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23482 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask:
23483 ; VLX: # %bb.0: # %entry
23484 ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23485 ; VLX-NEXT: kmovd %k0, %eax
23486 ; VLX-NEXT: andb %dil, %al
23487 ; VLX-NEXT: movzbl %al, %eax
23488 ; VLX-NEXT: vzeroupper
23491 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask:
23492 ; NoVLX: # %bb.0: # %entry
23493 ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23494 ; NoVLX-NEXT: kmovw %k0, %eax
23495 ; NoVLX-NEXT: andb %dil, %al
23496 ; NoVLX-NEXT: movzbl %al, %eax
23497 ; NoVLX-NEXT: vzeroupper
23500 %0 = bitcast <8 x i64> %__a to <8 x double>
23501 %1 = bitcast <8 x i64> %__b to <8 x double>
23502 %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
23503 %3 = bitcast i8 %__u to <8 x i1>
23504 %4 = and <8 x i1> %2, %3
23505 %5 = bitcast <8 x i1> %4 to i8
23506 %6 = zext i8 %5 to i32
; Unmasked ordered-equal compare of two register operands, widened to i64.
; %__a and %__b (viewed as <8 x double>) are compared with fcmp oeq; the
; <8 x i1> result is padded to 64 lanes - every shuffle index above 7 is in
; the 8-15 range and therefore picks a lane of the zeroinitializer operand -
; and bitcast to i64.
; Expected code differs per run: the VLX run moves the mask with kmovq into
; %rax, the NoVLX run uses kmovw into %eax followed by movzwl.
23512 define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23513 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
23514 ; VLX: # %bb.0: # %entry
23515 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
23516 ; VLX-NEXT: kmovq %k0, %rax
23517 ; VLX-NEXT: vzeroupper
23520 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
23521 ; NoVLX: # %bb.0: # %entry
23522 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
23523 ; NoVLX-NEXT: kmovw %k0, %eax
23524 ; NoVLX-NEXT: movzwl %ax, %eax
23525 ; NoVLX-NEXT: vzeroupper
23528 %0 = bitcast <8 x i64> %__a to <8 x double>
23529 %1 = bitcast <8 x i64> %__b to <8 x double>
23530 %2 = fcmp oeq <8 x double> %0, %1
23531 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23532 %4 = bitcast <64 x i1> %3 to i64
; Unmasked ordered-equal compare with a full-vector memory operand, widened to i64.
; %__a (viewed as <8 x double>) is compared with fcmp oeq against the
; <8 x i64> loaded from %__b; the <8 x i1> result is padded to 64 lanes with
; lanes of the zeroinitializer shuffle operand (indices 8-15) and bitcast to i64.
; Both run configurations expect the load folded into vcmpeqpd as (%rdi); the
; VLX run then uses kmovq, the NoVLX run kmovw followed by movzwl.
23536 define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
23537 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
23538 ; VLX: # %bb.0: # %entry
23539 ; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
23540 ; VLX-NEXT: kmovq %k0, %rax
23541 ; VLX-NEXT: vzeroupper
23544 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
23545 ; NoVLX: # %bb.0: # %entry
23546 ; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
23547 ; NoVLX-NEXT: kmovw %k0, %eax
23548 ; NoVLX-NEXT: movzwl %ax, %eax
23549 ; NoVLX-NEXT: vzeroupper
23552 %0 = bitcast <8 x i64> %__a to <8 x double>
23553 %load = load <8 x i64>, <8 x i64>* %__b
23554 %1 = bitcast <8 x i64> %load to <8 x double>
23555 %2 = fcmp oeq <8 x double> %0, %1
23556 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23557 %4 = bitcast <64 x i1> %3 to i64
; Unmasked ordered-equal compare against a broadcast scalar, widened to i64.
; A single double is loaded from %__b and splatted to <8 x double>
; (insertelement into lane 0, then an all-zero shuffle mask); %__a is compared
; with fcmp oeq against that splat, and the <8 x i1> result is padded to 64
; lanes with lanes of the zeroinitializer shuffle operand and bitcast to i64.
; Both run configurations expect the embedded broadcast operand (%rdi){1to8};
; the VLX run then uses kmovq, the NoVLX run kmovw followed by movzwl.
23561 define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
23562 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
23563 ; VLX: # %bb.0: # %entry
23564 ; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
23565 ; VLX-NEXT: kmovq %k0, %rax
23566 ; VLX-NEXT: vzeroupper
23569 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
23570 ; NoVLX: # %bb.0: # %entry
23571 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
23572 ; NoVLX-NEXT: kmovw %k0, %eax
23573 ; NoVLX-NEXT: movzwl %ax, %eax
23574 ; NoVLX-NEXT: vzeroupper
23577 %0 = bitcast <8 x i64> %__a to <8 x double>
23578 %load = load double, double* %__b
23579 %vec = insertelement <8 x double> undef, double %load, i32 0
23580 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
23581 %2 = fcmp oeq <8 x double> %0, %1
23582 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23583 %4 = bitcast <64 x i1> %3 to i64
; Masked ordered-equal compare of two register operands, widened to i64.
; %__a and %__b (viewed as <8 x double>) are compared with fcmp oeq; the
; result is ANDed with the bits of %__u, padded to 64 lanes with lanes of the
; zeroinitializer shuffle operand (indices 8-15) and bitcast to i64.
; Both run configurations expect %__u to be moved into %k1 and used as the
; write-mask of vcmpeqpd; the VLX run then uses kmovq, the NoVLX run kmovw
; followed by movzwl.
23587 define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23588 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask:
23589 ; VLX: # %bb.0: # %entry
23590 ; VLX-NEXT: kmovd %edi, %k1
23591 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23592 ; VLX-NEXT: kmovq %k0, %rax
23593 ; VLX-NEXT: vzeroupper
23596 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask:
23597 ; NoVLX: # %bb.0: # %entry
23598 ; NoVLX-NEXT: kmovw %edi, %k1
23599 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
23600 ; NoVLX-NEXT: kmovw %k0, %eax
23601 ; NoVLX-NEXT: movzwl %ax, %eax
23602 ; NoVLX-NEXT: vzeroupper
23605 %0 = bitcast <8 x i64> %__a to <8 x double>
23606 %1 = bitcast <8 x i64> %__b to <8 x double>
23607 %2 = fcmp oeq <8 x double> %0, %1
23608 %3 = bitcast i8 %__u to <8 x i1>
23609 %4 = and <8 x i1> %2, %3
23610 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23611 %6 = bitcast <64 x i1> %5 to i64
; Masked ordered-equal compare with a full-vector memory operand, widened to i64.
; %__a (viewed as <8 x double>) is compared with fcmp oeq against the
; <8 x i64> loaded from %__b; the result is ANDed with the bits of %__u,
; padded to 64 lanes with lanes of the zeroinitializer shuffle operand and
; bitcast to i64.
; Both run configurations expect a single vcmpeqpd on (%rsi) predicated by
; %k1; the VLX run then uses kmovq, the NoVLX run kmovw followed by movzwl.
23615 define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
23616 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem:
23617 ; VLX: # %bb.0: # %entry
23618 ; VLX-NEXT: kmovd %edi, %k1
23619 ; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23620 ; VLX-NEXT: kmovq %k0, %rax
23621 ; VLX-NEXT: vzeroupper
23624 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem:
23625 ; NoVLX: # %bb.0: # %entry
23626 ; NoVLX-NEXT: kmovw %edi, %k1
23627 ; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
23628 ; NoVLX-NEXT: kmovw %k0, %eax
23629 ; NoVLX-NEXT: movzwl %ax, %eax
23630 ; NoVLX-NEXT: vzeroupper
23633 %0 = bitcast <8 x i64> %__a to <8 x double>
23634 %load = load <8 x i64>, <8 x i64>* %__b
23635 %1 = bitcast <8 x i64> %load to <8 x double>
23636 %2 = fcmp oeq <8 x double> %0, %1
23637 %3 = bitcast i8 %__u to <8 x i1>
23638 %4 = and <8 x i1> %2, %3
23639 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23640 %6 = bitcast <64 x i1> %5 to i64
; Masked ordered-equal compare against a broadcast scalar, widened to i64.
; A single double is loaded from %__b and splatted to <8 x double>
; (insertelement into lane 0, then an all-zero shuffle mask); %__a is compared
; with fcmp oeq against that splat, the result is ANDed with the bits of %__u,
; padded to 64 lanes with lanes of the zeroinitializer shuffle operand and
; bitcast to i64.
; Both run configurations expect vcmpeqpd with the embedded broadcast operand
; (%rsi){1to8}, predicated by %k1; the VLX run then uses kmovq, the NoVLX run
; kmovw followed by movzwl.
23644 define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr {
23645 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
23646 ; VLX: # %bb.0: # %entry
23647 ; VLX-NEXT: kmovd %edi, %k1
23648 ; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23649 ; VLX-NEXT: kmovq %k0, %rax
23650 ; VLX-NEXT: vzeroupper
23653 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
23654 ; NoVLX: # %bb.0: # %entry
23655 ; NoVLX-NEXT: kmovw %edi, %k1
23656 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
23657 ; NoVLX-NEXT: kmovw %k0, %eax
23658 ; NoVLX-NEXT: movzwl %ax, %eax
23659 ; NoVLX-NEXT: vzeroupper
23662 %0 = bitcast <8 x i64> %__a to <8 x double>
23663 %load = load double, double* %__b
23664 %vec = insertelement <8 x double> undef, double %load, i32 0
23665 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
23666 %2 = fcmp oeq <8 x double> %0, %1
23667 %3 = bitcast i8 %__u to <8 x i1>
23668 %4 = and <8 x i1> %2, %3
23669 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
23670 %6 = bitcast <64 x i1> %5 to i64
; Unmasked 512-bit compare through the @llvm.x86.avx512.cmp.pd.512 intrinsic,
; result widened to i64.
; The intrinsic is called with immediates i32 2 and i32 8; its <8 x i1> result
; is bitcast to i8 and zero-extended to i64.
; Both run configurations expect `vcmplepd {sae}`, a mask move to %eax and an
; explicit movzbl of %al.
; NOTE(review): the function name says "oeq" but the checked instruction is
; vcmplepd; presumably immediate 2 selects the LE predicate and 8 selects
; SAE - confirm against the intrinsic definition.
23676 define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23677 ; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
23678 ; VLX: # %bb.0: # %entry
23679 ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23680 ; VLX-NEXT: kmovd %k0, %eax
23681 ; VLX-NEXT: movzbl %al, %eax
23682 ; VLX-NEXT: vzeroupper
23685 ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
23686 ; NoVLX: # %bb.0: # %entry
23687 ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23688 ; NoVLX-NEXT: kmovw %k0, %eax
23689 ; NoVLX-NEXT: movzbl %al, %eax
23690 ; NoVLX-NEXT: vzeroupper
23693 %0 = bitcast <8 x i64> %__a to <8 x double>
23694 %1 = bitcast <8 x i64> %__b to <8 x double>
23695 %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
23696 %3 = bitcast <8 x i1> %2 to i8
23697 %4 = zext i8 %3 to i64
; Masked variant of the intrinsic-based compare, result widened to i64.
; @llvm.x86.avx512.cmp.pd.512 is called with immediates i32 2 and i32 8; its
; <8 x i1> result is ANDed with the bits of %__u, bitcast to i8 and
; zero-extended to i64.
; Both run configurations expect an unpredicated `vcmplepd {sae}`; the mask is
; applied afterwards in a general-purpose register (andb %dil, %al), followed
; by movzbl.
; NOTE(review): the function name says "oeq" but the checked instruction is
; vcmplepd; presumably immediate 2 selects the LE predicate and 8 selects
; SAE - confirm against the intrinsic definition.
23701 define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
23702 ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask:
23703 ; VLX: # %bb.0: # %entry
23704 ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23705 ; VLX-NEXT: kmovd %k0, %eax
23706 ; VLX-NEXT: andb %dil, %al
23707 ; VLX-NEXT: movzbl %al, %eax
23708 ; VLX-NEXT: vzeroupper
23711 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask:
23712 ; NoVLX: # %bb.0: # %entry
23713 ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
23714 ; NoVLX-NEXT: kmovw %k0, %eax
23715 ; NoVLX-NEXT: andb %dil, %al
23716 ; NoVLX-NEXT: movzbl %al, %eax
23717 ; NoVLX-NEXT: vzeroupper
23720 %0 = bitcast <8 x i64> %__a to <8 x double>
23721 %1 = bitcast <8 x i64> %__b to <8 x double>
23722 %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
23723 %3 = bitcast i8 %__u to <8 x i1>
23724 %4 = and <8 x i1> %2, %3
23725 %5 = bitcast <8 x i1> %4 to i8
23726 %6 = zext i8 %5 to i64
23730 ; Test that we understand that cmpps with rounding zeros the upper bits of the mask register.
; The <16 x i1> result of @llvm.x86.avx512.cmp.ps.512 (immediates i32 2 and
; i32 8) is round-tripped through i16, then padded to 32 lanes by a shuffle
; whose indices 16-31 pick lanes of the zeroinitializer operand, and bitcast
; to i32.
; Both run configurations expect only `vcmpleps {sae}` and one mask-to-GPR
; move, i.e. no additional instruction to clear the upper 16 result bits.
23731 define i32 @test_cmpm_rnd_zero(<16 x float> %a, <16 x float> %b) {
23732 ; VLX-LABEL: test_cmpm_rnd_zero:
23734 ; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
23735 ; VLX-NEXT: kmovd %k0, %eax
23736 ; VLX-NEXT: vzeroupper
23739 ; NoVLX-LABEL: test_cmpm_rnd_zero:
23741 ; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
23742 ; NoVLX-NEXT: kmovw %k0, %eax
23743 ; NoVLX-NEXT: vzeroupper
23745 %res = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i32 8)
23746 %1 = bitcast <16 x i1> %res to i16
23747 %cast = bitcast i16 %1 to <16 x i1>
23748 %shuffle = shufflevector <16 x i1> %cast, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
23749 %cast2 = bitcast <32 x i1> %shuffle to i32
; Places a 4-lane compare result in the upper half of an 8-bit mask with the
; lower half zeroed.
; %a is compared lane-wise against zero (icmp ne); the shuffle puts indices
; 4-7 (lanes of the zeroinitializer operand) into result lanes 0-3 and the
; four compare lanes into result lanes 4-7, and the <8 x i1> is bitcast to i8.
; Expected code differs per run: the VLX run tests the xmm register and shifts
; the mask left by 4 with kshiftlb; the NoVLX run tests the full zmm register
; and uses kshiftlw $12 followed by kshiftrw $8.
23753 define i8 @mask_zero_lower(<4 x i32> %a) {
23754 ; VLX-LABEL: mask_zero_lower:
23756 ; VLX-NEXT: vptestmd %xmm0, %xmm0, %k0
23757 ; VLX-NEXT: kshiftlb $4, %k0, %k0
23758 ; VLX-NEXT: kmovd %k0, %eax
23759 ; VLX-NEXT: # kill: def $al killed $al killed $eax
23762 ; NoVLX-LABEL: mask_zero_lower:
23764 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23765 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
23766 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
23767 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0
23768 ; NoVLX-NEXT: kmovw %k0, %eax
23769 ; NoVLX-NEXT: # kill: def $al killed $al killed $eax
23770 ; NoVLX-NEXT: vzeroupper
23772 %cmp = icmp ne <4 x i32> %a, zeroinitializer
23773 %concat = shufflevector <4 x i1> %cmp, <4 x i1> zeroinitializer, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
23774 %cast = bitcast <8 x i1> %concat to i8