; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=SSE,SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefixes=AVX,AVX512VPOPCNTDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512VPOPCNTDQVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=BITALG_NOVLX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg,+avx512vl | FileCheck %s --check-prefix=BITALG
; Lowering of ctpop(<16 x i8>) followed by `icmp ugt 1` ("more than one bit
; set"), sign-extended to a byte mask.
define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) {
; SSE-LABEL: ugt_1_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm1, %xmm1
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: paddb %xmm1, %xmm2
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: pcmpeqb %xmm2, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: ugt_1_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_1_v16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_1_v16i8:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vpaddb %xmm1, %xmm0, %xmm1
; AVX512VPOPCNTDQ-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_1_v16i8:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpaddb %xmm1, %xmm0, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: ugt_1_v16i8:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: ugt_1_v16i8:
; BITALG: # %bb.0:
; BITALG-NEXT: vpopcntb %xmm0, %xmm0
; BITALG-NEXT: vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
; BITALG-NEXT: vpmovm2b %k0, %xmm0
; BITALG-NEXT: retq
  %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
  %3 = icmp ugt <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %4 = sext <16 x i1> %3 to <16 x i8>
  ret <16 x i8> %4
}
; Lowering of ctpop(<16 x i8>) followed by `icmp ult 2` ("at most one bit
; set", i.e. power-of-two-or-zero), sign-extended to a byte mask.
define <16 x i8> @ult_2_v16i8(<16 x i8> %0) {
; SSE-LABEL: ult_2_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm1, %xmm1
; SSE-NEXT: paddb %xmm0, %xmm1
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pcmpeqb %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: ult_2_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm1
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; BITALG_NOVLX-LABEL: ult_2_v16i8:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; BITALG_NOVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: ult_2_v16i8:
; BITALG: # %bb.0:
; BITALG-NEXT: vpopcntb %xmm0, %xmm0
; BITALG-NEXT: vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
; BITALG-NEXT: vpmovm2b %k0, %xmm0
; BITALG-NEXT: retq
  %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
  %3 = icmp ult <16 x i8> %2, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  %4 = sext <16 x i1> %3 to <16 x i8>
  ret <16 x i8> %4
}
; Lowering of ctpop(<16 x i8>) followed by `icmp ugt 2`, sign-extended to a
; byte mask. SSE2/SSE3 use the bit-twiddling popcount; SSSE3+ use the PSHUFB
; nibble-LUT popcount.
define <16 x i8> @ugt_2_v16i8(<16 x i8> %0) {
; SSE2-LABEL: ugt_2_v16i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: psubb %xmm1, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: psrlw $2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: paddb %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $4, %xmm1
; SSE2-NEXT: paddb %xmm1, %xmm0
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: ugt_2_v16i8:
; SSE3: # %bb.0:
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT: psubb %xmm1, %xmm0
; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: pand %xmm1, %xmm2
; SSE3-NEXT: psrlw $2, %xmm0
; SSE3-NEXT: pand %xmm1, %xmm0
; SSE3-NEXT: paddb %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $4, %xmm1
; SSE3-NEXT: paddb %xmm1, %xmm0
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: ugt_2_v16i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: pand %xmm1, %xmm2
; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT: movdqa %xmm3, %xmm4
; SSSE3-NEXT: pshufb %xmm2, %xmm4
; SSSE3-NEXT: psrlw $4, %xmm0
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pshufb %xmm0, %xmm3
; SSSE3-NEXT: paddb %xmm4, %xmm3
; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
; SSSE3-NEXT: pmaxub %xmm3, %xmm0
; SSSE3-NEXT: pcmpeqb %xmm3, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: ugt_2_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pand %xmm1, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: pshufb %xmm2, %xmm4
; SSE41-NEXT: psrlw $4, %xmm0
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: pshufb %xmm0, %xmm3
; SSE41-NEXT: paddb %xmm4, %xmm3
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
; SSE41-NEXT: pmaxub %xmm3, %xmm0
; SSE41-NEXT: pcmpeqb %xmm3, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: ugt_2_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_2_v16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_2_v16i8:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_2_v16i8:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vzeroupper
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: ugt_2_v16i8:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: ugt_2_v16i8:
; BITALG: # %bb.0:
; BITALG-NEXT: vpopcntb %xmm0, %xmm0
; BITALG-NEXT: vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
; BITALG-NEXT: vpmovm2b %k0, %xmm0
; BITALG-NEXT: retq
  %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
  %3 = icmp ugt <16 x i8> %2, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  %4 = sext <16 x i1> %3 to <16 x i8>
  ret <16 x i8> %4
}
; Lowering of ctpop(<16 x i8>) followed by `icmp ult 3`, sign-extended to a
; byte mask. ult folds to pminub+pcmpeqb / vpcmpgtb with the constant on the
; other side, unlike the ugt variants above.
define <16 x i8> @ult_3_v16i8(<16 x i8> %0) {
; SSE2-LABEL: ult_3_v16i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: psubb %xmm1, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: psrlw $2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: paddb %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $4, %xmm1
; SSE2-NEXT: paddb %xmm0, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
; SSE2-NEXT: pcmpgtb %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: ult_3_v16i8:
; SSE3: # %bb.0:
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT: psubb %xmm1, %xmm0
; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: pand %xmm1, %xmm2
; SSE3-NEXT: psrlw $2, %xmm0
; SSE3-NEXT: pand %xmm1, %xmm0
; SSE3-NEXT: paddb %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $4, %xmm1
; SSE3-NEXT: paddb %xmm0, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
; SSE3-NEXT: pcmpgtb %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: ult_3_v16i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: pand %xmm1, %xmm2
; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT: movdqa %xmm3, %xmm4
; SSSE3-NEXT: pshufb %xmm2, %xmm4
; SSSE3-NEXT: psrlw $4, %xmm0
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pshufb %xmm0, %xmm3
; SSSE3-NEXT: paddb %xmm4, %xmm3
; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; SSSE3-NEXT: pminub %xmm3, %xmm0
; SSSE3-NEXT: pcmpeqb %xmm3, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: ult_3_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pand %xmm1, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: pshufb %xmm2, %xmm4
; SSE41-NEXT: psrlw $4, %xmm0
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: pshufb %xmm0, %xmm3
; SSE41-NEXT: paddb %xmm4, %xmm3
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; SSE41-NEXT: pminub %xmm3, %xmm0
; SSE41-NEXT: pcmpeqb %xmm3, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: ult_3_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_3_v16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512VPOPCNTDQ-LABEL: ult_3_v16i8:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
; AVX512VPOPCNTDQ-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_3_v16i8:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vzeroupper
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: ult_3_v16i8:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
; BITALG_NOVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: ult_3_v16i8:
; BITALG: # %bb.0:
; BITALG-NEXT: vpopcntb %xmm0, %xmm0
; BITALG-NEXT: vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
; BITALG-NEXT: vpmovm2b %k0, %xmm0
; BITALG-NEXT: retq
  %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
  %3 = icmp ult <16 x i8> %2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %4 = sext <16 x i1> %3 to <16 x i8>
  ret <16 x i8> %4
}
; Lowering of ctpop(<16 x i8>) followed by `icmp ugt 3`, sign-extended to a
; byte mask.
define <16 x i8> @ugt_3_v16i8(<16 x i8> %0) {
; SSE2-LABEL: ugt_3_v16i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: psubb %xmm1, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: psrlw $2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: paddb %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $4, %xmm1
; SSE2-NEXT: paddb %xmm1, %xmm0
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: ugt_3_v16i8:
; SSE3: # %bb.0:
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT: psubb %xmm1, %xmm0
; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: pand %xmm1, %xmm2
; SSE3-NEXT: psrlw $2, %xmm0
; SSE3-NEXT: pand %xmm1, %xmm0
; SSE3-NEXT: paddb %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $4, %xmm1
; SSE3-NEXT: paddb %xmm1, %xmm0
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: ugt_3_v16i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: pand %xmm1, %xmm2
; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT: movdqa %xmm3, %xmm4
; SSSE3-NEXT: pshufb %xmm2, %xmm4
; SSSE3-NEXT: psrlw $4, %xmm0
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pshufb %xmm0, %xmm3
; SSSE3-NEXT: paddb %xmm4, %xmm3
; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; SSSE3-NEXT: pmaxub %xmm3, %xmm0
; SSSE3-NEXT: pcmpeqb %xmm3, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: ugt_3_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pand %xmm1, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: pshufb %xmm2, %xmm4
; SSE41-NEXT: psrlw $4, %xmm0
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: pshufb %xmm0, %xmm3
; SSE41-NEXT: paddb %xmm4, %xmm3
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; SSE41-NEXT: pmaxub %xmm3, %xmm0
; SSE41-NEXT: pcmpeqb %xmm3, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: ugt_3_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_3_v16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512VPOPCNTDQ-LABEL: ugt_3_v16i8:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: ugt_3_v16i8:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vzeroupper
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: ugt_3_v16i8:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: ugt_3_v16i8:
; BITALG: # %bb.0:
; BITALG-NEXT: vpopcntb %xmm0, %xmm0
; BITALG-NEXT: vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
; BITALG-NEXT: vpmovm2b %k0, %xmm0
; BITALG-NEXT: retq
  %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
  %3 = icmp ugt <16 x i8> %2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %4 = sext <16 x i1> %3 to <16 x i8>
  ret <16 x i8> %4
}
; Lowering of ctpop(<16 x i8>) followed by `icmp ult 4`, sign-extended to a
; byte mask.
define <16 x i8> @ult_4_v16i8(<16 x i8> %0) {
; SSE2-LABEL: ult_4_v16i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: psubb %xmm1, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: psrlw $2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: paddb %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $4, %xmm1
; SSE2-NEXT: paddb %xmm0, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; SSE2-NEXT: pcmpgtb %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: ult_4_v16i8:
; SSE3: # %bb.0:
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT: psubb %xmm1, %xmm0
; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: pand %xmm1, %xmm2
; SSE3-NEXT: psrlw $2, %xmm0
; SSE3-NEXT: pand %xmm1, %xmm0
; SSE3-NEXT: paddb %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $4, %xmm1
; SSE3-NEXT: paddb %xmm0, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; SSE3-NEXT: pcmpgtb %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: ult_4_v16i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: pand %xmm1, %xmm2
; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT: movdqa %xmm3, %xmm4
; SSSE3-NEXT: pshufb %xmm2, %xmm4
; SSSE3-NEXT: psrlw $4, %xmm0
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pshufb %xmm0, %xmm3
; SSSE3-NEXT: paddb %xmm4, %xmm3
; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
; SSSE3-NEXT: pminub %xmm3, %xmm0
; SSSE3-NEXT: pcmpeqb %xmm3, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: ult_4_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pand %xmm1, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: pshufb %xmm2, %xmm4
; SSE41-NEXT: psrlw $4, %xmm0
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: pshufb %xmm0, %xmm3
; SSE41-NEXT: paddb %xmm4, %xmm3
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
; SSE41-NEXT: pminub %xmm3, %xmm0
; SSE41-NEXT: pcmpeqb %xmm3, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: ult_4_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_4_v16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512VPOPCNTDQ-LABEL: ult_4_v16i8:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; AVX512VPOPCNTDQ-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: ult_4_v16i8:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vzeroupper
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: ult_4_v16i8:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; BITALG_NOVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: ult_4_v16i8:
; BITALG: # %bb.0:
; BITALG-NEXT: vpopcntb %xmm0, %xmm0
; BITALG-NEXT: vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
; BITALG-NEXT: vpmovm2b %k0, %xmm0
; BITALG-NEXT: retq
  %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
  %3 = icmp ult <16 x i8> %2, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
  %4 = sext <16 x i1> %3 to <16 x i8>
  ret <16 x i8> %4
}
; Test: ctpop on each i8 lane, compared `ugt 4`; the <16 x i1> result is
; sign-extended so every lane becomes 0xFF (true) or 0x00 (false).
; CHECK lines below are autogenerated by update_llc_test_checks.py, one
; group per RUN-line feature set (see file header).
694 define <16 x i8> @ugt_4_v16i8(<16 x i8> %0) {
; SSE2/SSE3: no pshufb, so popcount lowers to the bit-parallel
; shift/mask/add sequence, then pcmpgtb against a constant-pool splat.
695 ; SSE2-LABEL: ugt_4_v16i8:
697 ; SSE2-NEXT: movdqa %xmm0, %xmm1
698 ; SSE2-NEXT: psrlw $1, %xmm1
699 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
700 ; SSE2-NEXT: psubb %xmm1, %xmm0
701 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
702 ; SSE2-NEXT: movdqa %xmm0, %xmm2
703 ; SSE2-NEXT: pand %xmm1, %xmm2
704 ; SSE2-NEXT: psrlw $2, %xmm0
705 ; SSE2-NEXT: pand %xmm1, %xmm0
706 ; SSE2-NEXT: paddb %xmm2, %xmm0
707 ; SSE2-NEXT: movdqa %xmm0, %xmm1
708 ; SSE2-NEXT: psrlw $4, %xmm1
709 ; SSE2-NEXT: paddb %xmm1, %xmm0
710 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
711 ; SSE2-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
714 ; SSE3-LABEL: ugt_4_v16i8:
716 ; SSE3-NEXT: movdqa %xmm0, %xmm1
717 ; SSE3-NEXT: psrlw $1, %xmm1
718 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
719 ; SSE3-NEXT: psubb %xmm1, %xmm0
720 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
721 ; SSE3-NEXT: movdqa %xmm0, %xmm2
722 ; SSE3-NEXT: pand %xmm1, %xmm2
723 ; SSE3-NEXT: psrlw $2, %xmm0
724 ; SSE3-NEXT: pand %xmm1, %xmm0
725 ; SSE3-NEXT: paddb %xmm2, %xmm0
726 ; SSE3-NEXT: movdqa %xmm0, %xmm1
727 ; SSE3-NEXT: psrlw $4, %xmm1
728 ; SSE3-NEXT: paddb %xmm1, %xmm0
729 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
730 ; SSE3-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3/SSE4.1: nibble-LUT popcount via pshufb; the unsigned compare is
; expressed as pmaxub with splat(5) then pcmpeqb (x == max(x,5) iff x >= 5).
733 ; SSSE3-LABEL: ugt_4_v16i8:
735 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
736 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
737 ; SSSE3-NEXT: pand %xmm1, %xmm2
738 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
739 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
740 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
741 ; SSSE3-NEXT: psrlw $4, %xmm0
742 ; SSSE3-NEXT: pand %xmm1, %xmm0
743 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
744 ; SSSE3-NEXT: paddb %xmm4, %xmm3
745 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
746 ; SSSE3-NEXT: pmaxub %xmm3, %xmm0
747 ; SSSE3-NEXT: pcmpeqb %xmm3, %xmm0
750 ; SSE41-LABEL: ugt_4_v16i8:
752 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
753 ; SSE41-NEXT: movdqa %xmm0, %xmm2
754 ; SSE41-NEXT: pand %xmm1, %xmm2
755 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
756 ; SSE41-NEXT: movdqa %xmm3, %xmm4
757 ; SSE41-NEXT: pshufb %xmm2, %xmm4
758 ; SSE41-NEXT: psrlw $4, %xmm0
759 ; SSE41-NEXT: pand %xmm1, %xmm0
760 ; SSE41-NEXT: pshufb %xmm0, %xmm3
761 ; SSE41-NEXT: paddb %xmm4, %xmm3
762 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
763 ; SSE41-NEXT: pmaxub %xmm3, %xmm0
764 ; SSE41-NEXT: pcmpeqb %xmm3, %xmm0
; AVX1/AVX2: same LUT popcount using VEX three-operand forms; the splat
; constant for vpmaxub is folded as a memory operand.
767 ; AVX1-LABEL: ugt_4_v16i8:
769 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
770 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
771 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
772 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
773 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
774 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
775 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
776 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
777 ; AVX1-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
778 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
781 ; AVX2-LABEL: ugt_4_v16i8:
783 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
784 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
785 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
786 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
787 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
788 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
789 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
790 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
791 ; AVX2-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
792 ; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ(+VL): zero-extend bytes to dwords, vpopcntd in zmm,
; truncate back with vpmovdb, then a signed vpcmpgtb vs the constant pool
; (valid here since popcounts fit in 0..8); vzeroupper after zmm use.
795 ; AVX512VPOPCNTDQ-LABEL: ugt_4_v16i8:
796 ; AVX512VPOPCNTDQ: # %bb.0:
797 ; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
798 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
799 ; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0
800 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
801 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
802 ; AVX512VPOPCNTDQ-NEXT: retq
804 ; AVX512VPOPCNTDQVL-LABEL: ugt_4_v16i8:
805 ; AVX512VPOPCNTDQVL: # %bb.0:
806 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
807 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
808 ; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
809 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
810 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
811 ; AVX512VPOPCNTDQVL-NEXT: retq
; BITALG: native per-byte vpopcntb. Without VL the xmm input must live in
; zmm (hence the kill comment and vzeroupper); with VL the compare is a
; mask-register vpcmpnleub expanded back to a vector by vpmovm2b.
813 ; BITALG_NOVLX-LABEL: ugt_4_v16i8:
814 ; BITALG_NOVLX: # %bb.0:
815 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
816 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
817 ; BITALG_NOVLX-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
818 ; BITALG_NOVLX-NEXT: vzeroupper
819 ; BITALG_NOVLX-NEXT: retq
821 ; BITALG-LABEL: ugt_4_v16i8:
823 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
824 ; BITALG-NEXT: vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
825 ; BITALG-NEXT: vpmovm2b %k0, %xmm0
; IR under test: popcount each byte, unsigned-greater-than 4, widen mask.
827 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
828 %3 = icmp ugt <16 x i8> %2, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
829 %4 = sext <16 x i1> %3 to <16 x i8>
; Test: ctpop on each i8 lane, compared `ult 5`; the <16 x i1> result is
; sign-extended so every lane becomes 0xFF (true) or 0x00 (false).
; CHECK lines below are autogenerated by update_llc_test_checks.py, one
; group per RUN-line feature set (see file header).
833 define <16 x i8> @ult_5_v16i8(<16 x i8> %0) {
; SSE2/SSE3: bit-parallel popcount, then pcmpgtb with the splat(5) on the
; left-hand side, i.e. 5 > popcnt, which is the ult 5 test.
834 ; SSE2-LABEL: ult_5_v16i8:
836 ; SSE2-NEXT: movdqa %xmm0, %xmm1
837 ; SSE2-NEXT: psrlw $1, %xmm1
838 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
839 ; SSE2-NEXT: psubb %xmm1, %xmm0
840 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
841 ; SSE2-NEXT: movdqa %xmm0, %xmm2
842 ; SSE2-NEXT: pand %xmm1, %xmm2
843 ; SSE2-NEXT: psrlw $2, %xmm0
844 ; SSE2-NEXT: pand %xmm1, %xmm0
845 ; SSE2-NEXT: paddb %xmm2, %xmm0
846 ; SSE2-NEXT: movdqa %xmm0, %xmm1
847 ; SSE2-NEXT: psrlw $4, %xmm1
848 ; SSE2-NEXT: paddb %xmm0, %xmm1
849 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
850 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
851 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm0
854 ; SSE3-LABEL: ult_5_v16i8:
856 ; SSE3-NEXT: movdqa %xmm0, %xmm1
857 ; SSE3-NEXT: psrlw $1, %xmm1
858 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
859 ; SSE3-NEXT: psubb %xmm1, %xmm0
860 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
861 ; SSE3-NEXT: movdqa %xmm0, %xmm2
862 ; SSE3-NEXT: pand %xmm1, %xmm2
863 ; SSE3-NEXT: psrlw $2, %xmm0
864 ; SSE3-NEXT: pand %xmm1, %xmm0
865 ; SSE3-NEXT: paddb %xmm2, %xmm0
866 ; SSE3-NEXT: movdqa %xmm0, %xmm1
867 ; SSE3-NEXT: psrlw $4, %xmm1
868 ; SSE3-NEXT: paddb %xmm0, %xmm1
869 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
870 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
871 ; SSE3-NEXT: pcmpgtb %xmm1, %xmm0
; SSSE3/SSE4.1: nibble-LUT popcount via pshufb; the unsigned compare is
; expressed as pminub with splat(4) then pcmpeqb (x == min(x,4) iff x <= 4).
874 ; SSSE3-LABEL: ult_5_v16i8:
876 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
877 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
878 ; SSSE3-NEXT: pand %xmm1, %xmm2
879 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
880 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
881 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
882 ; SSSE3-NEXT: psrlw $4, %xmm0
883 ; SSSE3-NEXT: pand %xmm1, %xmm0
884 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
885 ; SSSE3-NEXT: paddb %xmm4, %xmm3
886 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
887 ; SSSE3-NEXT: pminub %xmm3, %xmm0
888 ; SSSE3-NEXT: pcmpeqb %xmm3, %xmm0
891 ; SSE41-LABEL: ult_5_v16i8:
893 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
894 ; SSE41-NEXT: movdqa %xmm0, %xmm2
895 ; SSE41-NEXT: pand %xmm1, %xmm2
896 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
897 ; SSE41-NEXT: movdqa %xmm3, %xmm4
898 ; SSE41-NEXT: pshufb %xmm2, %xmm4
899 ; SSE41-NEXT: psrlw $4, %xmm0
900 ; SSE41-NEXT: pand %xmm1, %xmm0
901 ; SSE41-NEXT: pshufb %xmm0, %xmm3
902 ; SSE41-NEXT: paddb %xmm4, %xmm3
903 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
904 ; SSE41-NEXT: pminub %xmm3, %xmm0
905 ; SSE41-NEXT: pcmpeqb %xmm3, %xmm0
; AVX1/AVX2: same LUT popcount using VEX three-operand forms; the splat
; constant for vpminub is folded as a memory operand.
908 ; AVX1-LABEL: ult_5_v16i8:
910 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
911 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
912 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
913 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
914 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
915 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
916 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
917 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
918 ; AVX1-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
919 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
922 ; AVX2-LABEL: ult_5_v16i8:
924 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
925 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
926 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
927 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
928 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
929 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
930 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
931 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
932 ; AVX2-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
933 ; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ(+VL): zero-extend bytes to dwords, vpopcntd in zmm,
; truncate back with vpmovdb; the ult is a signed vpcmpgtb with splat(5)
; as the first source (5 > popcnt); vzeroupper after zmm use.
936 ; AVX512VPOPCNTDQ-LABEL: ult_5_v16i8:
937 ; AVX512VPOPCNTDQ: # %bb.0:
938 ; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
939 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
940 ; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0
941 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
942 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
943 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
944 ; AVX512VPOPCNTDQ-NEXT: retq
946 ; AVX512VPOPCNTDQVL-LABEL: ult_5_v16i8:
947 ; AVX512VPOPCNTDQVL: # %bb.0:
948 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
949 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
950 ; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
951 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
952 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
953 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
954 ; AVX512VPOPCNTDQVL-NEXT: retq
; BITALG: native per-byte vpopcntb. Without VL the compare stays in xmm
; registers; with VL it is a mask-register vpcmpltub expanded back to a
; vector by vpmovm2b.
956 ; BITALG_NOVLX-LABEL: ult_5_v16i8:
957 ; BITALG_NOVLX: # %bb.0:
958 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
959 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
960 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
961 ; BITALG_NOVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
962 ; BITALG_NOVLX-NEXT: vzeroupper
963 ; BITALG_NOVLX-NEXT: retq
965 ; BITALG-LABEL: ult_5_v16i8:
967 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
968 ; BITALG-NEXT: vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
969 ; BITALG-NEXT: vpmovm2b %k0, %xmm0
; IR under test: popcount each byte, unsigned-less-than 5, widen mask.
971 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
972 %3 = icmp ult <16 x i8> %2, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
973 %4 = sext <16 x i1> %3 to <16 x i8>
; Test: ctpop on each i8 lane, compared `ugt 5`; the <16 x i1> result is
; sign-extended so every lane becomes 0xFF (true) or 0x00 (false).
; CHECK lines below are autogenerated by update_llc_test_checks.py, one
; group per RUN-line feature set (see file header).
977 define <16 x i8> @ugt_5_v16i8(<16 x i8> %0) {
; SSE2/SSE3: bit-parallel popcount (shift/mask/add), then pcmpgtb against
; a constant-pool splat.
978 ; SSE2-LABEL: ugt_5_v16i8:
980 ; SSE2-NEXT: movdqa %xmm0, %xmm1
981 ; SSE2-NEXT: psrlw $1, %xmm1
982 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
983 ; SSE2-NEXT: psubb %xmm1, %xmm0
984 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
985 ; SSE2-NEXT: movdqa %xmm0, %xmm2
986 ; SSE2-NEXT: pand %xmm1, %xmm2
987 ; SSE2-NEXT: psrlw $2, %xmm0
988 ; SSE2-NEXT: pand %xmm1, %xmm0
989 ; SSE2-NEXT: paddb %xmm2, %xmm0
990 ; SSE2-NEXT: movdqa %xmm0, %xmm1
991 ; SSE2-NEXT: psrlw $4, %xmm1
992 ; SSE2-NEXT: paddb %xmm1, %xmm0
993 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
994 ; SSE2-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
997 ; SSE3-LABEL: ugt_5_v16i8:
999 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1000 ; SSE3-NEXT: psrlw $1, %xmm1
1001 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1002 ; SSE3-NEXT: psubb %xmm1, %xmm0
1003 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1004 ; SSE3-NEXT: movdqa %xmm0, %xmm2
1005 ; SSE3-NEXT: pand %xmm1, %xmm2
1006 ; SSE3-NEXT: psrlw $2, %xmm0
1007 ; SSE3-NEXT: pand %xmm1, %xmm0
1008 ; SSE3-NEXT: paddb %xmm2, %xmm0
1009 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1010 ; SSE3-NEXT: psrlw $4, %xmm1
1011 ; SSE3-NEXT: paddb %xmm1, %xmm0
1012 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1013 ; SSE3-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3/SSE4.1: nibble-LUT popcount via pshufb; the unsigned compare is
; expressed as pmaxub with splat(6) then pcmpeqb (x == max(x,6) iff x >= 6).
1016 ; SSSE3-LABEL: ugt_5_v16i8:
1018 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1019 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
1020 ; SSSE3-NEXT: pand %xmm1, %xmm2
1021 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1022 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
1023 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
1024 ; SSSE3-NEXT: psrlw $4, %xmm0
1025 ; SSSE3-NEXT: pand %xmm1, %xmm0
1026 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
1027 ; SSSE3-NEXT: paddb %xmm4, %xmm3
1028 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1029 ; SSSE3-NEXT: pmaxub %xmm3, %xmm0
1030 ; SSSE3-NEXT: pcmpeqb %xmm3, %xmm0
1033 ; SSE41-LABEL: ugt_5_v16i8:
1035 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1036 ; SSE41-NEXT: movdqa %xmm0, %xmm2
1037 ; SSE41-NEXT: pand %xmm1, %xmm2
1038 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1039 ; SSE41-NEXT: movdqa %xmm3, %xmm4
1040 ; SSE41-NEXT: pshufb %xmm2, %xmm4
1041 ; SSE41-NEXT: psrlw $4, %xmm0
1042 ; SSE41-NEXT: pand %xmm1, %xmm0
1043 ; SSE41-NEXT: pshufb %xmm0, %xmm3
1044 ; SSE41-NEXT: paddb %xmm4, %xmm3
1045 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1046 ; SSE41-NEXT: pmaxub %xmm3, %xmm0
1047 ; SSE41-NEXT: pcmpeqb %xmm3, %xmm0
; AVX1/AVX2: same LUT popcount using VEX three-operand forms; the splat
; constant for vpmaxub is folded as a memory operand.
1050 ; AVX1-LABEL: ugt_5_v16i8:
1052 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1053 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
1054 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1055 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
1056 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
1057 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1058 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1059 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1060 ; AVX1-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1061 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1064 ; AVX2-LABEL: ugt_5_v16i8:
1066 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1067 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
1068 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1069 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
1070 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
1071 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1072 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1073 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1074 ; AVX2-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1075 ; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ(+VL): zero-extend bytes to dwords, vpopcntd in zmm,
; truncate back with vpmovdb, then a signed vpcmpgtb vs the constant pool;
; vzeroupper after zmm use.
1078 ; AVX512VPOPCNTDQ-LABEL: ugt_5_v16i8:
1079 ; AVX512VPOPCNTDQ: # %bb.0:
1080 ; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1081 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
1082 ; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0
1083 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1084 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
1085 ; AVX512VPOPCNTDQ-NEXT: retq
1087 ; AVX512VPOPCNTDQVL-LABEL: ugt_5_v16i8:
1088 ; AVX512VPOPCNTDQVL: # %bb.0:
1089 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1090 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
1091 ; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
1092 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1093 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
1094 ; AVX512VPOPCNTDQVL-NEXT: retq
; BITALG: native per-byte vpopcntb. Without VL the compare stays in xmm
; (vpcmpgtb); with VL it is a mask-register vpcmpnleub expanded back to a
; vector by vpmovm2b.
1096 ; BITALG_NOVLX-LABEL: ugt_5_v16i8:
1097 ; BITALG_NOVLX: # %bb.0:
1098 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1099 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
1100 ; BITALG_NOVLX-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1101 ; BITALG_NOVLX-NEXT: vzeroupper
1102 ; BITALG_NOVLX-NEXT: retq
1104 ; BITALG-LABEL: ugt_5_v16i8:
1106 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
1107 ; BITALG-NEXT: vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
1108 ; BITALG-NEXT: vpmovm2b %k0, %xmm0
; IR under test: popcount each byte, unsigned-greater-than 5, widen mask.
1110 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
1111 %3 = icmp ugt <16 x i8> %2, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
1112 %4 = sext <16 x i1> %3 to <16 x i8>
; Test: ctpop on each i8 lane, compared `ult 6`; the <16 x i1> result is
; sign-extended so every lane becomes 0xFF (true) or 0x00 (false).
; CHECK lines below are autogenerated by update_llc_test_checks.py, one
; group per RUN-line feature set (see file header).
1116 define <16 x i8> @ult_6_v16i8(<16 x i8> %0) {
; SSE2/SSE3: bit-parallel popcount, then pcmpgtb with the splat(6) on the
; left-hand side, i.e. 6 > popcnt, which is the ult 6 test.
1117 ; SSE2-LABEL: ult_6_v16i8:
1119 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1120 ; SSE2-NEXT: psrlw $1, %xmm1
1121 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1122 ; SSE2-NEXT: psubb %xmm1, %xmm0
1123 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1124 ; SSE2-NEXT: movdqa %xmm0, %xmm2
1125 ; SSE2-NEXT: pand %xmm1, %xmm2
1126 ; SSE2-NEXT: psrlw $2, %xmm0
1127 ; SSE2-NEXT: pand %xmm1, %xmm0
1128 ; SSE2-NEXT: paddb %xmm2, %xmm0
1129 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1130 ; SSE2-NEXT: psrlw $4, %xmm1
1131 ; SSE2-NEXT: paddb %xmm0, %xmm1
1132 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1133 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1134 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm0
1137 ; SSE3-LABEL: ult_6_v16i8:
1139 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1140 ; SSE3-NEXT: psrlw $1, %xmm1
1141 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1142 ; SSE3-NEXT: psubb %xmm1, %xmm0
1143 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1144 ; SSE3-NEXT: movdqa %xmm0, %xmm2
1145 ; SSE3-NEXT: pand %xmm1, %xmm2
1146 ; SSE3-NEXT: psrlw $2, %xmm0
1147 ; SSE3-NEXT: pand %xmm1, %xmm0
1148 ; SSE3-NEXT: paddb %xmm2, %xmm0
1149 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1150 ; SSE3-NEXT: psrlw $4, %xmm1
1151 ; SSE3-NEXT: paddb %xmm0, %xmm1
1152 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1153 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1154 ; SSE3-NEXT: pcmpgtb %xmm1, %xmm0
; SSSE3/SSE4.1: nibble-LUT popcount via pshufb; the unsigned compare is
; expressed as pminub with splat(5) then pcmpeqb (x == min(x,5) iff x <= 5).
1157 ; SSSE3-LABEL: ult_6_v16i8:
1159 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1160 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
1161 ; SSSE3-NEXT: pand %xmm1, %xmm2
1162 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1163 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
1164 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
1165 ; SSSE3-NEXT: psrlw $4, %xmm0
1166 ; SSSE3-NEXT: pand %xmm1, %xmm0
1167 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
1168 ; SSSE3-NEXT: paddb %xmm4, %xmm3
1169 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
1170 ; SSSE3-NEXT: pminub %xmm3, %xmm0
1171 ; SSSE3-NEXT: pcmpeqb %xmm3, %xmm0
1174 ; SSE41-LABEL: ult_6_v16i8:
1176 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1177 ; SSE41-NEXT: movdqa %xmm0, %xmm2
1178 ; SSE41-NEXT: pand %xmm1, %xmm2
1179 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1180 ; SSE41-NEXT: movdqa %xmm3, %xmm4
1181 ; SSE41-NEXT: pshufb %xmm2, %xmm4
1182 ; SSE41-NEXT: psrlw $4, %xmm0
1183 ; SSE41-NEXT: pand %xmm1, %xmm0
1184 ; SSE41-NEXT: pshufb %xmm0, %xmm3
1185 ; SSE41-NEXT: paddb %xmm4, %xmm3
1186 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
1187 ; SSE41-NEXT: pminub %xmm3, %xmm0
1188 ; SSE41-NEXT: pcmpeqb %xmm3, %xmm0
; AVX1/AVX2: same LUT popcount using VEX three-operand forms; the splat
; constant for vpminub is folded as a memory operand.
1191 ; AVX1-LABEL: ult_6_v16i8:
1193 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1194 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
1195 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1196 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
1197 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
1198 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1199 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1200 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1201 ; AVX1-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1202 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1205 ; AVX2-LABEL: ult_6_v16i8:
1207 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1208 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
1209 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1210 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
1211 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
1212 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1213 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1214 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1215 ; AVX2-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1216 ; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ(+VL): zero-extend bytes to dwords, vpopcntd in zmm,
; truncate back with vpmovdb; the ult is a signed vpcmpgtb with splat(6)
; as the first source (6 > popcnt); vzeroupper after zmm use.
1219 ; AVX512VPOPCNTDQ-LABEL: ult_6_v16i8:
1220 ; AVX512VPOPCNTDQ: # %bb.0:
1221 ; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1222 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
1223 ; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0
1224 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1225 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
1226 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
1227 ; AVX512VPOPCNTDQ-NEXT: retq
1229 ; AVX512VPOPCNTDQVL-LABEL: ult_6_v16i8:
1230 ; AVX512VPOPCNTDQVL: # %bb.0:
1231 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1232 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
1233 ; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
1234 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1235 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
1236 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
1237 ; AVX512VPOPCNTDQVL-NEXT: retq
; BITALG: native per-byte vpopcntb. Without VL the compare stays in xmm
; registers; with VL it is a mask-register vpcmpltub expanded back to a
; vector by vpmovm2b.
1239 ; BITALG_NOVLX-LABEL: ult_6_v16i8:
1240 ; BITALG_NOVLX: # %bb.0:
1241 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1242 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
1243 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1244 ; BITALG_NOVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
1245 ; BITALG_NOVLX-NEXT: vzeroupper
1246 ; BITALG_NOVLX-NEXT: retq
1248 ; BITALG-LABEL: ult_6_v16i8:
1250 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
1251 ; BITALG-NEXT: vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
1252 ; BITALG-NEXT: vpmovm2b %k0, %xmm0
; IR under test: popcount each byte, unsigned-less-than 6, widen mask.
1254 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
1255 %3 = icmp ult <16 x i8> %2, <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>
1256 %4 = sext <16 x i1> %3 to <16 x i8>
; Test: ctpop on each i8 lane, compared `ugt 6`; the <16 x i1> result is
; sign-extended so every lane becomes 0xFF (true) or 0x00 (false).
; CHECK lines below are autogenerated by update_llc_test_checks.py, one
; group per RUN-line feature set (see file header).
1260 define <16 x i8> @ugt_6_v16i8(<16 x i8> %0) {
; SSE2/SSE3: bit-parallel popcount (shift/mask/add), then pcmpgtb against
; a constant-pool splat.
1261 ; SSE2-LABEL: ugt_6_v16i8:
1263 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1264 ; SSE2-NEXT: psrlw $1, %xmm1
1265 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1266 ; SSE2-NEXT: psubb %xmm1, %xmm0
1267 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1268 ; SSE2-NEXT: movdqa %xmm0, %xmm2
1269 ; SSE2-NEXT: pand %xmm1, %xmm2
1270 ; SSE2-NEXT: psrlw $2, %xmm0
1271 ; SSE2-NEXT: pand %xmm1, %xmm0
1272 ; SSE2-NEXT: paddb %xmm2, %xmm0
1273 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1274 ; SSE2-NEXT: psrlw $4, %xmm1
1275 ; SSE2-NEXT: paddb %xmm1, %xmm0
1276 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1277 ; SSE2-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1280 ; SSE3-LABEL: ugt_6_v16i8:
1282 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1283 ; SSE3-NEXT: psrlw $1, %xmm1
1284 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1285 ; SSE3-NEXT: psubb %xmm1, %xmm0
1286 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1287 ; SSE3-NEXT: movdqa %xmm0, %xmm2
1288 ; SSE3-NEXT: pand %xmm1, %xmm2
1289 ; SSE3-NEXT: psrlw $2, %xmm0
1290 ; SSE3-NEXT: pand %xmm1, %xmm0
1291 ; SSE3-NEXT: paddb %xmm2, %xmm0
1292 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1293 ; SSE3-NEXT: psrlw $4, %xmm1
1294 ; SSE3-NEXT: paddb %xmm1, %xmm0
1295 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1296 ; SSE3-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3/SSE4.1: nibble-LUT popcount via pshufb; the unsigned compare is
; expressed as pmaxub with splat(7) then pcmpeqb (x == max(x,7) iff x >= 7).
1299 ; SSSE3-LABEL: ugt_6_v16i8:
1301 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1302 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
1303 ; SSSE3-NEXT: pand %xmm1, %xmm2
1304 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1305 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
1306 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
1307 ; SSSE3-NEXT: psrlw $4, %xmm0
1308 ; SSSE3-NEXT: pand %xmm1, %xmm0
1309 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
1310 ; SSSE3-NEXT: paddb %xmm4, %xmm3
1311 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1312 ; SSSE3-NEXT: pmaxub %xmm3, %xmm0
1313 ; SSSE3-NEXT: pcmpeqb %xmm3, %xmm0
1316 ; SSE41-LABEL: ugt_6_v16i8:
1318 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1319 ; SSE41-NEXT: movdqa %xmm0, %xmm2
1320 ; SSE41-NEXT: pand %xmm1, %xmm2
1321 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1322 ; SSE41-NEXT: movdqa %xmm3, %xmm4
1323 ; SSE41-NEXT: pshufb %xmm2, %xmm4
1324 ; SSE41-NEXT: psrlw $4, %xmm0
1325 ; SSE41-NEXT: pand %xmm1, %xmm0
1326 ; SSE41-NEXT: pshufb %xmm0, %xmm3
1327 ; SSE41-NEXT: paddb %xmm4, %xmm3
1328 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1329 ; SSE41-NEXT: pmaxub %xmm3, %xmm0
1330 ; SSE41-NEXT: pcmpeqb %xmm3, %xmm0
; AVX1/AVX2: same LUT popcount using VEX three-operand forms; the splat
; constant for vpmaxub is folded as a memory operand.
1333 ; AVX1-LABEL: ugt_6_v16i8:
1335 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1336 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
1337 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1338 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
1339 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
1340 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1341 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1342 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1343 ; AVX1-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1344 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1347 ; AVX2-LABEL: ugt_6_v16i8:
1349 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1350 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
1351 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1352 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
1353 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
1354 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1355 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1356 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1357 ; AVX2-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1358 ; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ(+VL): zero-extend bytes to dwords, vpopcntd in zmm,
; truncate back with vpmovdb, then a signed vpcmpgtb vs the constant pool;
; vzeroupper after zmm use.
1361 ; AVX512VPOPCNTDQ-LABEL: ugt_6_v16i8:
1362 ; AVX512VPOPCNTDQ: # %bb.0:
1363 ; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1364 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
1365 ; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0
1366 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1367 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
1368 ; AVX512VPOPCNTDQ-NEXT: retq
1370 ; AVX512VPOPCNTDQVL-LABEL: ugt_6_v16i8:
1371 ; AVX512VPOPCNTDQVL: # %bb.0:
1372 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1373 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
1374 ; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
1375 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1376 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
1377 ; AVX512VPOPCNTDQVL-NEXT: retq
; BITALG: native per-byte vpopcntb. Without VL the compare stays in xmm
; (vpcmpgtb); with VL it is a mask-register vpcmpnleub expanded back to a
; vector by vpmovm2b.
1379 ; BITALG_NOVLX-LABEL: ugt_6_v16i8:
1380 ; BITALG_NOVLX: # %bb.0:
1381 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1382 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
1383 ; BITALG_NOVLX-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1384 ; BITALG_NOVLX-NEXT: vzeroupper
1385 ; BITALG_NOVLX-NEXT: retq
1387 ; BITALG-LABEL: ugt_6_v16i8:
1389 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
1390 ; BITALG-NEXT: vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
1391 ; BITALG-NEXT: vpmovm2b %k0, %xmm0
; IR under test: popcount each byte, unsigned-greater-than 6, widen mask.
1393 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
1394 %3 = icmp ugt <16 x i8> %2, <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>
1395 %4 = sext <16 x i1> %3 to <16 x i8>
1399 define <16 x i8> @ult_7_v16i8(<16 x i8> %0) {
1400 ; SSE2-LABEL: ult_7_v16i8:
1402 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1403 ; SSE2-NEXT: psrlw $1, %xmm1
1404 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1405 ; SSE2-NEXT: psubb %xmm1, %xmm0
1406 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1407 ; SSE2-NEXT: movdqa %xmm0, %xmm2
1408 ; SSE2-NEXT: pand %xmm1, %xmm2
1409 ; SSE2-NEXT: psrlw $2, %xmm0
1410 ; SSE2-NEXT: pand %xmm1, %xmm0
1411 ; SSE2-NEXT: paddb %xmm2, %xmm0
1412 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1413 ; SSE2-NEXT: psrlw $4, %xmm1
1414 ; SSE2-NEXT: paddb %xmm0, %xmm1
1415 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1416 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1417 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm0
1420 ; SSE3-LABEL: ult_7_v16i8:
1422 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1423 ; SSE3-NEXT: psrlw $1, %xmm1
1424 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1425 ; SSE3-NEXT: psubb %xmm1, %xmm0
1426 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1427 ; SSE3-NEXT: movdqa %xmm0, %xmm2
1428 ; SSE3-NEXT: pand %xmm1, %xmm2
1429 ; SSE3-NEXT: psrlw $2, %xmm0
1430 ; SSE3-NEXT: pand %xmm1, %xmm0
1431 ; SSE3-NEXT: paddb %xmm2, %xmm0
1432 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1433 ; SSE3-NEXT: psrlw $4, %xmm1
1434 ; SSE3-NEXT: paddb %xmm0, %xmm1
1435 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1436 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1437 ; SSE3-NEXT: pcmpgtb %xmm1, %xmm0
1440 ; SSSE3-LABEL: ult_7_v16i8:
1442 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1443 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
1444 ; SSSE3-NEXT: pand %xmm1, %xmm2
1445 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1446 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
1447 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
1448 ; SSSE3-NEXT: psrlw $4, %xmm0
1449 ; SSSE3-NEXT: pand %xmm1, %xmm0
1450 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
1451 ; SSSE3-NEXT: paddb %xmm4, %xmm3
1452 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1453 ; SSSE3-NEXT: pminub %xmm3, %xmm0
1454 ; SSSE3-NEXT: pcmpeqb %xmm3, %xmm0
1457 ; SSE41-LABEL: ult_7_v16i8:
1459 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1460 ; SSE41-NEXT: movdqa %xmm0, %xmm2
1461 ; SSE41-NEXT: pand %xmm1, %xmm2
1462 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1463 ; SSE41-NEXT: movdqa %xmm3, %xmm4
1464 ; SSE41-NEXT: pshufb %xmm2, %xmm4
1465 ; SSE41-NEXT: psrlw $4, %xmm0
1466 ; SSE41-NEXT: pand %xmm1, %xmm0
1467 ; SSE41-NEXT: pshufb %xmm0, %xmm3
1468 ; SSE41-NEXT: paddb %xmm4, %xmm3
1469 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1470 ; SSE41-NEXT: pminub %xmm3, %xmm0
1471 ; SSE41-NEXT: pcmpeqb %xmm3, %xmm0
1474 ; AVX1-LABEL: ult_7_v16i8:
1476 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1477 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
1478 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1479 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
1480 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
1481 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1482 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1483 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1484 ; AVX1-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1485 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1488 ; AVX2-LABEL: ult_7_v16i8:
1490 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1491 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
1492 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1493 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
1494 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
1495 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1496 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1497 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1498 ; AVX2-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1499 ; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
1502 ; AVX512VPOPCNTDQ-LABEL: ult_7_v16i8:
1503 ; AVX512VPOPCNTDQ: # %bb.0:
1504 ; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1505 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
1506 ; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0
1507 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1508 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
1509 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
1510 ; AVX512VPOPCNTDQ-NEXT: retq
1512 ; AVX512VPOPCNTDQVL-LABEL: ult_7_v16i8:
1513 ; AVX512VPOPCNTDQVL: # %bb.0:
1514 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1515 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
1516 ; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
1517 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1518 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
1519 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
1520 ; AVX512VPOPCNTDQVL-NEXT: retq
1522 ; BITALG_NOVLX-LABEL: ult_7_v16i8:
1523 ; BITALG_NOVLX: # %bb.0:
1524 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1525 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
1526 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1527 ; BITALG_NOVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
1528 ; BITALG_NOVLX-NEXT: vzeroupper
1529 ; BITALG_NOVLX-NEXT: retq
1531 ; BITALG-LABEL: ult_7_v16i8:
1533 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
1534 ; BITALG-NEXT: vpcmpltub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
1535 ; BITALG-NEXT: vpmovm2b %k0, %xmm0
1537 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
1538 %3 = icmp ult <16 x i8> %2, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
1539 %4 = sext <16 x i1> %3 to <16 x i8>
; ctpop(x) > 1 per i16 lane ("more than one bit set"). Non-BITALG targets avoid a
; real popcount and use the x & (x - 1) idiom (paddw -1, pand, compare-eq-zero,
; then invert); BITALG targets use vpopcntw plus an unsigned/signed compare.
1543 define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) {
1544 ; SSE-LABEL: ugt_1_v8i16:
1546 ; SSE-NEXT: pcmpeqd %xmm1, %xmm1
1547 ; SSE-NEXT: movdqa %xmm0, %xmm2
1548 ; SSE-NEXT: paddw %xmm1, %xmm2
1549 ; SSE-NEXT: pand %xmm2, %xmm0
1550 ; SSE-NEXT: pxor %xmm2, %xmm2
1551 ; SSE-NEXT: pcmpeqw %xmm2, %xmm0
1552 ; SSE-NEXT: pxor %xmm1, %xmm0
1555 ; AVX1-LABEL: ugt_1_v8i16:
1557 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1558 ; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm2
1559 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
1560 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1561 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
1562 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
1565 ; AVX2-LABEL: ugt_1_v8i16:
1567 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1568 ; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm2
1569 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
1570 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1571 ; AVX2-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
1572 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
1575 ; AVX512VPOPCNTDQ-LABEL: ugt_1_v8i16:
1576 ; AVX512VPOPCNTDQ: # %bb.0:
1577 ; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1578 ; AVX512VPOPCNTDQ-NEXT: vpaddw %xmm1, %xmm0, %xmm1
1579 ; AVX512VPOPCNTDQ-NEXT: vpand %xmm1, %xmm0, %xmm0
1580 ; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
1581 ; AVX512VPOPCNTDQ-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
1582 ; AVX512VPOPCNTDQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
1583 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1584 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
1585 ; AVX512VPOPCNTDQ-NEXT: retq
1587 ; AVX512VPOPCNTDQVL-LABEL: ugt_1_v8i16:
1588 ; AVX512VPOPCNTDQVL: # %bb.0:
1589 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1590 ; AVX512VPOPCNTDQVL-NEXT: vpaddw %xmm1, %xmm0, %xmm1
1591 ; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0
1592 ; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1593 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
1594 ; AVX512VPOPCNTDQVL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
1595 ; AVX512VPOPCNTDQVL-NEXT: retq
1597 ; BITALG_NOVLX-LABEL: ugt_1_v8i16:
1598 ; BITALG_NOVLX: # %bb.0:
1599 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1600 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
1601 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1602 ; BITALG_NOVLX-NEXT: vzeroupper
1603 ; BITALG_NOVLX-NEXT: retq
1605 ; BITALG-LABEL: ugt_1_v8i16:
1607 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
1608 ; BITALG-NEXT: vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
1609 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
1611 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
1612 %3 = icmp ugt <8 x i16> %2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1613 %4 = sext <8 x i1> %3 to <8 x i16>
; ctpop(x) < 2 per i16 lane ("at most one bit set" — power of two or zero).
; Dual of ugt_1: same x & (x - 1) == 0 idiom but without the final inversion;
; BITALG targets use vpopcntw and compare directly against 2.
1617 define <8 x i16> @ult_2_v8i16(<8 x i16> %0) {
1618 ; SSE-LABEL: ult_2_v8i16:
1620 ; SSE-NEXT: pcmpeqd %xmm1, %xmm1
1621 ; SSE-NEXT: paddw %xmm0, %xmm1
1622 ; SSE-NEXT: pand %xmm1, %xmm0
1623 ; SSE-NEXT: pxor %xmm1, %xmm1
1624 ; SSE-NEXT: pcmpeqw %xmm1, %xmm0
1627 ; AVX-LABEL: ult_2_v8i16:
1629 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1630 ; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm1
1631 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
1632 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1633 ; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
1636 ; BITALG_NOVLX-LABEL: ult_2_v8i16:
1637 ; BITALG_NOVLX: # %bb.0:
1638 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1639 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
1640 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2]
1641 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
1642 ; BITALG_NOVLX-NEXT: vzeroupper
1643 ; BITALG_NOVLX-NEXT: retq
1645 ; BITALG-LABEL: ult_2_v8i16:
1647 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
1648 ; BITALG-NEXT: vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
1649 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
1651 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
1652 %3 = icmp ult <8 x i16> %2, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
1653 %4 = sext <8 x i1> %3 to <8 x i16>
; ctpop(x) > 2 per i16 lane. Here a full popcount is required: SSE2/SSE3 use the
; shift/mask bit-math expansion, SSSE3/SSE4.1/AVX use the pshufb nibble-LUT, the
; VPOPCNTDQ targets widen to i32 lanes for vpopcntd, and BITALG uses vpopcntw.
; Byte counts are merged to words via psllw $8 / paddb / psrlw $8 before the compare.
1657 define <8 x i16> @ugt_2_v8i16(<8 x i16> %0) {
1658 ; SSE2-LABEL: ugt_2_v8i16:
1660 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1661 ; SSE2-NEXT: psrlw $1, %xmm1
1662 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1663 ; SSE2-NEXT: psubb %xmm1, %xmm0
1664 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1665 ; SSE2-NEXT: movdqa %xmm0, %xmm2
1666 ; SSE2-NEXT: pand %xmm1, %xmm2
1667 ; SSE2-NEXT: psrlw $2, %xmm0
1668 ; SSE2-NEXT: pand %xmm1, %xmm0
1669 ; SSE2-NEXT: paddb %xmm2, %xmm0
1670 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1671 ; SSE2-NEXT: psrlw $4, %xmm1
1672 ; SSE2-NEXT: paddb %xmm1, %xmm0
1673 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1674 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1675 ; SSE2-NEXT: psllw $8, %xmm1
1676 ; SSE2-NEXT: paddb %xmm1, %xmm0
1677 ; SSE2-NEXT: psrlw $8, %xmm0
1678 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1681 ; SSE3-LABEL: ugt_2_v8i16:
1683 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1684 ; SSE3-NEXT: psrlw $1, %xmm1
1685 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1686 ; SSE3-NEXT: psubb %xmm1, %xmm0
1687 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1688 ; SSE3-NEXT: movdqa %xmm0, %xmm2
1689 ; SSE3-NEXT: pand %xmm1, %xmm2
1690 ; SSE3-NEXT: psrlw $2, %xmm0
1691 ; SSE3-NEXT: pand %xmm1, %xmm0
1692 ; SSE3-NEXT: paddb %xmm2, %xmm0
1693 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1694 ; SSE3-NEXT: psrlw $4, %xmm1
1695 ; SSE3-NEXT: paddb %xmm1, %xmm0
1696 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1697 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1698 ; SSE3-NEXT: psllw $8, %xmm1
1699 ; SSE3-NEXT: paddb %xmm1, %xmm0
1700 ; SSE3-NEXT: psrlw $8, %xmm0
1701 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1704 ; SSSE3-LABEL: ugt_2_v8i16:
1706 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1707 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
1708 ; SSSE3-NEXT: pand %xmm1, %xmm2
1709 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1710 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
1711 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
1712 ; SSSE3-NEXT: psrlw $4, %xmm0
1713 ; SSSE3-NEXT: pand %xmm1, %xmm0
1714 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
1715 ; SSSE3-NEXT: paddb %xmm4, %xmm3
1716 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
1717 ; SSSE3-NEXT: psllw $8, %xmm0
1718 ; SSSE3-NEXT: paddb %xmm3, %xmm0
1719 ; SSSE3-NEXT: psrlw $8, %xmm0
1720 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1723 ; SSE41-LABEL: ugt_2_v8i16:
1725 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1726 ; SSE41-NEXT: movdqa %xmm0, %xmm2
1727 ; SSE41-NEXT: pand %xmm1, %xmm2
1728 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1729 ; SSE41-NEXT: movdqa %xmm3, %xmm4
1730 ; SSE41-NEXT: pshufb %xmm2, %xmm4
1731 ; SSE41-NEXT: psrlw $4, %xmm0
1732 ; SSE41-NEXT: pand %xmm1, %xmm0
1733 ; SSE41-NEXT: pshufb %xmm0, %xmm3
1734 ; SSE41-NEXT: paddb %xmm4, %xmm3
1735 ; SSE41-NEXT: movdqa %xmm3, %xmm0
1736 ; SSE41-NEXT: psllw $8, %xmm0
1737 ; SSE41-NEXT: paddb %xmm3, %xmm0
1738 ; SSE41-NEXT: psrlw $8, %xmm0
1739 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1742 ; AVX1-LABEL: ugt_2_v8i16:
1744 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1745 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
1746 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1747 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
1748 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
1749 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1750 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1751 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1752 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
1753 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
1754 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
1755 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1758 ; AVX2-LABEL: ugt_2_v8i16:
1760 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1761 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
1762 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1763 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
1764 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
1765 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1766 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1767 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1768 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
1769 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
1770 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
1771 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1774 ; AVX512VPOPCNTDQ-LABEL: ugt_2_v8i16:
1775 ; AVX512VPOPCNTDQ: # %bb.0:
1776 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1777 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
1778 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
1779 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1780 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
1781 ; AVX512VPOPCNTDQ-NEXT: retq
1783 ; AVX512VPOPCNTDQVL-LABEL: ugt_2_v8i16:
1784 ; AVX512VPOPCNTDQVL: # %bb.0:
1785 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1786 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
1787 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
1788 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1789 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
1790 ; AVX512VPOPCNTDQVL-NEXT: retq
1792 ; BITALG_NOVLX-LABEL: ugt_2_v8i16:
1793 ; BITALG_NOVLX: # %bb.0:
1794 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1795 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
1796 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1797 ; BITALG_NOVLX-NEXT: vzeroupper
1798 ; BITALG_NOVLX-NEXT: retq
1800 ; BITALG-LABEL: ugt_2_v8i16:
1802 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
1803 ; BITALG-NEXT: vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
1804 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
1806 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
1807 %3 = icmp ugt <8 x i16> %2, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
1808 %4 = sext <8 x i1> %3 to <8 x i16>
; ctpop(x) < 3 per i16 lane. Same popcount expansions as ugt_2; the ult compare
; is emitted as a signed greater-than with the constant splat [3 x 8] as the
; first operand (3 > popcnt), or vpcmpltuw on BITALG+VL.
1812 define <8 x i16> @ult_3_v8i16(<8 x i16> %0) {
1813 ; SSE2-LABEL: ult_3_v8i16:
1815 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1816 ; SSE2-NEXT: psrlw $1, %xmm1
1817 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1818 ; SSE2-NEXT: psubb %xmm1, %xmm0
1819 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1820 ; SSE2-NEXT: movdqa %xmm0, %xmm2
1821 ; SSE2-NEXT: pand %xmm1, %xmm2
1822 ; SSE2-NEXT: psrlw $2, %xmm0
1823 ; SSE2-NEXT: pand %xmm1, %xmm0
1824 ; SSE2-NEXT: paddb %xmm2, %xmm0
1825 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1826 ; SSE2-NEXT: psrlw $4, %xmm1
1827 ; SSE2-NEXT: paddb %xmm0, %xmm1
1828 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1829 ; SSE2-NEXT: movdqa %xmm1, %xmm2
1830 ; SSE2-NEXT: psllw $8, %xmm2
1831 ; SSE2-NEXT: paddb %xmm1, %xmm2
1832 ; SSE2-NEXT: psrlw $8, %xmm2
1833 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3]
1834 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
1837 ; SSE3-LABEL: ult_3_v8i16:
1839 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1840 ; SSE3-NEXT: psrlw $1, %xmm1
1841 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1842 ; SSE3-NEXT: psubb %xmm1, %xmm0
1843 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1844 ; SSE3-NEXT: movdqa %xmm0, %xmm2
1845 ; SSE3-NEXT: pand %xmm1, %xmm2
1846 ; SSE3-NEXT: psrlw $2, %xmm0
1847 ; SSE3-NEXT: pand %xmm1, %xmm0
1848 ; SSE3-NEXT: paddb %xmm2, %xmm0
1849 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1850 ; SSE3-NEXT: psrlw $4, %xmm1
1851 ; SSE3-NEXT: paddb %xmm0, %xmm1
1852 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1853 ; SSE3-NEXT: movdqa %xmm1, %xmm2
1854 ; SSE3-NEXT: psllw $8, %xmm2
1855 ; SSE3-NEXT: paddb %xmm1, %xmm2
1856 ; SSE3-NEXT: psrlw $8, %xmm2
1857 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3]
1858 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
1861 ; SSSE3-LABEL: ult_3_v8i16:
1863 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1864 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
1865 ; SSSE3-NEXT: pand %xmm1, %xmm2
1866 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1867 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
1868 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
1869 ; SSSE3-NEXT: psrlw $4, %xmm0
1870 ; SSSE3-NEXT: pand %xmm1, %xmm0
1871 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
1872 ; SSSE3-NEXT: paddb %xmm4, %xmm3
1873 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
1874 ; SSSE3-NEXT: psllw $8, %xmm1
1875 ; SSSE3-NEXT: paddb %xmm3, %xmm1
1876 ; SSSE3-NEXT: psrlw $8, %xmm1
1877 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3]
1878 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
1881 ; SSE41-LABEL: ult_3_v8i16:
1883 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1884 ; SSE41-NEXT: movdqa %xmm0, %xmm2
1885 ; SSE41-NEXT: pand %xmm1, %xmm2
1886 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1887 ; SSE41-NEXT: movdqa %xmm3, %xmm4
1888 ; SSE41-NEXT: pshufb %xmm2, %xmm4
1889 ; SSE41-NEXT: psrlw $4, %xmm0
1890 ; SSE41-NEXT: pand %xmm1, %xmm0
1891 ; SSE41-NEXT: pshufb %xmm0, %xmm3
1892 ; SSE41-NEXT: paddb %xmm4, %xmm3
1893 ; SSE41-NEXT: movdqa %xmm3, %xmm1
1894 ; SSE41-NEXT: psllw $8, %xmm1
1895 ; SSE41-NEXT: paddb %xmm3, %xmm1
1896 ; SSE41-NEXT: psrlw $8, %xmm1
1897 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3]
1898 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
1901 ; AVX1-LABEL: ult_3_v8i16:
1903 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1904 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
1905 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1906 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
1907 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
1908 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1909 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1910 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1911 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
1912 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
1913 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
1914 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3]
1915 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
1918 ; AVX2-LABEL: ult_3_v8i16:
1920 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1921 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
1922 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1923 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
1924 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
1925 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1926 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1927 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1928 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
1929 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
1930 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
1931 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3]
1932 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
1935 ; AVX512VPOPCNTDQ-LABEL: ult_3_v8i16:
1936 ; AVX512VPOPCNTDQ: # %bb.0:
1937 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1938 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
1939 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
1940 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3]
1941 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
1942 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
1943 ; AVX512VPOPCNTDQ-NEXT: retq
1945 ; AVX512VPOPCNTDQVL-LABEL: ult_3_v8i16:
1946 ; AVX512VPOPCNTDQVL: # %bb.0:
1947 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1948 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
1949 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
1950 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3]
1951 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
1952 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
1953 ; AVX512VPOPCNTDQVL-NEXT: retq
1955 ; BITALG_NOVLX-LABEL: ult_3_v8i16:
1956 ; BITALG_NOVLX: # %bb.0:
1957 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1958 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
1959 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3]
1960 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
1961 ; BITALG_NOVLX-NEXT: vzeroupper
1962 ; BITALG_NOVLX-NEXT: retq
1964 ; BITALG-LABEL: ult_3_v8i16:
1966 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
1967 ; BITALG-NEXT: vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
1968 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
1970 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
1971 %3 = icmp ult <8 x i16> %2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
1972 %4 = sext <8 x i1> %3 to <8 x i16>
; ctpop(x) > 3 per i16 lane. Same expansion shapes as ugt_2 (bit-math, nibble
; LUT, vpopcntd-widening, vpopcntw); only the compare constant differs, so the
; constant pool reference is matched via the LCPI regex rather than a literal.
1976 define <8 x i16> @ugt_3_v8i16(<8 x i16> %0) {
1977 ; SSE2-LABEL: ugt_3_v8i16:
1979 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1980 ; SSE2-NEXT: psrlw $1, %xmm1
1981 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1982 ; SSE2-NEXT: psubb %xmm1, %xmm0
1983 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1984 ; SSE2-NEXT: movdqa %xmm0, %xmm2
1985 ; SSE2-NEXT: pand %xmm1, %xmm2
1986 ; SSE2-NEXT: psrlw $2, %xmm0
1987 ; SSE2-NEXT: pand %xmm1, %xmm0
1988 ; SSE2-NEXT: paddb %xmm2, %xmm0
1989 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1990 ; SSE2-NEXT: psrlw $4, %xmm1
1991 ; SSE2-NEXT: paddb %xmm1, %xmm0
1992 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1993 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1994 ; SSE2-NEXT: psllw $8, %xmm1
1995 ; SSE2-NEXT: paddb %xmm1, %xmm0
1996 ; SSE2-NEXT: psrlw $8, %xmm0
1997 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2000 ; SSE3-LABEL: ugt_3_v8i16:
2002 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2003 ; SSE3-NEXT: psrlw $1, %xmm1
2004 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2005 ; SSE3-NEXT: psubb %xmm1, %xmm0
2006 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2007 ; SSE3-NEXT: movdqa %xmm0, %xmm2
2008 ; SSE3-NEXT: pand %xmm1, %xmm2
2009 ; SSE3-NEXT: psrlw $2, %xmm0
2010 ; SSE3-NEXT: pand %xmm1, %xmm0
2011 ; SSE3-NEXT: paddb %xmm2, %xmm0
2012 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2013 ; SSE3-NEXT: psrlw $4, %xmm1
2014 ; SSE3-NEXT: paddb %xmm1, %xmm0
2015 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2016 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2017 ; SSE3-NEXT: psllw $8, %xmm1
2018 ; SSE3-NEXT: paddb %xmm1, %xmm0
2019 ; SSE3-NEXT: psrlw $8, %xmm0
2020 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2023 ; SSSE3-LABEL: ugt_3_v8i16:
2025 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2026 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
2027 ; SSSE3-NEXT: pand %xmm1, %xmm2
2028 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2029 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
2030 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
2031 ; SSSE3-NEXT: psrlw $4, %xmm0
2032 ; SSSE3-NEXT: pand %xmm1, %xmm0
2033 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
2034 ; SSSE3-NEXT: paddb %xmm4, %xmm3
2035 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
2036 ; SSSE3-NEXT: psllw $8, %xmm0
2037 ; SSSE3-NEXT: paddb %xmm3, %xmm0
2038 ; SSSE3-NEXT: psrlw $8, %xmm0
2039 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2042 ; SSE41-LABEL: ugt_3_v8i16:
2044 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2045 ; SSE41-NEXT: movdqa %xmm0, %xmm2
2046 ; SSE41-NEXT: pand %xmm1, %xmm2
2047 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2048 ; SSE41-NEXT: movdqa %xmm3, %xmm4
2049 ; SSE41-NEXT: pshufb %xmm2, %xmm4
2050 ; SSE41-NEXT: psrlw $4, %xmm0
2051 ; SSE41-NEXT: pand %xmm1, %xmm0
2052 ; SSE41-NEXT: pshufb %xmm0, %xmm3
2053 ; SSE41-NEXT: paddb %xmm4, %xmm3
2054 ; SSE41-NEXT: movdqa %xmm3, %xmm0
2055 ; SSE41-NEXT: psllw $8, %xmm0
2056 ; SSE41-NEXT: paddb %xmm3, %xmm0
2057 ; SSE41-NEXT: psrlw $8, %xmm0
2058 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2061 ; AVX1-LABEL: ugt_3_v8i16:
2063 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2064 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
2065 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2066 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2067 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
2068 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
2069 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
2070 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2071 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
2072 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
2073 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
2074 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2077 ; AVX2-LABEL: ugt_3_v8i16:
2079 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2080 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
2081 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2082 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2083 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
2084 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
2085 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
2086 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2087 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
2088 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
2089 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
2090 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2093 ; AVX512VPOPCNTDQ-LABEL: ugt_3_v8i16:
2094 ; AVX512VPOPCNTDQ: # %bb.0:
2095 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2096 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
2097 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
2098 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2099 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
2100 ; AVX512VPOPCNTDQ-NEXT: retq
2102 ; AVX512VPOPCNTDQVL-LABEL: ugt_3_v8i16:
2103 ; AVX512VPOPCNTDQVL: # %bb.0:
2104 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2105 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
2106 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
2107 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2108 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
2109 ; AVX512VPOPCNTDQVL-NEXT: retq
2111 ; BITALG_NOVLX-LABEL: ugt_3_v8i16:
2112 ; BITALG_NOVLX: # %bb.0:
2113 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2114 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
2115 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2116 ; BITALG_NOVLX-NEXT: vzeroupper
2117 ; BITALG_NOVLX-NEXT: retq
2119 ; BITALG-LABEL: ugt_3_v8i16:
2121 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
2122 ; BITALG-NEXT: vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
2123 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
2125 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
2126 %3 = icmp ugt <8 x i16> %2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
2127 %4 = sext <8 x i1> %3 to <8 x i16>
; ctpop(x) < 4 per i16 lane. Mirrors ult_3: popcount expansion followed by a
; signed greater-than with the splat constant [4 x 8] as the first operand
; (4 > popcnt), or vpcmpltuw against a memory constant on BITALG+VL.
2131 define <8 x i16> @ult_4_v8i16(<8 x i16> %0) {
2132 ; SSE2-LABEL: ult_4_v8i16:
2134 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2135 ; SSE2-NEXT: psrlw $1, %xmm1
2136 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2137 ; SSE2-NEXT: psubb %xmm1, %xmm0
2138 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2139 ; SSE2-NEXT: movdqa %xmm0, %xmm2
2140 ; SSE2-NEXT: pand %xmm1, %xmm2
2141 ; SSE2-NEXT: psrlw $2, %xmm0
2142 ; SSE2-NEXT: pand %xmm1, %xmm0
2143 ; SSE2-NEXT: paddb %xmm2, %xmm0
2144 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2145 ; SSE2-NEXT: psrlw $4, %xmm1
2146 ; SSE2-NEXT: paddb %xmm0, %xmm1
2147 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2148 ; SSE2-NEXT: movdqa %xmm1, %xmm2
2149 ; SSE2-NEXT: psllw $8, %xmm2
2150 ; SSE2-NEXT: paddb %xmm1, %xmm2
2151 ; SSE2-NEXT: psrlw $8, %xmm2
2152 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4]
2153 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
2156 ; SSE3-LABEL: ult_4_v8i16:
2158 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2159 ; SSE3-NEXT: psrlw $1, %xmm1
2160 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2161 ; SSE3-NEXT: psubb %xmm1, %xmm0
2162 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2163 ; SSE3-NEXT: movdqa %xmm0, %xmm2
2164 ; SSE3-NEXT: pand %xmm1, %xmm2
2165 ; SSE3-NEXT: psrlw $2, %xmm0
2166 ; SSE3-NEXT: pand %xmm1, %xmm0
2167 ; SSE3-NEXT: paddb %xmm2, %xmm0
2168 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2169 ; SSE3-NEXT: psrlw $4, %xmm1
2170 ; SSE3-NEXT: paddb %xmm0, %xmm1
2171 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2172 ; SSE3-NEXT: movdqa %xmm1, %xmm2
2173 ; SSE3-NEXT: psllw $8, %xmm2
2174 ; SSE3-NEXT: paddb %xmm1, %xmm2
2175 ; SSE3-NEXT: psrlw $8, %xmm2
2176 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4]
2177 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
2180 ; SSSE3-LABEL: ult_4_v8i16:
2182 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2183 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
2184 ; SSSE3-NEXT: pand %xmm1, %xmm2
2185 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2186 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
2187 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
2188 ; SSSE3-NEXT: psrlw $4, %xmm0
2189 ; SSSE3-NEXT: pand %xmm1, %xmm0
2190 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
2191 ; SSSE3-NEXT: paddb %xmm4, %xmm3
2192 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
2193 ; SSSE3-NEXT: psllw $8, %xmm1
2194 ; SSSE3-NEXT: paddb %xmm3, %xmm1
2195 ; SSSE3-NEXT: psrlw $8, %xmm1
2196 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4]
2197 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
2200 ; SSE41-LABEL: ult_4_v8i16:
2202 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2203 ; SSE41-NEXT: movdqa %xmm0, %xmm2
2204 ; SSE41-NEXT: pand %xmm1, %xmm2
2205 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2206 ; SSE41-NEXT: movdqa %xmm3, %xmm4
2207 ; SSE41-NEXT: pshufb %xmm2, %xmm4
2208 ; SSE41-NEXT: psrlw $4, %xmm0
2209 ; SSE41-NEXT: pand %xmm1, %xmm0
2210 ; SSE41-NEXT: pshufb %xmm0, %xmm3
2211 ; SSE41-NEXT: paddb %xmm4, %xmm3
2212 ; SSE41-NEXT: movdqa %xmm3, %xmm1
2213 ; SSE41-NEXT: psllw $8, %xmm1
2214 ; SSE41-NEXT: paddb %xmm3, %xmm1
2215 ; SSE41-NEXT: psrlw $8, %xmm1
2216 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4]
2217 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
2220 ; AVX1-LABEL: ult_4_v8i16:
2222 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2223 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
2224 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2225 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2226 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
2227 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
2228 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
2229 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2230 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
2231 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
2232 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
2233 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
2234 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2237 ; AVX2-LABEL: ult_4_v8i16:
2239 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2240 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
2241 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2242 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2243 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
2244 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
2245 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
2246 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2247 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
2248 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
2249 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
2250 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
2251 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2254 ; AVX512VPOPCNTDQ-LABEL: ult_4_v8i16:
2255 ; AVX512VPOPCNTDQ: # %bb.0:
2256 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2257 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
2258 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
2259 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
2260 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2261 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
2262 ; AVX512VPOPCNTDQ-NEXT: retq
2264 ; AVX512VPOPCNTDQVL-LABEL: ult_4_v8i16:
2265 ; AVX512VPOPCNTDQVL: # %bb.0:
2266 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2267 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
2268 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
2269 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
2270 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2271 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
2272 ; AVX512VPOPCNTDQVL-NEXT: retq
2274 ; BITALG_NOVLX-LABEL: ult_4_v8i16:
2275 ; BITALG_NOVLX: # %bb.0:
2276 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2277 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
2278 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
2279 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2280 ; BITALG_NOVLX-NEXT: vzeroupper
2281 ; BITALG_NOVLX-NEXT: retq
2283 ; BITALG-LABEL: ult_4_v8i16:
2285 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
2286 ; BITALG-NEXT: vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
2287 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
2289 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
2290 %3 = icmp ult <8 x i16> %2, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
2291 %4 = sext <8 x i1> %3 to <8 x i16>
2295 define <8 x i16> @ugt_4_v8i16(<8 x i16> %0) {
2296 ; SSE2-LABEL: ugt_4_v8i16:
2298 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2299 ; SSE2-NEXT: psrlw $1, %xmm1
2300 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2301 ; SSE2-NEXT: psubb %xmm1, %xmm0
2302 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2303 ; SSE2-NEXT: movdqa %xmm0, %xmm2
2304 ; SSE2-NEXT: pand %xmm1, %xmm2
2305 ; SSE2-NEXT: psrlw $2, %xmm0
2306 ; SSE2-NEXT: pand %xmm1, %xmm0
2307 ; SSE2-NEXT: paddb %xmm2, %xmm0
2308 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2309 ; SSE2-NEXT: psrlw $4, %xmm1
2310 ; SSE2-NEXT: paddb %xmm1, %xmm0
2311 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2312 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2313 ; SSE2-NEXT: psllw $8, %xmm1
2314 ; SSE2-NEXT: paddb %xmm1, %xmm0
2315 ; SSE2-NEXT: psrlw $8, %xmm0
2316 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2319 ; SSE3-LABEL: ugt_4_v8i16:
2321 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2322 ; SSE3-NEXT: psrlw $1, %xmm1
2323 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2324 ; SSE3-NEXT: psubb %xmm1, %xmm0
2325 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2326 ; SSE3-NEXT: movdqa %xmm0, %xmm2
2327 ; SSE3-NEXT: pand %xmm1, %xmm2
2328 ; SSE3-NEXT: psrlw $2, %xmm0
2329 ; SSE3-NEXT: pand %xmm1, %xmm0
2330 ; SSE3-NEXT: paddb %xmm2, %xmm0
2331 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2332 ; SSE3-NEXT: psrlw $4, %xmm1
2333 ; SSE3-NEXT: paddb %xmm1, %xmm0
2334 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2335 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2336 ; SSE3-NEXT: psllw $8, %xmm1
2337 ; SSE3-NEXT: paddb %xmm1, %xmm0
2338 ; SSE3-NEXT: psrlw $8, %xmm0
2339 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2342 ; SSSE3-LABEL: ugt_4_v8i16:
2344 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2345 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
2346 ; SSSE3-NEXT: pand %xmm1, %xmm2
2347 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2348 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
2349 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
2350 ; SSSE3-NEXT: psrlw $4, %xmm0
2351 ; SSSE3-NEXT: pand %xmm1, %xmm0
2352 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
2353 ; SSSE3-NEXT: paddb %xmm4, %xmm3
2354 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
2355 ; SSSE3-NEXT: psllw $8, %xmm0
2356 ; SSSE3-NEXT: paddb %xmm3, %xmm0
2357 ; SSSE3-NEXT: psrlw $8, %xmm0
2358 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2361 ; SSE41-LABEL: ugt_4_v8i16:
2363 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2364 ; SSE41-NEXT: movdqa %xmm0, %xmm2
2365 ; SSE41-NEXT: pand %xmm1, %xmm2
2366 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2367 ; SSE41-NEXT: movdqa %xmm3, %xmm4
2368 ; SSE41-NEXT: pshufb %xmm2, %xmm4
2369 ; SSE41-NEXT: psrlw $4, %xmm0
2370 ; SSE41-NEXT: pand %xmm1, %xmm0
2371 ; SSE41-NEXT: pshufb %xmm0, %xmm3
2372 ; SSE41-NEXT: paddb %xmm4, %xmm3
2373 ; SSE41-NEXT: movdqa %xmm3, %xmm0
2374 ; SSE41-NEXT: psllw $8, %xmm0
2375 ; SSE41-NEXT: paddb %xmm3, %xmm0
2376 ; SSE41-NEXT: psrlw $8, %xmm0
2377 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2380 ; AVX1-LABEL: ugt_4_v8i16:
2382 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2383 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
2384 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2385 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2386 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
2387 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
2388 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
2389 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2390 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
2391 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
2392 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
2393 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2396 ; AVX2-LABEL: ugt_4_v8i16:
2398 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2399 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
2400 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2401 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2402 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
2403 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
2404 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
2405 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2406 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
2407 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
2408 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
2409 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2412 ; AVX512VPOPCNTDQ-LABEL: ugt_4_v8i16:
2413 ; AVX512VPOPCNTDQ: # %bb.0:
2414 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2415 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
2416 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
2417 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2418 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
2419 ; AVX512VPOPCNTDQ-NEXT: retq
2421 ; AVX512VPOPCNTDQVL-LABEL: ugt_4_v8i16:
2422 ; AVX512VPOPCNTDQVL: # %bb.0:
2423 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2424 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
2425 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
2426 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2427 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
2428 ; AVX512VPOPCNTDQVL-NEXT: retq
2430 ; BITALG_NOVLX-LABEL: ugt_4_v8i16:
2431 ; BITALG_NOVLX: # %bb.0:
2432 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2433 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
2434 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2435 ; BITALG_NOVLX-NEXT: vzeroupper
2436 ; BITALG_NOVLX-NEXT: retq
2438 ; BITALG-LABEL: ugt_4_v8i16:
2440 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
2441 ; BITALG-NEXT: vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
2442 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
2444 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
2445 %3 = icmp ugt <8 x i16> %2, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
2446 %4 = sext <8 x i1> %3 to <8 x i16>
2450 define <8 x i16> @ult_5_v8i16(<8 x i16> %0) {
2451 ; SSE2-LABEL: ult_5_v8i16:
2453 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2454 ; SSE2-NEXT: psrlw $1, %xmm1
2455 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2456 ; SSE2-NEXT: psubb %xmm1, %xmm0
2457 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2458 ; SSE2-NEXT: movdqa %xmm0, %xmm2
2459 ; SSE2-NEXT: pand %xmm1, %xmm2
2460 ; SSE2-NEXT: psrlw $2, %xmm0
2461 ; SSE2-NEXT: pand %xmm1, %xmm0
2462 ; SSE2-NEXT: paddb %xmm2, %xmm0
2463 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2464 ; SSE2-NEXT: psrlw $4, %xmm1
2465 ; SSE2-NEXT: paddb %xmm0, %xmm1
2466 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2467 ; SSE2-NEXT: movdqa %xmm1, %xmm2
2468 ; SSE2-NEXT: psllw $8, %xmm2
2469 ; SSE2-NEXT: paddb %xmm1, %xmm2
2470 ; SSE2-NEXT: psrlw $8, %xmm2
2471 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5]
2472 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
2475 ; SSE3-LABEL: ult_5_v8i16:
2477 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2478 ; SSE3-NEXT: psrlw $1, %xmm1
2479 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2480 ; SSE3-NEXT: psubb %xmm1, %xmm0
2481 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2482 ; SSE3-NEXT: movdqa %xmm0, %xmm2
2483 ; SSE3-NEXT: pand %xmm1, %xmm2
2484 ; SSE3-NEXT: psrlw $2, %xmm0
2485 ; SSE3-NEXT: pand %xmm1, %xmm0
2486 ; SSE3-NEXT: paddb %xmm2, %xmm0
2487 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2488 ; SSE3-NEXT: psrlw $4, %xmm1
2489 ; SSE3-NEXT: paddb %xmm0, %xmm1
2490 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2491 ; SSE3-NEXT: movdqa %xmm1, %xmm2
2492 ; SSE3-NEXT: psllw $8, %xmm2
2493 ; SSE3-NEXT: paddb %xmm1, %xmm2
2494 ; SSE3-NEXT: psrlw $8, %xmm2
2495 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5]
2496 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
2499 ; SSSE3-LABEL: ult_5_v8i16:
2501 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2502 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
2503 ; SSSE3-NEXT: pand %xmm1, %xmm2
2504 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2505 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
2506 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
2507 ; SSSE3-NEXT: psrlw $4, %xmm0
2508 ; SSSE3-NEXT: pand %xmm1, %xmm0
2509 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
2510 ; SSSE3-NEXT: paddb %xmm4, %xmm3
2511 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
2512 ; SSSE3-NEXT: psllw $8, %xmm1
2513 ; SSSE3-NEXT: paddb %xmm3, %xmm1
2514 ; SSSE3-NEXT: psrlw $8, %xmm1
2515 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5]
2516 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
2519 ; SSE41-LABEL: ult_5_v8i16:
2521 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2522 ; SSE41-NEXT: movdqa %xmm0, %xmm2
2523 ; SSE41-NEXT: pand %xmm1, %xmm2
2524 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2525 ; SSE41-NEXT: movdqa %xmm3, %xmm4
2526 ; SSE41-NEXT: pshufb %xmm2, %xmm4
2527 ; SSE41-NEXT: psrlw $4, %xmm0
2528 ; SSE41-NEXT: pand %xmm1, %xmm0
2529 ; SSE41-NEXT: pshufb %xmm0, %xmm3
2530 ; SSE41-NEXT: paddb %xmm4, %xmm3
2531 ; SSE41-NEXT: movdqa %xmm3, %xmm1
2532 ; SSE41-NEXT: psllw $8, %xmm1
2533 ; SSE41-NEXT: paddb %xmm3, %xmm1
2534 ; SSE41-NEXT: psrlw $8, %xmm1
2535 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5]
2536 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
2539 ; AVX1-LABEL: ult_5_v8i16:
2541 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2542 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
2543 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2544 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2545 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
2546 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
2547 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
2548 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2549 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
2550 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
2551 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
2552 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5]
2553 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2556 ; AVX2-LABEL: ult_5_v8i16:
2558 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2559 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
2560 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2561 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2562 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
2563 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
2564 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
2565 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2566 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
2567 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
2568 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
2569 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5]
2570 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2573 ; AVX512VPOPCNTDQ-LABEL: ult_5_v8i16:
2574 ; AVX512VPOPCNTDQ: # %bb.0:
2575 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2576 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
2577 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
2578 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5]
2579 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2580 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
2581 ; AVX512VPOPCNTDQ-NEXT: retq
2583 ; AVX512VPOPCNTDQVL-LABEL: ult_5_v8i16:
2584 ; AVX512VPOPCNTDQVL: # %bb.0:
2585 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2586 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
2587 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
2588 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5]
2589 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2590 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
2591 ; AVX512VPOPCNTDQVL-NEXT: retq
2593 ; BITALG_NOVLX-LABEL: ult_5_v8i16:
2594 ; BITALG_NOVLX: # %bb.0:
2595 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2596 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
2597 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5]
2598 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2599 ; BITALG_NOVLX-NEXT: vzeroupper
2600 ; BITALG_NOVLX-NEXT: retq
2602 ; BITALG-LABEL: ult_5_v8i16:
2604 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
2605 ; BITALG-NEXT: vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
2606 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
2608 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
2609 %3 = icmp ult <8 x i16> %2, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
2610 %4 = sext <8 x i1> %3 to <8 x i16>
2614 define <8 x i16> @ugt_5_v8i16(<8 x i16> %0) {
2615 ; SSE2-LABEL: ugt_5_v8i16:
2617 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2618 ; SSE2-NEXT: psrlw $1, %xmm1
2619 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2620 ; SSE2-NEXT: psubb %xmm1, %xmm0
2621 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2622 ; SSE2-NEXT: movdqa %xmm0, %xmm2
2623 ; SSE2-NEXT: pand %xmm1, %xmm2
2624 ; SSE2-NEXT: psrlw $2, %xmm0
2625 ; SSE2-NEXT: pand %xmm1, %xmm0
2626 ; SSE2-NEXT: paddb %xmm2, %xmm0
2627 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2628 ; SSE2-NEXT: psrlw $4, %xmm1
2629 ; SSE2-NEXT: paddb %xmm1, %xmm0
2630 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2631 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2632 ; SSE2-NEXT: psllw $8, %xmm1
2633 ; SSE2-NEXT: paddb %xmm1, %xmm0
2634 ; SSE2-NEXT: psrlw $8, %xmm0
2635 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2638 ; SSE3-LABEL: ugt_5_v8i16:
2640 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2641 ; SSE3-NEXT: psrlw $1, %xmm1
2642 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2643 ; SSE3-NEXT: psubb %xmm1, %xmm0
2644 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2645 ; SSE3-NEXT: movdqa %xmm0, %xmm2
2646 ; SSE3-NEXT: pand %xmm1, %xmm2
2647 ; SSE3-NEXT: psrlw $2, %xmm0
2648 ; SSE3-NEXT: pand %xmm1, %xmm0
2649 ; SSE3-NEXT: paddb %xmm2, %xmm0
2650 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2651 ; SSE3-NEXT: psrlw $4, %xmm1
2652 ; SSE3-NEXT: paddb %xmm1, %xmm0
2653 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2654 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2655 ; SSE3-NEXT: psllw $8, %xmm1
2656 ; SSE3-NEXT: paddb %xmm1, %xmm0
2657 ; SSE3-NEXT: psrlw $8, %xmm0
2658 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2661 ; SSSE3-LABEL: ugt_5_v8i16:
2663 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2664 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
2665 ; SSSE3-NEXT: pand %xmm1, %xmm2
2666 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2667 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
2668 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
2669 ; SSSE3-NEXT: psrlw $4, %xmm0
2670 ; SSSE3-NEXT: pand %xmm1, %xmm0
2671 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
2672 ; SSSE3-NEXT: paddb %xmm4, %xmm3
2673 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
2674 ; SSSE3-NEXT: psllw $8, %xmm0
2675 ; SSSE3-NEXT: paddb %xmm3, %xmm0
2676 ; SSSE3-NEXT: psrlw $8, %xmm0
2677 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2680 ; SSE41-LABEL: ugt_5_v8i16:
2682 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2683 ; SSE41-NEXT: movdqa %xmm0, %xmm2
2684 ; SSE41-NEXT: pand %xmm1, %xmm2
2685 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2686 ; SSE41-NEXT: movdqa %xmm3, %xmm4
2687 ; SSE41-NEXT: pshufb %xmm2, %xmm4
2688 ; SSE41-NEXT: psrlw $4, %xmm0
2689 ; SSE41-NEXT: pand %xmm1, %xmm0
2690 ; SSE41-NEXT: pshufb %xmm0, %xmm3
2691 ; SSE41-NEXT: paddb %xmm4, %xmm3
2692 ; SSE41-NEXT: movdqa %xmm3, %xmm0
2693 ; SSE41-NEXT: psllw $8, %xmm0
2694 ; SSE41-NEXT: paddb %xmm3, %xmm0
2695 ; SSE41-NEXT: psrlw $8, %xmm0
2696 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2699 ; AVX1-LABEL: ugt_5_v8i16:
2701 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2702 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
2703 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2704 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2705 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
2706 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
2707 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
2708 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2709 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
2710 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
2711 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
2712 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2715 ; AVX2-LABEL: ugt_5_v8i16:
2717 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2718 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
2719 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2720 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2721 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
2722 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
2723 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
2724 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2725 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
2726 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
2727 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
2728 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2731 ; AVX512VPOPCNTDQ-LABEL: ugt_5_v8i16:
2732 ; AVX512VPOPCNTDQ: # %bb.0:
2733 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2734 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
2735 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
2736 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2737 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
2738 ; AVX512VPOPCNTDQ-NEXT: retq
2740 ; AVX512VPOPCNTDQVL-LABEL: ugt_5_v8i16:
2741 ; AVX512VPOPCNTDQVL: # %bb.0:
2742 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2743 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
2744 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
2745 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2746 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
2747 ; AVX512VPOPCNTDQVL-NEXT: retq
2749 ; BITALG_NOVLX-LABEL: ugt_5_v8i16:
2750 ; BITALG_NOVLX: # %bb.0:
2751 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2752 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
2753 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2754 ; BITALG_NOVLX-NEXT: vzeroupper
2755 ; BITALG_NOVLX-NEXT: retq
2757 ; BITALG-LABEL: ugt_5_v8i16:
2759 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
2760 ; BITALG-NEXT: vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
2761 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
2763 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
2764 %3 = icmp ugt <8 x i16> %2, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
2765 %4 = sext <8 x i1> %3 to <8 x i16>
2769 define <8 x i16> @ult_6_v8i16(<8 x i16> %0) {
2770 ; SSE2-LABEL: ult_6_v8i16:
2772 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2773 ; SSE2-NEXT: psrlw $1, %xmm1
2774 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2775 ; SSE2-NEXT: psubb %xmm1, %xmm0
2776 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2777 ; SSE2-NEXT: movdqa %xmm0, %xmm2
2778 ; SSE2-NEXT: pand %xmm1, %xmm2
2779 ; SSE2-NEXT: psrlw $2, %xmm0
2780 ; SSE2-NEXT: pand %xmm1, %xmm0
2781 ; SSE2-NEXT: paddb %xmm2, %xmm0
2782 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2783 ; SSE2-NEXT: psrlw $4, %xmm1
2784 ; SSE2-NEXT: paddb %xmm0, %xmm1
2785 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2786 ; SSE2-NEXT: movdqa %xmm1, %xmm2
2787 ; SSE2-NEXT: psllw $8, %xmm2
2788 ; SSE2-NEXT: paddb %xmm1, %xmm2
2789 ; SSE2-NEXT: psrlw $8, %xmm2
2790 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6]
2791 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
2794 ; SSE3-LABEL: ult_6_v8i16:
2796 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2797 ; SSE3-NEXT: psrlw $1, %xmm1
2798 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2799 ; SSE3-NEXT: psubb %xmm1, %xmm0
2800 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2801 ; SSE3-NEXT: movdqa %xmm0, %xmm2
2802 ; SSE3-NEXT: pand %xmm1, %xmm2
2803 ; SSE3-NEXT: psrlw $2, %xmm0
2804 ; SSE3-NEXT: pand %xmm1, %xmm0
2805 ; SSE3-NEXT: paddb %xmm2, %xmm0
2806 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2807 ; SSE3-NEXT: psrlw $4, %xmm1
2808 ; SSE3-NEXT: paddb %xmm0, %xmm1
2809 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2810 ; SSE3-NEXT: movdqa %xmm1, %xmm2
2811 ; SSE3-NEXT: psllw $8, %xmm2
2812 ; SSE3-NEXT: paddb %xmm1, %xmm2
2813 ; SSE3-NEXT: psrlw $8, %xmm2
2814 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6]
2815 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
2818 ; SSSE3-LABEL: ult_6_v8i16:
2820 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2821 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
2822 ; SSSE3-NEXT: pand %xmm1, %xmm2
2823 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2824 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
2825 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
2826 ; SSSE3-NEXT: psrlw $4, %xmm0
2827 ; SSSE3-NEXT: pand %xmm1, %xmm0
2828 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
2829 ; SSSE3-NEXT: paddb %xmm4, %xmm3
2830 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
2831 ; SSSE3-NEXT: psllw $8, %xmm1
2832 ; SSSE3-NEXT: paddb %xmm3, %xmm1
2833 ; SSSE3-NEXT: psrlw $8, %xmm1
2834 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6]
2835 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
2838 ; SSE41-LABEL: ult_6_v8i16:
2840 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2841 ; SSE41-NEXT: movdqa %xmm0, %xmm2
2842 ; SSE41-NEXT: pand %xmm1, %xmm2
2843 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2844 ; SSE41-NEXT: movdqa %xmm3, %xmm4
2845 ; SSE41-NEXT: pshufb %xmm2, %xmm4
2846 ; SSE41-NEXT: psrlw $4, %xmm0
2847 ; SSE41-NEXT: pand %xmm1, %xmm0
2848 ; SSE41-NEXT: pshufb %xmm0, %xmm3
2849 ; SSE41-NEXT: paddb %xmm4, %xmm3
2850 ; SSE41-NEXT: movdqa %xmm3, %xmm1
2851 ; SSE41-NEXT: psllw $8, %xmm1
2852 ; SSE41-NEXT: paddb %xmm3, %xmm1
2853 ; SSE41-NEXT: psrlw $8, %xmm1
2854 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6]
2855 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
2858 ; AVX1-LABEL: ult_6_v8i16:
2860 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2861 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
2862 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2863 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2864 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
2865 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
2866 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
2867 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2868 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
2869 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
2870 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
2871 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6]
2872 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2875 ; AVX2-LABEL: ult_6_v8i16:
2877 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2878 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
2879 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2880 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2881 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
2882 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
2883 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
2884 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2885 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
2886 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
2887 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
2888 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6]
2889 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2892 ; AVX512VPOPCNTDQ-LABEL: ult_6_v8i16:
2893 ; AVX512VPOPCNTDQ: # %bb.0:
2894 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2895 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
2896 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
2897 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6]
2898 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2899 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
2900 ; AVX512VPOPCNTDQ-NEXT: retq
2902 ; AVX512VPOPCNTDQVL-LABEL: ult_6_v8i16:
2903 ; AVX512VPOPCNTDQVL: # %bb.0:
2904 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2905 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
2906 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
2907 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6]
2908 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2909 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
2910 ; AVX512VPOPCNTDQVL-NEXT: retq
2912 ; BITALG_NOVLX-LABEL: ult_6_v8i16:
2913 ; BITALG_NOVLX: # %bb.0:
2914 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2915 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
2916 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6]
2917 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2918 ; BITALG_NOVLX-NEXT: vzeroupper
2919 ; BITALG_NOVLX-NEXT: retq
2921 ; BITALG-LABEL: ult_6_v8i16:
2923 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
2924 ; BITALG-NEXT: vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
2925 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
2927 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
2928 %3 = icmp ult <8 x i16> %2, <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>
2929 %4 = sext <8 x i1> %3 to <8 x i16>
2933 define <8 x i16> @ugt_6_v8i16(<8 x i16> %0) {
2934 ; SSE2-LABEL: ugt_6_v8i16:
2936 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2937 ; SSE2-NEXT: psrlw $1, %xmm1
2938 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2939 ; SSE2-NEXT: psubb %xmm1, %xmm0
2940 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2941 ; SSE2-NEXT: movdqa %xmm0, %xmm2
2942 ; SSE2-NEXT: pand %xmm1, %xmm2
2943 ; SSE2-NEXT: psrlw $2, %xmm0
2944 ; SSE2-NEXT: pand %xmm1, %xmm0
2945 ; SSE2-NEXT: paddb %xmm2, %xmm0
2946 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2947 ; SSE2-NEXT: psrlw $4, %xmm1
2948 ; SSE2-NEXT: paddb %xmm1, %xmm0
2949 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2950 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2951 ; SSE2-NEXT: psllw $8, %xmm1
2952 ; SSE2-NEXT: paddb %xmm1, %xmm0
2953 ; SSE2-NEXT: psrlw $8, %xmm0
2954 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2957 ; SSE3-LABEL: ugt_6_v8i16:
2959 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2960 ; SSE3-NEXT: psrlw $1, %xmm1
2961 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2962 ; SSE3-NEXT: psubb %xmm1, %xmm0
2963 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2964 ; SSE3-NEXT: movdqa %xmm0, %xmm2
2965 ; SSE3-NEXT: pand %xmm1, %xmm2
2966 ; SSE3-NEXT: psrlw $2, %xmm0
2967 ; SSE3-NEXT: pand %xmm1, %xmm0
2968 ; SSE3-NEXT: paddb %xmm2, %xmm0
2969 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2970 ; SSE3-NEXT: psrlw $4, %xmm1
2971 ; SSE3-NEXT: paddb %xmm1, %xmm0
2972 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2973 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2974 ; SSE3-NEXT: psllw $8, %xmm1
2975 ; SSE3-NEXT: paddb %xmm1, %xmm0
2976 ; SSE3-NEXT: psrlw $8, %xmm0
2977 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2980 ; SSSE3-LABEL: ugt_6_v8i16:
2982 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2983 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
2984 ; SSSE3-NEXT: pand %xmm1, %xmm2
2985 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2986 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
2987 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
2988 ; SSSE3-NEXT: psrlw $4, %xmm0
2989 ; SSSE3-NEXT: pand %xmm1, %xmm0
2990 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
2991 ; SSSE3-NEXT: paddb %xmm4, %xmm3
2992 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
2993 ; SSSE3-NEXT: psllw $8, %xmm0
2994 ; SSSE3-NEXT: paddb %xmm3, %xmm0
2995 ; SSSE3-NEXT: psrlw $8, %xmm0
2996 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2999 ; SSE41-LABEL: ugt_6_v8i16:
3001 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3002 ; SSE41-NEXT: movdqa %xmm0, %xmm2
3003 ; SSE41-NEXT: pand %xmm1, %xmm2
3004 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3005 ; SSE41-NEXT: movdqa %xmm3, %xmm4
3006 ; SSE41-NEXT: pshufb %xmm2, %xmm4
3007 ; SSE41-NEXT: psrlw $4, %xmm0
3008 ; SSE41-NEXT: pand %xmm1, %xmm0
3009 ; SSE41-NEXT: pshufb %xmm0, %xmm3
3010 ; SSE41-NEXT: paddb %xmm4, %xmm3
3011 ; SSE41-NEXT: movdqa %xmm3, %xmm0
3012 ; SSE41-NEXT: psllw $8, %xmm0
3013 ; SSE41-NEXT: paddb %xmm3, %xmm0
3014 ; SSE41-NEXT: psrlw $8, %xmm0
3015 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3018 ; AVX1-LABEL: ugt_6_v8i16:
3020 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3021 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
3022 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3023 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3024 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
3025 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
3026 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3027 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3028 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
3029 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3030 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
3031 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3034 ; AVX2-LABEL: ugt_6_v8i16:
3036 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3037 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
3038 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3039 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3040 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
3041 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
3042 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3043 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3044 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
3045 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3046 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
3047 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3050 ; AVX512VPOPCNTDQ-LABEL: ugt_6_v8i16:
3051 ; AVX512VPOPCNTDQ: # %bb.0:
3052 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3053 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
3054 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
3055 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3056 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
3057 ; AVX512VPOPCNTDQ-NEXT: retq
3059 ; AVX512VPOPCNTDQVL-LABEL: ugt_6_v8i16:
3060 ; AVX512VPOPCNTDQVL: # %bb.0:
3061 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3062 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
3063 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
3064 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3065 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
3066 ; AVX512VPOPCNTDQVL-NEXT: retq
3068 ; BITALG_NOVLX-LABEL: ugt_6_v8i16:
3069 ; BITALG_NOVLX: # %bb.0:
3070 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3071 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
3072 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3073 ; BITALG_NOVLX-NEXT: vzeroupper
3074 ; BITALG_NOVLX-NEXT: retq
3076 ; BITALG-LABEL: ugt_6_v8i16:
3078 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
3079 ; BITALG-NEXT: vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
3080 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
3082 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
3083 %3 = icmp ugt <8 x i16> %2, <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>
3084 %4 = sext <8 x i1> %3 to <8 x i16>
3088 define <8 x i16> @ult_7_v8i16(<8 x i16> %0) {
3089 ; SSE2-LABEL: ult_7_v8i16:
3091 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3092 ; SSE2-NEXT: psrlw $1, %xmm1
3093 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3094 ; SSE2-NEXT: psubb %xmm1, %xmm0
3095 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3096 ; SSE2-NEXT: movdqa %xmm0, %xmm2
3097 ; SSE2-NEXT: pand %xmm1, %xmm2
3098 ; SSE2-NEXT: psrlw $2, %xmm0
3099 ; SSE2-NEXT: pand %xmm1, %xmm0
3100 ; SSE2-NEXT: paddb %xmm2, %xmm0
3101 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3102 ; SSE2-NEXT: psrlw $4, %xmm1
3103 ; SSE2-NEXT: paddb %xmm0, %xmm1
3104 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3105 ; SSE2-NEXT: movdqa %xmm1, %xmm2
3106 ; SSE2-NEXT: psllw $8, %xmm2
3107 ; SSE2-NEXT: paddb %xmm1, %xmm2
3108 ; SSE2-NEXT: psrlw $8, %xmm2
3109 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7]
3110 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
3113 ; SSE3-LABEL: ult_7_v8i16:
3115 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3116 ; SSE3-NEXT: psrlw $1, %xmm1
3117 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3118 ; SSE3-NEXT: psubb %xmm1, %xmm0
3119 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3120 ; SSE3-NEXT: movdqa %xmm0, %xmm2
3121 ; SSE3-NEXT: pand %xmm1, %xmm2
3122 ; SSE3-NEXT: psrlw $2, %xmm0
3123 ; SSE3-NEXT: pand %xmm1, %xmm0
3124 ; SSE3-NEXT: paddb %xmm2, %xmm0
3125 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3126 ; SSE3-NEXT: psrlw $4, %xmm1
3127 ; SSE3-NEXT: paddb %xmm0, %xmm1
3128 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3129 ; SSE3-NEXT: movdqa %xmm1, %xmm2
3130 ; SSE3-NEXT: psllw $8, %xmm2
3131 ; SSE3-NEXT: paddb %xmm1, %xmm2
3132 ; SSE3-NEXT: psrlw $8, %xmm2
3133 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7]
3134 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
3137 ; SSSE3-LABEL: ult_7_v8i16:
3139 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3140 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
3141 ; SSSE3-NEXT: pand %xmm1, %xmm2
3142 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3143 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
3144 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
3145 ; SSSE3-NEXT: psrlw $4, %xmm0
3146 ; SSSE3-NEXT: pand %xmm1, %xmm0
3147 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
3148 ; SSSE3-NEXT: paddb %xmm4, %xmm3
3149 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
3150 ; SSSE3-NEXT: psllw $8, %xmm1
3151 ; SSSE3-NEXT: paddb %xmm3, %xmm1
3152 ; SSSE3-NEXT: psrlw $8, %xmm1
3153 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7]
3154 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
3157 ; SSE41-LABEL: ult_7_v8i16:
3159 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3160 ; SSE41-NEXT: movdqa %xmm0, %xmm2
3161 ; SSE41-NEXT: pand %xmm1, %xmm2
3162 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3163 ; SSE41-NEXT: movdqa %xmm3, %xmm4
3164 ; SSE41-NEXT: pshufb %xmm2, %xmm4
3165 ; SSE41-NEXT: psrlw $4, %xmm0
3166 ; SSE41-NEXT: pand %xmm1, %xmm0
3167 ; SSE41-NEXT: pshufb %xmm0, %xmm3
3168 ; SSE41-NEXT: paddb %xmm4, %xmm3
3169 ; SSE41-NEXT: movdqa %xmm3, %xmm1
3170 ; SSE41-NEXT: psllw $8, %xmm1
3171 ; SSE41-NEXT: paddb %xmm3, %xmm1
3172 ; SSE41-NEXT: psrlw $8, %xmm1
3173 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7]
3174 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
3177 ; AVX1-LABEL: ult_7_v8i16:
3179 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3180 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
3181 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3182 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3183 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
3184 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
3185 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3186 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3187 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
3188 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3189 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
3190 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7]
3191 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3194 ; AVX2-LABEL: ult_7_v8i16:
3196 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3197 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
3198 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3199 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3200 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
3201 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
3202 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3203 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3204 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
3205 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3206 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
3207 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7]
3208 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3211 ; AVX512VPOPCNTDQ-LABEL: ult_7_v8i16:
3212 ; AVX512VPOPCNTDQ: # %bb.0:
3213 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3214 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
3215 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
3216 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7]
3217 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3218 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
3219 ; AVX512VPOPCNTDQ-NEXT: retq
3221 ; AVX512VPOPCNTDQVL-LABEL: ult_7_v8i16:
3222 ; AVX512VPOPCNTDQVL: # %bb.0:
3223 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3224 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
3225 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
3226 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7]
3227 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3228 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
3229 ; AVX512VPOPCNTDQVL-NEXT: retq
3231 ; BITALG_NOVLX-LABEL: ult_7_v8i16:
3232 ; BITALG_NOVLX: # %bb.0:
3233 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3234 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
3235 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7]
3236 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3237 ; BITALG_NOVLX-NEXT: vzeroupper
3238 ; BITALG_NOVLX-NEXT: retq
3240 ; BITALG-LABEL: ult_7_v8i16:
3242 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
3243 ; BITALG-NEXT: vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
3244 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
3246 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
3247 %3 = icmp ult <8 x i16> %2, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
3248 %4 = sext <8 x i1> %3 to <8 x i16>
3252 define <8 x i16> @ugt_7_v8i16(<8 x i16> %0) {
3253 ; SSE2-LABEL: ugt_7_v8i16:
3255 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3256 ; SSE2-NEXT: psrlw $1, %xmm1
3257 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3258 ; SSE2-NEXT: psubb %xmm1, %xmm0
3259 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3260 ; SSE2-NEXT: movdqa %xmm0, %xmm2
3261 ; SSE2-NEXT: pand %xmm1, %xmm2
3262 ; SSE2-NEXT: psrlw $2, %xmm0
3263 ; SSE2-NEXT: pand %xmm1, %xmm0
3264 ; SSE2-NEXT: paddb %xmm2, %xmm0
3265 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3266 ; SSE2-NEXT: psrlw $4, %xmm1
3267 ; SSE2-NEXT: paddb %xmm1, %xmm0
3268 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3269 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3270 ; SSE2-NEXT: psllw $8, %xmm1
3271 ; SSE2-NEXT: paddb %xmm1, %xmm0
3272 ; SSE2-NEXT: psrlw $8, %xmm0
3273 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3276 ; SSE3-LABEL: ugt_7_v8i16:
3278 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3279 ; SSE3-NEXT: psrlw $1, %xmm1
3280 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3281 ; SSE3-NEXT: psubb %xmm1, %xmm0
3282 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3283 ; SSE3-NEXT: movdqa %xmm0, %xmm2
3284 ; SSE3-NEXT: pand %xmm1, %xmm2
3285 ; SSE3-NEXT: psrlw $2, %xmm0
3286 ; SSE3-NEXT: pand %xmm1, %xmm0
3287 ; SSE3-NEXT: paddb %xmm2, %xmm0
3288 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3289 ; SSE3-NEXT: psrlw $4, %xmm1
3290 ; SSE3-NEXT: paddb %xmm1, %xmm0
3291 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3292 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3293 ; SSE3-NEXT: psllw $8, %xmm1
3294 ; SSE3-NEXT: paddb %xmm1, %xmm0
3295 ; SSE3-NEXT: psrlw $8, %xmm0
3296 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3299 ; SSSE3-LABEL: ugt_7_v8i16:
3301 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3302 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
3303 ; SSSE3-NEXT: pand %xmm1, %xmm2
3304 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3305 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
3306 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
3307 ; SSSE3-NEXT: psrlw $4, %xmm0
3308 ; SSSE3-NEXT: pand %xmm1, %xmm0
3309 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
3310 ; SSSE3-NEXT: paddb %xmm4, %xmm3
3311 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
3312 ; SSSE3-NEXT: psllw $8, %xmm0
3313 ; SSSE3-NEXT: paddb %xmm3, %xmm0
3314 ; SSSE3-NEXT: psrlw $8, %xmm0
3315 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3318 ; SSE41-LABEL: ugt_7_v8i16:
3320 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3321 ; SSE41-NEXT: movdqa %xmm0, %xmm2
3322 ; SSE41-NEXT: pand %xmm1, %xmm2
3323 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3324 ; SSE41-NEXT: movdqa %xmm3, %xmm4
3325 ; SSE41-NEXT: pshufb %xmm2, %xmm4
3326 ; SSE41-NEXT: psrlw $4, %xmm0
3327 ; SSE41-NEXT: pand %xmm1, %xmm0
3328 ; SSE41-NEXT: pshufb %xmm0, %xmm3
3329 ; SSE41-NEXT: paddb %xmm4, %xmm3
3330 ; SSE41-NEXT: movdqa %xmm3, %xmm0
3331 ; SSE41-NEXT: psllw $8, %xmm0
3332 ; SSE41-NEXT: paddb %xmm3, %xmm0
3333 ; SSE41-NEXT: psrlw $8, %xmm0
3334 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3337 ; AVX1-LABEL: ugt_7_v8i16:
3339 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3340 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
3341 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3342 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3343 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
3344 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
3345 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3346 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3347 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
3348 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3349 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
3350 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3353 ; AVX2-LABEL: ugt_7_v8i16:
3355 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3356 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
3357 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3358 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3359 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
3360 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
3361 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3362 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3363 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
3364 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3365 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
3366 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3369 ; AVX512VPOPCNTDQ-LABEL: ugt_7_v8i16:
3370 ; AVX512VPOPCNTDQ: # %bb.0:
3371 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3372 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
3373 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
3374 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3375 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
3376 ; AVX512VPOPCNTDQ-NEXT: retq
3378 ; AVX512VPOPCNTDQVL-LABEL: ugt_7_v8i16:
3379 ; AVX512VPOPCNTDQVL: # %bb.0:
3380 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3381 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
3382 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
3383 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3384 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
3385 ; AVX512VPOPCNTDQVL-NEXT: retq
3387 ; BITALG_NOVLX-LABEL: ugt_7_v8i16:
3388 ; BITALG_NOVLX: # %bb.0:
3389 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3390 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
3391 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3392 ; BITALG_NOVLX-NEXT: vzeroupper
3393 ; BITALG_NOVLX-NEXT: retq
3395 ; BITALG-LABEL: ugt_7_v8i16:
3397 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
3398 ; BITALG-NEXT: vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
3399 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
3401 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
3402 %3 = icmp ugt <8 x i16> %2, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
3403 %4 = sext <8 x i1> %3 to <8 x i16>
3407 define <8 x i16> @ult_8_v8i16(<8 x i16> %0) {
3408 ; SSE2-LABEL: ult_8_v8i16:
3410 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3411 ; SSE2-NEXT: psrlw $1, %xmm1
3412 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3413 ; SSE2-NEXT: psubb %xmm1, %xmm0
3414 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3415 ; SSE2-NEXT: movdqa %xmm0, %xmm2
3416 ; SSE2-NEXT: pand %xmm1, %xmm2
3417 ; SSE2-NEXT: psrlw $2, %xmm0
3418 ; SSE2-NEXT: pand %xmm1, %xmm0
3419 ; SSE2-NEXT: paddb %xmm2, %xmm0
3420 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3421 ; SSE2-NEXT: psrlw $4, %xmm1
3422 ; SSE2-NEXT: paddb %xmm0, %xmm1
3423 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3424 ; SSE2-NEXT: movdqa %xmm1, %xmm2
3425 ; SSE2-NEXT: psllw $8, %xmm2
3426 ; SSE2-NEXT: paddb %xmm1, %xmm2
3427 ; SSE2-NEXT: psrlw $8, %xmm2
3428 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8]
3429 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
3432 ; SSE3-LABEL: ult_8_v8i16:
3434 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3435 ; SSE3-NEXT: psrlw $1, %xmm1
3436 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3437 ; SSE3-NEXT: psubb %xmm1, %xmm0
3438 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3439 ; SSE3-NEXT: movdqa %xmm0, %xmm2
3440 ; SSE3-NEXT: pand %xmm1, %xmm2
3441 ; SSE3-NEXT: psrlw $2, %xmm0
3442 ; SSE3-NEXT: pand %xmm1, %xmm0
3443 ; SSE3-NEXT: paddb %xmm2, %xmm0
3444 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3445 ; SSE3-NEXT: psrlw $4, %xmm1
3446 ; SSE3-NEXT: paddb %xmm0, %xmm1
3447 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3448 ; SSE3-NEXT: movdqa %xmm1, %xmm2
3449 ; SSE3-NEXT: psllw $8, %xmm2
3450 ; SSE3-NEXT: paddb %xmm1, %xmm2
3451 ; SSE3-NEXT: psrlw $8, %xmm2
3452 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8]
3453 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
3456 ; SSSE3-LABEL: ult_8_v8i16:
3458 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3459 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
3460 ; SSSE3-NEXT: pand %xmm1, %xmm2
3461 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3462 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
3463 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
3464 ; SSSE3-NEXT: psrlw $4, %xmm0
3465 ; SSSE3-NEXT: pand %xmm1, %xmm0
3466 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
3467 ; SSSE3-NEXT: paddb %xmm4, %xmm3
3468 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
3469 ; SSSE3-NEXT: psllw $8, %xmm1
3470 ; SSSE3-NEXT: paddb %xmm3, %xmm1
3471 ; SSSE3-NEXT: psrlw $8, %xmm1
3472 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8]
3473 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
3476 ; SSE41-LABEL: ult_8_v8i16:
3478 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3479 ; SSE41-NEXT: movdqa %xmm0, %xmm2
3480 ; SSE41-NEXT: pand %xmm1, %xmm2
3481 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3482 ; SSE41-NEXT: movdqa %xmm3, %xmm4
3483 ; SSE41-NEXT: pshufb %xmm2, %xmm4
3484 ; SSE41-NEXT: psrlw $4, %xmm0
3485 ; SSE41-NEXT: pand %xmm1, %xmm0
3486 ; SSE41-NEXT: pshufb %xmm0, %xmm3
3487 ; SSE41-NEXT: paddb %xmm4, %xmm3
3488 ; SSE41-NEXT: movdqa %xmm3, %xmm1
3489 ; SSE41-NEXT: psllw $8, %xmm1
3490 ; SSE41-NEXT: paddb %xmm3, %xmm1
3491 ; SSE41-NEXT: psrlw $8, %xmm1
3492 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8]
3493 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
3496 ; AVX1-LABEL: ult_8_v8i16:
3498 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3499 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
3500 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3501 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3502 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
3503 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
3504 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3505 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3506 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
3507 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3508 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
3509 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8]
3510 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3513 ; AVX2-LABEL: ult_8_v8i16:
3515 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3516 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
3517 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3518 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3519 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
3520 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
3521 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3522 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3523 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
3524 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3525 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
3526 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8]
3527 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3530 ; AVX512VPOPCNTDQ-LABEL: ult_8_v8i16:
3531 ; AVX512VPOPCNTDQ: # %bb.0:
3532 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3533 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
3534 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
3535 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8]
3536 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3537 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
3538 ; AVX512VPOPCNTDQ-NEXT: retq
3540 ; AVX512VPOPCNTDQVL-LABEL: ult_8_v8i16:
3541 ; AVX512VPOPCNTDQVL: # %bb.0:
3542 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3543 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
3544 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
3545 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8]
3546 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3547 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
3548 ; AVX512VPOPCNTDQVL-NEXT: retq
3550 ; BITALG_NOVLX-LABEL: ult_8_v8i16:
3551 ; BITALG_NOVLX: # %bb.0:
3552 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3553 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
3554 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8]
3555 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3556 ; BITALG_NOVLX-NEXT: vzeroupper
3557 ; BITALG_NOVLX-NEXT: retq
3559 ; BITALG-LABEL: ult_8_v8i16:
3561 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
3562 ; BITALG-NEXT: vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
3563 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
3565 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
3566 %3 = icmp ult <8 x i16> %2, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
3567 %4 = sext <8 x i1> %3 to <8 x i16>
3571 define <8 x i16> @ugt_8_v8i16(<8 x i16> %0) {
3572 ; SSE2-LABEL: ugt_8_v8i16:
3574 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3575 ; SSE2-NEXT: psrlw $1, %xmm1
3576 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3577 ; SSE2-NEXT: psubb %xmm1, %xmm0
3578 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3579 ; SSE2-NEXT: movdqa %xmm0, %xmm2
3580 ; SSE2-NEXT: pand %xmm1, %xmm2
3581 ; SSE2-NEXT: psrlw $2, %xmm0
3582 ; SSE2-NEXT: pand %xmm1, %xmm0
3583 ; SSE2-NEXT: paddb %xmm2, %xmm0
3584 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3585 ; SSE2-NEXT: psrlw $4, %xmm1
3586 ; SSE2-NEXT: paddb %xmm1, %xmm0
3587 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3588 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3589 ; SSE2-NEXT: psllw $8, %xmm1
3590 ; SSE2-NEXT: paddb %xmm1, %xmm0
3591 ; SSE2-NEXT: psrlw $8, %xmm0
3592 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3595 ; SSE3-LABEL: ugt_8_v8i16:
3597 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3598 ; SSE3-NEXT: psrlw $1, %xmm1
3599 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3600 ; SSE3-NEXT: psubb %xmm1, %xmm0
3601 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3602 ; SSE3-NEXT: movdqa %xmm0, %xmm2
3603 ; SSE3-NEXT: pand %xmm1, %xmm2
3604 ; SSE3-NEXT: psrlw $2, %xmm0
3605 ; SSE3-NEXT: pand %xmm1, %xmm0
3606 ; SSE3-NEXT: paddb %xmm2, %xmm0
3607 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3608 ; SSE3-NEXT: psrlw $4, %xmm1
3609 ; SSE3-NEXT: paddb %xmm1, %xmm0
3610 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3611 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3612 ; SSE3-NEXT: psllw $8, %xmm1
3613 ; SSE3-NEXT: paddb %xmm1, %xmm0
3614 ; SSE3-NEXT: psrlw $8, %xmm0
3615 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3618 ; SSSE3-LABEL: ugt_8_v8i16:
3620 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3621 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
3622 ; SSSE3-NEXT: pand %xmm1, %xmm2
3623 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3624 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
3625 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
3626 ; SSSE3-NEXT: psrlw $4, %xmm0
3627 ; SSSE3-NEXT: pand %xmm1, %xmm0
3628 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
3629 ; SSSE3-NEXT: paddb %xmm4, %xmm3
3630 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
3631 ; SSSE3-NEXT: psllw $8, %xmm0
3632 ; SSSE3-NEXT: paddb %xmm3, %xmm0
3633 ; SSSE3-NEXT: psrlw $8, %xmm0
3634 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3637 ; SSE41-LABEL: ugt_8_v8i16:
3639 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3640 ; SSE41-NEXT: movdqa %xmm0, %xmm2
3641 ; SSE41-NEXT: pand %xmm1, %xmm2
3642 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3643 ; SSE41-NEXT: movdqa %xmm3, %xmm4
3644 ; SSE41-NEXT: pshufb %xmm2, %xmm4
3645 ; SSE41-NEXT: psrlw $4, %xmm0
3646 ; SSE41-NEXT: pand %xmm1, %xmm0
3647 ; SSE41-NEXT: pshufb %xmm0, %xmm3
3648 ; SSE41-NEXT: paddb %xmm4, %xmm3
3649 ; SSE41-NEXT: movdqa %xmm3, %xmm0
3650 ; SSE41-NEXT: psllw $8, %xmm0
3651 ; SSE41-NEXT: paddb %xmm3, %xmm0
3652 ; SSE41-NEXT: psrlw $8, %xmm0
3653 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3656 ; AVX1-LABEL: ugt_8_v8i16:
3658 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3659 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
3660 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3661 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3662 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
3663 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
3664 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3665 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3666 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
3667 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3668 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
3669 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3672 ; AVX2-LABEL: ugt_8_v8i16:
3674 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3675 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
3676 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3677 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3678 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
3679 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
3680 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3681 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3682 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
3683 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3684 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
3685 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3688 ; AVX512VPOPCNTDQ-LABEL: ugt_8_v8i16:
3689 ; AVX512VPOPCNTDQ: # %bb.0:
3690 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3691 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
3692 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
3693 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3694 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
3695 ; AVX512VPOPCNTDQ-NEXT: retq
3697 ; AVX512VPOPCNTDQVL-LABEL: ugt_8_v8i16:
3698 ; AVX512VPOPCNTDQVL: # %bb.0:
3699 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3700 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
3701 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
3702 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3703 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
3704 ; AVX512VPOPCNTDQVL-NEXT: retq
3706 ; BITALG_NOVLX-LABEL: ugt_8_v8i16:
3707 ; BITALG_NOVLX: # %bb.0:
3708 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3709 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
3710 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3711 ; BITALG_NOVLX-NEXT: vzeroupper
3712 ; BITALG_NOVLX-NEXT: retq
3714 ; BITALG-LABEL: ugt_8_v8i16:
3716 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
3717 ; BITALG-NEXT: vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
3718 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
3720 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
3721 %3 = icmp ugt <8 x i16> %2, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
3722 %4 = sext <8 x i1> %3 to <8 x i16>
3726 define <8 x i16> @ult_9_v8i16(<8 x i16> %0) {
3727 ; SSE2-LABEL: ult_9_v8i16:
3729 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3730 ; SSE2-NEXT: psrlw $1, %xmm1
3731 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3732 ; SSE2-NEXT: psubb %xmm1, %xmm0
3733 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3734 ; SSE2-NEXT: movdqa %xmm0, %xmm2
3735 ; SSE2-NEXT: pand %xmm1, %xmm2
3736 ; SSE2-NEXT: psrlw $2, %xmm0
3737 ; SSE2-NEXT: pand %xmm1, %xmm0
3738 ; SSE2-NEXT: paddb %xmm2, %xmm0
3739 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3740 ; SSE2-NEXT: psrlw $4, %xmm1
3741 ; SSE2-NEXT: paddb %xmm0, %xmm1
3742 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3743 ; SSE2-NEXT: movdqa %xmm1, %xmm2
3744 ; SSE2-NEXT: psllw $8, %xmm2
3745 ; SSE2-NEXT: paddb %xmm1, %xmm2
3746 ; SSE2-NEXT: psrlw $8, %xmm2
3747 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9]
3748 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
3751 ; SSE3-LABEL: ult_9_v8i16:
3753 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3754 ; SSE3-NEXT: psrlw $1, %xmm1
3755 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3756 ; SSE3-NEXT: psubb %xmm1, %xmm0
3757 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3758 ; SSE3-NEXT: movdqa %xmm0, %xmm2
3759 ; SSE3-NEXT: pand %xmm1, %xmm2
3760 ; SSE3-NEXT: psrlw $2, %xmm0
3761 ; SSE3-NEXT: pand %xmm1, %xmm0
3762 ; SSE3-NEXT: paddb %xmm2, %xmm0
3763 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3764 ; SSE3-NEXT: psrlw $4, %xmm1
3765 ; SSE3-NEXT: paddb %xmm0, %xmm1
3766 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3767 ; SSE3-NEXT: movdqa %xmm1, %xmm2
3768 ; SSE3-NEXT: psllw $8, %xmm2
3769 ; SSE3-NEXT: paddb %xmm1, %xmm2
3770 ; SSE3-NEXT: psrlw $8, %xmm2
3771 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9]
3772 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
3775 ; SSSE3-LABEL: ult_9_v8i16:
3777 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3778 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
3779 ; SSSE3-NEXT: pand %xmm1, %xmm2
3780 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3781 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
3782 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
3783 ; SSSE3-NEXT: psrlw $4, %xmm0
3784 ; SSSE3-NEXT: pand %xmm1, %xmm0
3785 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
3786 ; SSSE3-NEXT: paddb %xmm4, %xmm3
3787 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
3788 ; SSSE3-NEXT: psllw $8, %xmm1
3789 ; SSSE3-NEXT: paddb %xmm3, %xmm1
3790 ; SSSE3-NEXT: psrlw $8, %xmm1
3791 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9]
3792 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
3795 ; SSE41-LABEL: ult_9_v8i16:
3797 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3798 ; SSE41-NEXT: movdqa %xmm0, %xmm2
3799 ; SSE41-NEXT: pand %xmm1, %xmm2
3800 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3801 ; SSE41-NEXT: movdqa %xmm3, %xmm4
3802 ; SSE41-NEXT: pshufb %xmm2, %xmm4
3803 ; SSE41-NEXT: psrlw $4, %xmm0
3804 ; SSE41-NEXT: pand %xmm1, %xmm0
3805 ; SSE41-NEXT: pshufb %xmm0, %xmm3
3806 ; SSE41-NEXT: paddb %xmm4, %xmm3
3807 ; SSE41-NEXT: movdqa %xmm3, %xmm1
3808 ; SSE41-NEXT: psllw $8, %xmm1
3809 ; SSE41-NEXT: paddb %xmm3, %xmm1
3810 ; SSE41-NEXT: psrlw $8, %xmm1
3811 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9]
3812 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
3815 ; AVX1-LABEL: ult_9_v8i16:
3817 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3818 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
3819 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3820 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3821 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
3822 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
3823 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3824 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3825 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
3826 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3827 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
3828 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9]
3829 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3832 ; AVX2-LABEL: ult_9_v8i16:
3834 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3835 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
3836 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3837 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3838 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
3839 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
3840 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3841 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3842 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
3843 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3844 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
3845 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9]
3846 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3849 ; AVX512VPOPCNTDQ-LABEL: ult_9_v8i16:
3850 ; AVX512VPOPCNTDQ: # %bb.0:
3851 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3852 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
3853 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
3854 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9]
3855 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3856 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
3857 ; AVX512VPOPCNTDQ-NEXT: retq
3859 ; AVX512VPOPCNTDQVL-LABEL: ult_9_v8i16:
3860 ; AVX512VPOPCNTDQVL: # %bb.0:
3861 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3862 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
3863 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
3864 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9]
3865 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3866 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
3867 ; AVX512VPOPCNTDQVL-NEXT: retq
3869 ; BITALG_NOVLX-LABEL: ult_9_v8i16:
3870 ; BITALG_NOVLX: # %bb.0:
3871 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3872 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
3873 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9]
3874 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3875 ; BITALG_NOVLX-NEXT: vzeroupper
3876 ; BITALG_NOVLX-NEXT: retq
3878 ; BITALG-LABEL: ult_9_v8i16:
3880 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
3881 ; BITALG-NEXT: vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
3882 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
3884 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
3885 %3 = icmp ult <8 x i16> %2, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
3886 %4 = sext <8 x i1> %3 to <8 x i16>
3890 define <8 x i16> @ugt_9_v8i16(<8 x i16> %0) {
3891 ; SSE2-LABEL: ugt_9_v8i16:
3893 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3894 ; SSE2-NEXT: psrlw $1, %xmm1
3895 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3896 ; SSE2-NEXT: psubb %xmm1, %xmm0
3897 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3898 ; SSE2-NEXT: movdqa %xmm0, %xmm2
3899 ; SSE2-NEXT: pand %xmm1, %xmm2
3900 ; SSE2-NEXT: psrlw $2, %xmm0
3901 ; SSE2-NEXT: pand %xmm1, %xmm0
3902 ; SSE2-NEXT: paddb %xmm2, %xmm0
3903 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3904 ; SSE2-NEXT: psrlw $4, %xmm1
3905 ; SSE2-NEXT: paddb %xmm1, %xmm0
3906 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3907 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3908 ; SSE2-NEXT: psllw $8, %xmm1
3909 ; SSE2-NEXT: paddb %xmm1, %xmm0
3910 ; SSE2-NEXT: psrlw $8, %xmm0
3911 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3914 ; SSE3-LABEL: ugt_9_v8i16:
3916 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3917 ; SSE3-NEXT: psrlw $1, %xmm1
3918 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3919 ; SSE3-NEXT: psubb %xmm1, %xmm0
3920 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3921 ; SSE3-NEXT: movdqa %xmm0, %xmm2
3922 ; SSE3-NEXT: pand %xmm1, %xmm2
3923 ; SSE3-NEXT: psrlw $2, %xmm0
3924 ; SSE3-NEXT: pand %xmm1, %xmm0
3925 ; SSE3-NEXT: paddb %xmm2, %xmm0
3926 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3927 ; SSE3-NEXT: psrlw $4, %xmm1
3928 ; SSE3-NEXT: paddb %xmm1, %xmm0
3929 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3930 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3931 ; SSE3-NEXT: psllw $8, %xmm1
3932 ; SSE3-NEXT: paddb %xmm1, %xmm0
3933 ; SSE3-NEXT: psrlw $8, %xmm0
3934 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3937 ; SSSE3-LABEL: ugt_9_v8i16:
3939 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3940 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
3941 ; SSSE3-NEXT: pand %xmm1, %xmm2
3942 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3943 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
3944 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
3945 ; SSSE3-NEXT: psrlw $4, %xmm0
3946 ; SSSE3-NEXT: pand %xmm1, %xmm0
3947 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
3948 ; SSSE3-NEXT: paddb %xmm4, %xmm3
3949 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
3950 ; SSSE3-NEXT: psllw $8, %xmm0
3951 ; SSSE3-NEXT: paddb %xmm3, %xmm0
3952 ; SSSE3-NEXT: psrlw $8, %xmm0
3953 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3956 ; SSE41-LABEL: ugt_9_v8i16:
3958 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3959 ; SSE41-NEXT: movdqa %xmm0, %xmm2
3960 ; SSE41-NEXT: pand %xmm1, %xmm2
3961 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3962 ; SSE41-NEXT: movdqa %xmm3, %xmm4
3963 ; SSE41-NEXT: pshufb %xmm2, %xmm4
3964 ; SSE41-NEXT: psrlw $4, %xmm0
3965 ; SSE41-NEXT: pand %xmm1, %xmm0
3966 ; SSE41-NEXT: pshufb %xmm0, %xmm3
3967 ; SSE41-NEXT: paddb %xmm4, %xmm3
3968 ; SSE41-NEXT: movdqa %xmm3, %xmm0
3969 ; SSE41-NEXT: psllw $8, %xmm0
3970 ; SSE41-NEXT: paddb %xmm3, %xmm0
3971 ; SSE41-NEXT: psrlw $8, %xmm0
3972 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3975 ; AVX1-LABEL: ugt_9_v8i16:
3977 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3978 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
3979 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3980 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3981 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
3982 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
3983 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3984 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3985 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
3986 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3987 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
3988 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3991 ; AVX2-LABEL: ugt_9_v8i16:
3993 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3994 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
3995 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3996 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3997 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
3998 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
3999 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
4000 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4001 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
4002 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4003 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
4004 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4007 ; AVX512VPOPCNTDQ-LABEL: ugt_9_v8i16:
4008 ; AVX512VPOPCNTDQ: # %bb.0:
4009 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4010 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
4011 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
4012 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4013 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
4014 ; AVX512VPOPCNTDQ-NEXT: retq
4016 ; AVX512VPOPCNTDQVL-LABEL: ugt_9_v8i16:
4017 ; AVX512VPOPCNTDQVL: # %bb.0:
4018 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4019 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
4020 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
4021 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4022 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
4023 ; AVX512VPOPCNTDQVL-NEXT: retq
4025 ; BITALG_NOVLX-LABEL: ugt_9_v8i16:
4026 ; BITALG_NOVLX: # %bb.0:
4027 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4028 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
4029 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4030 ; BITALG_NOVLX-NEXT: vzeroupper
4031 ; BITALG_NOVLX-NEXT: retq
4033 ; BITALG-LABEL: ugt_9_v8i16:
4035 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
4036 ; BITALG-NEXT: vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
4037 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
4039 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
4040 %3 = icmp ugt <8 x i16> %2, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
4041 %4 = sext <8 x i1> %3 to <8 x i16>
4045 define <8 x i16> @ult_10_v8i16(<8 x i16> %0) {
4046 ; SSE2-LABEL: ult_10_v8i16:
4048 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4049 ; SSE2-NEXT: psrlw $1, %xmm1
4050 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4051 ; SSE2-NEXT: psubb %xmm1, %xmm0
4052 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4053 ; SSE2-NEXT: movdqa %xmm0, %xmm2
4054 ; SSE2-NEXT: pand %xmm1, %xmm2
4055 ; SSE2-NEXT: psrlw $2, %xmm0
4056 ; SSE2-NEXT: pand %xmm1, %xmm0
4057 ; SSE2-NEXT: paddb %xmm2, %xmm0
4058 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4059 ; SSE2-NEXT: psrlw $4, %xmm1
4060 ; SSE2-NEXT: paddb %xmm0, %xmm1
4061 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4062 ; SSE2-NEXT: movdqa %xmm1, %xmm2
4063 ; SSE2-NEXT: psllw $8, %xmm2
4064 ; SSE2-NEXT: paddb %xmm1, %xmm2
4065 ; SSE2-NEXT: psrlw $8, %xmm2
4066 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10,10,10,10,10]
4067 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
4070 ; SSE3-LABEL: ult_10_v8i16:
4072 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4073 ; SSE3-NEXT: psrlw $1, %xmm1
4074 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4075 ; SSE3-NEXT: psubb %xmm1, %xmm0
4076 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4077 ; SSE3-NEXT: movdqa %xmm0, %xmm2
4078 ; SSE3-NEXT: pand %xmm1, %xmm2
4079 ; SSE3-NEXT: psrlw $2, %xmm0
4080 ; SSE3-NEXT: pand %xmm1, %xmm0
4081 ; SSE3-NEXT: paddb %xmm2, %xmm0
4082 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4083 ; SSE3-NEXT: psrlw $4, %xmm1
4084 ; SSE3-NEXT: paddb %xmm0, %xmm1
4085 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4086 ; SSE3-NEXT: movdqa %xmm1, %xmm2
4087 ; SSE3-NEXT: psllw $8, %xmm2
4088 ; SSE3-NEXT: paddb %xmm1, %xmm2
4089 ; SSE3-NEXT: psrlw $8, %xmm2
4090 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10,10,10,10,10]
4091 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
4094 ; SSSE3-LABEL: ult_10_v8i16:
4096 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4097 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
4098 ; SSSE3-NEXT: pand %xmm1, %xmm2
4099 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4100 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
4101 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
4102 ; SSSE3-NEXT: psrlw $4, %xmm0
4103 ; SSSE3-NEXT: pand %xmm1, %xmm0
4104 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
4105 ; SSSE3-NEXT: paddb %xmm4, %xmm3
4106 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
4107 ; SSSE3-NEXT: psllw $8, %xmm1
4108 ; SSSE3-NEXT: paddb %xmm3, %xmm1
4109 ; SSSE3-NEXT: psrlw $8, %xmm1
4110 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10,10,10,10,10]
4111 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
4114 ; SSE41-LABEL: ult_10_v8i16:
4116 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4117 ; SSE41-NEXT: movdqa %xmm0, %xmm2
4118 ; SSE41-NEXT: pand %xmm1, %xmm2
4119 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4120 ; SSE41-NEXT: movdqa %xmm3, %xmm4
4121 ; SSE41-NEXT: pshufb %xmm2, %xmm4
4122 ; SSE41-NEXT: psrlw $4, %xmm0
4123 ; SSE41-NEXT: pand %xmm1, %xmm0
4124 ; SSE41-NEXT: pshufb %xmm0, %xmm3
4125 ; SSE41-NEXT: paddb %xmm4, %xmm3
4126 ; SSE41-NEXT: movdqa %xmm3, %xmm1
4127 ; SSE41-NEXT: psllw $8, %xmm1
4128 ; SSE41-NEXT: paddb %xmm3, %xmm1
4129 ; SSE41-NEXT: psrlw $8, %xmm1
4130 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10,10,10,10,10]
4131 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
4134 ; AVX1-LABEL: ult_10_v8i16:
4136 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4137 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
4138 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4139 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
4140 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
4141 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
4142 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
4143 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4144 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
4145 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4146 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
4147 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10]
4148 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4151 ; AVX2-LABEL: ult_10_v8i16:
4153 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4154 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
4155 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4156 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
4157 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
4158 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
4159 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
4160 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4161 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
4162 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4163 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
4164 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10]
4165 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4168 ; AVX512VPOPCNTDQ-LABEL: ult_10_v8i16:
4169 ; AVX512VPOPCNTDQ: # %bb.0:
4170 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4171 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
4172 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
4173 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10]
4174 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4175 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
4176 ; AVX512VPOPCNTDQ-NEXT: retq
4178 ; AVX512VPOPCNTDQVL-LABEL: ult_10_v8i16:
4179 ; AVX512VPOPCNTDQVL: # %bb.0:
4180 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4181 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
4182 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
4183 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10]
4184 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4185 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
4186 ; AVX512VPOPCNTDQVL-NEXT: retq
4188 ; BITALG_NOVLX-LABEL: ult_10_v8i16:
4189 ; BITALG_NOVLX: # %bb.0:
4190 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4191 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
4192 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10]
4193 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4194 ; BITALG_NOVLX-NEXT: vzeroupper
4195 ; BITALG_NOVLX-NEXT: retq
4197 ; BITALG-LABEL: ult_10_v8i16:
4199 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
4200 ; BITALG-NEXT: vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
4201 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
4203 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
4204 %3 = icmp ult <8 x i16> %2, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
4205 %4 = sext <8 x i1> %3 to <8 x i16>
4209 define <8 x i16> @ugt_10_v8i16(<8 x i16> %0) {
4210 ; SSE2-LABEL: ugt_10_v8i16:
4212 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4213 ; SSE2-NEXT: psrlw $1, %xmm1
4214 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4215 ; SSE2-NEXT: psubb %xmm1, %xmm0
4216 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4217 ; SSE2-NEXT: movdqa %xmm0, %xmm2
4218 ; SSE2-NEXT: pand %xmm1, %xmm2
4219 ; SSE2-NEXT: psrlw $2, %xmm0
4220 ; SSE2-NEXT: pand %xmm1, %xmm0
4221 ; SSE2-NEXT: paddb %xmm2, %xmm0
4222 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4223 ; SSE2-NEXT: psrlw $4, %xmm1
4224 ; SSE2-NEXT: paddb %xmm1, %xmm0
4225 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4226 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4227 ; SSE2-NEXT: psllw $8, %xmm1
4228 ; SSE2-NEXT: paddb %xmm1, %xmm0
4229 ; SSE2-NEXT: psrlw $8, %xmm0
4230 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4233 ; SSE3-LABEL: ugt_10_v8i16:
4235 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4236 ; SSE3-NEXT: psrlw $1, %xmm1
4237 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4238 ; SSE3-NEXT: psubb %xmm1, %xmm0
4239 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4240 ; SSE3-NEXT: movdqa %xmm0, %xmm2
4241 ; SSE3-NEXT: pand %xmm1, %xmm2
4242 ; SSE3-NEXT: psrlw $2, %xmm0
4243 ; SSE3-NEXT: pand %xmm1, %xmm0
4244 ; SSE3-NEXT: paddb %xmm2, %xmm0
4245 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4246 ; SSE3-NEXT: psrlw $4, %xmm1
4247 ; SSE3-NEXT: paddb %xmm1, %xmm0
4248 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4249 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4250 ; SSE3-NEXT: psllw $8, %xmm1
4251 ; SSE3-NEXT: paddb %xmm1, %xmm0
4252 ; SSE3-NEXT: psrlw $8, %xmm0
4253 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4256 ; SSSE3-LABEL: ugt_10_v8i16:
4258 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4259 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
4260 ; SSSE3-NEXT: pand %xmm1, %xmm2
4261 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4262 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
4263 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
4264 ; SSSE3-NEXT: psrlw $4, %xmm0
4265 ; SSSE3-NEXT: pand %xmm1, %xmm0
4266 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
4267 ; SSSE3-NEXT: paddb %xmm4, %xmm3
4268 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
4269 ; SSSE3-NEXT: psllw $8, %xmm0
4270 ; SSSE3-NEXT: paddb %xmm3, %xmm0
4271 ; SSSE3-NEXT: psrlw $8, %xmm0
4272 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4275 ; SSE41-LABEL: ugt_10_v8i16:
4277 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4278 ; SSE41-NEXT: movdqa %xmm0, %xmm2
4279 ; SSE41-NEXT: pand %xmm1, %xmm2
4280 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4281 ; SSE41-NEXT: movdqa %xmm3, %xmm4
4282 ; SSE41-NEXT: pshufb %xmm2, %xmm4
4283 ; SSE41-NEXT: psrlw $4, %xmm0
4284 ; SSE41-NEXT: pand %xmm1, %xmm0
4285 ; SSE41-NEXT: pshufb %xmm0, %xmm3
4286 ; SSE41-NEXT: paddb %xmm4, %xmm3
4287 ; SSE41-NEXT: movdqa %xmm3, %xmm0
4288 ; SSE41-NEXT: psllw $8, %xmm0
4289 ; SSE41-NEXT: paddb %xmm3, %xmm0
4290 ; SSE41-NEXT: psrlw $8, %xmm0
4291 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4294 ; AVX1-LABEL: ugt_10_v8i16:
4296 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4297 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
4298 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4299 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
4300 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
4301 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
4302 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
4303 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4304 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
4305 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4306 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
4307 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4310 ; AVX2-LABEL: ugt_10_v8i16:
4312 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4313 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
4314 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4315 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
4316 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
4317 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
4318 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
4319 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4320 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
4321 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4322 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
4323 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4326 ; AVX512VPOPCNTDQ-LABEL: ugt_10_v8i16:
4327 ; AVX512VPOPCNTDQ: # %bb.0:
4328 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4329 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
4330 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
4331 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4332 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
4333 ; AVX512VPOPCNTDQ-NEXT: retq
4335 ; AVX512VPOPCNTDQVL-LABEL: ugt_10_v8i16:
4336 ; AVX512VPOPCNTDQVL: # %bb.0:
4337 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4338 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
4339 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
4340 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4341 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
4342 ; AVX512VPOPCNTDQVL-NEXT: retq
4344 ; BITALG_NOVLX-LABEL: ugt_10_v8i16:
4345 ; BITALG_NOVLX: # %bb.0:
4346 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4347 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
4348 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4349 ; BITALG_NOVLX-NEXT: vzeroupper
4350 ; BITALG_NOVLX-NEXT: retq
4352 ; BITALG-LABEL: ugt_10_v8i16:
4354 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
4355 ; BITALG-NEXT: vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
4356 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
4358 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
4359 %3 = icmp ugt <8 x i16> %2, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
4360 %4 = sext <8 x i1> %3 to <8 x i16>
4364 define <8 x i16> @ult_11_v8i16(<8 x i16> %0) {
4365 ; SSE2-LABEL: ult_11_v8i16:
4367 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4368 ; SSE2-NEXT: psrlw $1, %xmm1
4369 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4370 ; SSE2-NEXT: psubb %xmm1, %xmm0
4371 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4372 ; SSE2-NEXT: movdqa %xmm0, %xmm2
4373 ; SSE2-NEXT: pand %xmm1, %xmm2
4374 ; SSE2-NEXT: psrlw $2, %xmm0
4375 ; SSE2-NEXT: pand %xmm1, %xmm0
4376 ; SSE2-NEXT: paddb %xmm2, %xmm0
4377 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4378 ; SSE2-NEXT: psrlw $4, %xmm1
4379 ; SSE2-NEXT: paddb %xmm0, %xmm1
4380 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4381 ; SSE2-NEXT: movdqa %xmm1, %xmm2
4382 ; SSE2-NEXT: psllw $8, %xmm2
4383 ; SSE2-NEXT: paddb %xmm1, %xmm2
4384 ; SSE2-NEXT: psrlw $8, %xmm2
4385 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11,11,11,11,11]
4386 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
4389 ; SSE3-LABEL: ult_11_v8i16:
4391 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4392 ; SSE3-NEXT: psrlw $1, %xmm1
4393 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4394 ; SSE3-NEXT: psubb %xmm1, %xmm0
4395 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4396 ; SSE3-NEXT: movdqa %xmm0, %xmm2
4397 ; SSE3-NEXT: pand %xmm1, %xmm2
4398 ; SSE3-NEXT: psrlw $2, %xmm0
4399 ; SSE3-NEXT: pand %xmm1, %xmm0
4400 ; SSE3-NEXT: paddb %xmm2, %xmm0
4401 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4402 ; SSE3-NEXT: psrlw $4, %xmm1
4403 ; SSE3-NEXT: paddb %xmm0, %xmm1
4404 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4405 ; SSE3-NEXT: movdqa %xmm1, %xmm2
4406 ; SSE3-NEXT: psllw $8, %xmm2
4407 ; SSE3-NEXT: paddb %xmm1, %xmm2
4408 ; SSE3-NEXT: psrlw $8, %xmm2
4409 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11,11,11,11,11]
4410 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
4413 ; SSSE3-LABEL: ult_11_v8i16:
4415 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4416 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
4417 ; SSSE3-NEXT: pand %xmm1, %xmm2
4418 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4419 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
4420 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
4421 ; SSSE3-NEXT: psrlw $4, %xmm0
4422 ; SSSE3-NEXT: pand %xmm1, %xmm0
4423 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
4424 ; SSSE3-NEXT: paddb %xmm4, %xmm3
4425 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
4426 ; SSSE3-NEXT: psllw $8, %xmm1
4427 ; SSSE3-NEXT: paddb %xmm3, %xmm1
4428 ; SSSE3-NEXT: psrlw $8, %xmm1
4429 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11,11,11,11,11]
4430 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
4433 ; SSE41-LABEL: ult_11_v8i16:
4435 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4436 ; SSE41-NEXT: movdqa %xmm0, %xmm2
4437 ; SSE41-NEXT: pand %xmm1, %xmm2
4438 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4439 ; SSE41-NEXT: movdqa %xmm3, %xmm4
4440 ; SSE41-NEXT: pshufb %xmm2, %xmm4
4441 ; SSE41-NEXT: psrlw $4, %xmm0
4442 ; SSE41-NEXT: pand %xmm1, %xmm0
4443 ; SSE41-NEXT: pshufb %xmm0, %xmm3
4444 ; SSE41-NEXT: paddb %xmm4, %xmm3
4445 ; SSE41-NEXT: movdqa %xmm3, %xmm1
4446 ; SSE41-NEXT: psllw $8, %xmm1
4447 ; SSE41-NEXT: paddb %xmm3, %xmm1
4448 ; SSE41-NEXT: psrlw $8, %xmm1
4449 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11,11,11,11,11]
4450 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
4453 ; AVX1-LABEL: ult_11_v8i16:
4455 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4456 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
4457 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4458 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
4459 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
4460 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
4461 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
4462 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4463 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
4464 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4465 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
4466 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11]
4467 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4470 ; AVX2-LABEL: ult_11_v8i16:
4472 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4473 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
4474 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4475 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
4476 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
4477 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
4478 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
4479 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4480 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
4481 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4482 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
4483 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11]
4484 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4487 ; AVX512VPOPCNTDQ-LABEL: ult_11_v8i16:
4488 ; AVX512VPOPCNTDQ: # %bb.0:
4489 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4490 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
4491 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
4492 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11]
4493 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4494 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
4495 ; AVX512VPOPCNTDQ-NEXT: retq
4497 ; AVX512VPOPCNTDQVL-LABEL: ult_11_v8i16:
4498 ; AVX512VPOPCNTDQVL: # %bb.0:
4499 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4500 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
4501 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
4502 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11]
4503 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4504 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
4505 ; AVX512VPOPCNTDQVL-NEXT: retq
4507 ; BITALG_NOVLX-LABEL: ult_11_v8i16:
4508 ; BITALG_NOVLX: # %bb.0:
4509 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4510 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
4511 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11]
4512 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4513 ; BITALG_NOVLX-NEXT: vzeroupper
4514 ; BITALG_NOVLX-NEXT: retq
4516 ; BITALG-LABEL: ult_11_v8i16:
4518 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
4519 ; BITALG-NEXT: vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
4520 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
4522 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
4523 %3 = icmp ult <8 x i16> %2, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
4524 %4 = sext <8 x i1> %3 to <8 x i16>
4528 define <8 x i16> @ugt_11_v8i16(<8 x i16> %0) {
4529 ; SSE2-LABEL: ugt_11_v8i16:
4531 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4532 ; SSE2-NEXT: psrlw $1, %xmm1
4533 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4534 ; SSE2-NEXT: psubb %xmm1, %xmm0
4535 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4536 ; SSE2-NEXT: movdqa %xmm0, %xmm2
4537 ; SSE2-NEXT: pand %xmm1, %xmm2
4538 ; SSE2-NEXT: psrlw $2, %xmm0
4539 ; SSE2-NEXT: pand %xmm1, %xmm0
4540 ; SSE2-NEXT: paddb %xmm2, %xmm0
4541 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4542 ; SSE2-NEXT: psrlw $4, %xmm1
4543 ; SSE2-NEXT: paddb %xmm1, %xmm0
4544 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4545 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4546 ; SSE2-NEXT: psllw $8, %xmm1
4547 ; SSE2-NEXT: paddb %xmm1, %xmm0
4548 ; SSE2-NEXT: psrlw $8, %xmm0
4549 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4552 ; SSE3-LABEL: ugt_11_v8i16:
4554 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4555 ; SSE3-NEXT: psrlw $1, %xmm1
4556 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4557 ; SSE3-NEXT: psubb %xmm1, %xmm0
4558 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4559 ; SSE3-NEXT: movdqa %xmm0, %xmm2
4560 ; SSE3-NEXT: pand %xmm1, %xmm2
4561 ; SSE3-NEXT: psrlw $2, %xmm0
4562 ; SSE3-NEXT: pand %xmm1, %xmm0
4563 ; SSE3-NEXT: paddb %xmm2, %xmm0
4564 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4565 ; SSE3-NEXT: psrlw $4, %xmm1
4566 ; SSE3-NEXT: paddb %xmm1, %xmm0
4567 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4568 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4569 ; SSE3-NEXT: psllw $8, %xmm1
4570 ; SSE3-NEXT: paddb %xmm1, %xmm0
4571 ; SSE3-NEXT: psrlw $8, %xmm0
4572 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4575 ; SSSE3-LABEL: ugt_11_v8i16:
4577 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4578 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
4579 ; SSSE3-NEXT: pand %xmm1, %xmm2
4580 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4581 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
4582 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
4583 ; SSSE3-NEXT: psrlw $4, %xmm0
4584 ; SSSE3-NEXT: pand %xmm1, %xmm0
4585 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
4586 ; SSSE3-NEXT: paddb %xmm4, %xmm3
4587 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
4588 ; SSSE3-NEXT: psllw $8, %xmm0
4589 ; SSSE3-NEXT: paddb %xmm3, %xmm0
4590 ; SSSE3-NEXT: psrlw $8, %xmm0
4591 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4594 ; SSE41-LABEL: ugt_11_v8i16:
4596 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4597 ; SSE41-NEXT: movdqa %xmm0, %xmm2
4598 ; SSE41-NEXT: pand %xmm1, %xmm2
4599 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4600 ; SSE41-NEXT: movdqa %xmm3, %xmm4
4601 ; SSE41-NEXT: pshufb %xmm2, %xmm4
4602 ; SSE41-NEXT: psrlw $4, %xmm0
4603 ; SSE41-NEXT: pand %xmm1, %xmm0
4604 ; SSE41-NEXT: pshufb %xmm0, %xmm3
4605 ; SSE41-NEXT: paddb %xmm4, %xmm3
4606 ; SSE41-NEXT: movdqa %xmm3, %xmm0
4607 ; SSE41-NEXT: psllw $8, %xmm0
4608 ; SSE41-NEXT: paddb %xmm3, %xmm0
4609 ; SSE41-NEXT: psrlw $8, %xmm0
4610 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4613 ; AVX1-LABEL: ugt_11_v8i16:
4615 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4616 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
4617 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4618 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
4619 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
4620 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
4621 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
4622 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4623 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
4624 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4625 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
4626 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4629 ; AVX2-LABEL: ugt_11_v8i16:
4631 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4632 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
4633 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4634 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
4635 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
4636 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
4637 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
4638 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4639 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
4640 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4641 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
4642 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4645 ; AVX512VPOPCNTDQ-LABEL: ugt_11_v8i16:
4646 ; AVX512VPOPCNTDQ: # %bb.0:
4647 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4648 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
4649 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
4650 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4651 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
4652 ; AVX512VPOPCNTDQ-NEXT: retq
4654 ; AVX512VPOPCNTDQVL-LABEL: ugt_11_v8i16:
4655 ; AVX512VPOPCNTDQVL: # %bb.0:
4656 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4657 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
4658 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
4659 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4660 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
4661 ; AVX512VPOPCNTDQVL-NEXT: retq
4663 ; BITALG_NOVLX-LABEL: ugt_11_v8i16:
4664 ; BITALG_NOVLX: # %bb.0:
4665 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4666 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
4667 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4668 ; BITALG_NOVLX-NEXT: vzeroupper
4669 ; BITALG_NOVLX-NEXT: retq
4671 ; BITALG-LABEL: ugt_11_v8i16:
4673 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
4674 ; BITALG-NEXT: vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
4675 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
4677 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
4678 %3 = icmp ugt <8 x i16> %2, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
4679 %4 = sext <8 x i1> %3 to <8 x i16>
4683 define <8 x i16> @ult_12_v8i16(<8 x i16> %0) {
4684 ; SSE2-LABEL: ult_12_v8i16:
4686 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4687 ; SSE2-NEXT: psrlw $1, %xmm1
4688 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4689 ; SSE2-NEXT: psubb %xmm1, %xmm0
4690 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4691 ; SSE2-NEXT: movdqa %xmm0, %xmm2
4692 ; SSE2-NEXT: pand %xmm1, %xmm2
4693 ; SSE2-NEXT: psrlw $2, %xmm0
4694 ; SSE2-NEXT: pand %xmm1, %xmm0
4695 ; SSE2-NEXT: paddb %xmm2, %xmm0
4696 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4697 ; SSE2-NEXT: psrlw $4, %xmm1
4698 ; SSE2-NEXT: paddb %xmm0, %xmm1
4699 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4700 ; SSE2-NEXT: movdqa %xmm1, %xmm2
4701 ; SSE2-NEXT: psllw $8, %xmm2
4702 ; SSE2-NEXT: paddb %xmm1, %xmm2
4703 ; SSE2-NEXT: psrlw $8, %xmm2
4704 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12,12,12,12,12]
4705 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
4708 ; SSE3-LABEL: ult_12_v8i16:
4710 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4711 ; SSE3-NEXT: psrlw $1, %xmm1
4712 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4713 ; SSE3-NEXT: psubb %xmm1, %xmm0
4714 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4715 ; SSE3-NEXT: movdqa %xmm0, %xmm2
4716 ; SSE3-NEXT: pand %xmm1, %xmm2
4717 ; SSE3-NEXT: psrlw $2, %xmm0
4718 ; SSE3-NEXT: pand %xmm1, %xmm0
4719 ; SSE3-NEXT: paddb %xmm2, %xmm0
4720 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4721 ; SSE3-NEXT: psrlw $4, %xmm1
4722 ; SSE3-NEXT: paddb %xmm0, %xmm1
4723 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4724 ; SSE3-NEXT: movdqa %xmm1, %xmm2
4725 ; SSE3-NEXT: psllw $8, %xmm2
4726 ; SSE3-NEXT: paddb %xmm1, %xmm2
4727 ; SSE3-NEXT: psrlw $8, %xmm2
4728 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12,12,12,12,12]
4729 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
4732 ; SSSE3-LABEL: ult_12_v8i16:
4734 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4735 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
4736 ; SSSE3-NEXT: pand %xmm1, %xmm2
4737 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4738 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
4739 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
4740 ; SSSE3-NEXT: psrlw $4, %xmm0
4741 ; SSSE3-NEXT: pand %xmm1, %xmm0
4742 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
4743 ; SSSE3-NEXT: paddb %xmm4, %xmm3
4744 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
4745 ; SSSE3-NEXT: psllw $8, %xmm1
4746 ; SSSE3-NEXT: paddb %xmm3, %xmm1
4747 ; SSSE3-NEXT: psrlw $8, %xmm1
4748 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12,12,12,12,12]
4749 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
4752 ; SSE41-LABEL: ult_12_v8i16:
4754 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4755 ; SSE41-NEXT: movdqa %xmm0, %xmm2
4756 ; SSE41-NEXT: pand %xmm1, %xmm2
4757 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4758 ; SSE41-NEXT: movdqa %xmm3, %xmm4
4759 ; SSE41-NEXT: pshufb %xmm2, %xmm4
4760 ; SSE41-NEXT: psrlw $4, %xmm0
4761 ; SSE41-NEXT: pand %xmm1, %xmm0
4762 ; SSE41-NEXT: pshufb %xmm0, %xmm3
4763 ; SSE41-NEXT: paddb %xmm4, %xmm3
4764 ; SSE41-NEXT: movdqa %xmm3, %xmm1
4765 ; SSE41-NEXT: psllw $8, %xmm1
4766 ; SSE41-NEXT: paddb %xmm3, %xmm1
4767 ; SSE41-NEXT: psrlw $8, %xmm1
4768 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12,12,12,12,12]
4769 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
4772 ; AVX1-LABEL: ult_12_v8i16:
4774 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4775 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
4776 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4777 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
4778 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
4779 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
4780 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
4781 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4782 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
4783 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4784 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
4785 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12]
4786 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4789 ; AVX2-LABEL: ult_12_v8i16:
4791 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4792 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
4793 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4794 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
4795 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
4796 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
4797 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
4798 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4799 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
4800 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4801 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
4802 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12]
4803 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4806 ; AVX512VPOPCNTDQ-LABEL: ult_12_v8i16:
4807 ; AVX512VPOPCNTDQ: # %bb.0:
4808 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4809 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
4810 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
4811 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12]
4812 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4813 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
4814 ; AVX512VPOPCNTDQ-NEXT: retq
4816 ; AVX512VPOPCNTDQVL-LABEL: ult_12_v8i16:
4817 ; AVX512VPOPCNTDQVL: # %bb.0:
4818 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4819 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
4820 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
4821 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12]
4822 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4823 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
4824 ; AVX512VPOPCNTDQVL-NEXT: retq
4826 ; BITALG_NOVLX-LABEL: ult_12_v8i16:
4827 ; BITALG_NOVLX: # %bb.0:
4828 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4829 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
4830 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12]
4831 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4832 ; BITALG_NOVLX-NEXT: vzeroupper
4833 ; BITALG_NOVLX-NEXT: retq
4835 ; BITALG-LABEL: ult_12_v8i16:
4837 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
4838 ; BITALG-NEXT: vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
4839 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
4841 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
4842 %3 = icmp ult <8 x i16> %2, <i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12>
4843 %4 = sext <8 x i1> %3 to <8 x i16>
4847 define <8 x i16> @ugt_12_v8i16(<8 x i16> %0) {
; Per-lane popcount of <8 x i16>, then icmp ugt 12, sext'd to an i16 lane mask.
; NOTE(review): the CHECK lines below are autogenerated (see file header);
; regenerate with utils/update_llc_test_checks.py rather than editing by hand.
4848 ; SSE2-LABEL: ugt_12_v8i16:
4850 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4851 ; SSE2-NEXT: psrlw $1, %xmm1
4852 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4853 ; SSE2-NEXT: psubb %xmm1, %xmm0
4854 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4855 ; SSE2-NEXT: movdqa %xmm0, %xmm2
4856 ; SSE2-NEXT: pand %xmm1, %xmm2
4857 ; SSE2-NEXT: psrlw $2, %xmm0
4858 ; SSE2-NEXT: pand %xmm1, %xmm0
4859 ; SSE2-NEXT: paddb %xmm2, %xmm0
4860 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4861 ; SSE2-NEXT: psrlw $4, %xmm1
4862 ; SSE2-NEXT: paddb %xmm1, %xmm0
4863 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4864 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4865 ; SSE2-NEXT: psllw $8, %xmm1
4866 ; SSE2-NEXT: paddb %xmm1, %xmm0
4867 ; SSE2-NEXT: psrlw $8, %xmm0
4868 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4871 ; SSE3-LABEL: ugt_12_v8i16:
4873 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4874 ; SSE3-NEXT: psrlw $1, %xmm1
4875 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4876 ; SSE3-NEXT: psubb %xmm1, %xmm0
4877 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4878 ; SSE3-NEXT: movdqa %xmm0, %xmm2
4879 ; SSE3-NEXT: pand %xmm1, %xmm2
4880 ; SSE3-NEXT: psrlw $2, %xmm0
4881 ; SSE3-NEXT: pand %xmm1, %xmm0
4882 ; SSE3-NEXT: paddb %xmm2, %xmm0
4883 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4884 ; SSE3-NEXT: psrlw $4, %xmm1
4885 ; SSE3-NEXT: paddb %xmm1, %xmm0
4886 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4887 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4888 ; SSE3-NEXT: psllw $8, %xmm1
4889 ; SSE3-NEXT: paddb %xmm1, %xmm0
4890 ; SSE3-NEXT: psrlw $8, %xmm0
4891 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4894 ; SSSE3-LABEL: ugt_12_v8i16:
4896 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4897 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
4898 ; SSSE3-NEXT: pand %xmm1, %xmm2
4899 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4900 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
4901 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
4902 ; SSSE3-NEXT: psrlw $4, %xmm0
4903 ; SSSE3-NEXT: pand %xmm1, %xmm0
4904 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
4905 ; SSSE3-NEXT: paddb %xmm4, %xmm3
4906 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
4907 ; SSSE3-NEXT: psllw $8, %xmm0
4908 ; SSSE3-NEXT: paddb %xmm3, %xmm0
4909 ; SSSE3-NEXT: psrlw $8, %xmm0
4910 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4913 ; SSE41-LABEL: ugt_12_v8i16:
4915 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4916 ; SSE41-NEXT: movdqa %xmm0, %xmm2
4917 ; SSE41-NEXT: pand %xmm1, %xmm2
4918 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4919 ; SSE41-NEXT: movdqa %xmm3, %xmm4
4920 ; SSE41-NEXT: pshufb %xmm2, %xmm4
4921 ; SSE41-NEXT: psrlw $4, %xmm0
4922 ; SSE41-NEXT: pand %xmm1, %xmm0
4923 ; SSE41-NEXT: pshufb %xmm0, %xmm3
4924 ; SSE41-NEXT: paddb %xmm4, %xmm3
4925 ; SSE41-NEXT: movdqa %xmm3, %xmm0
4926 ; SSE41-NEXT: psllw $8, %xmm0
4927 ; SSE41-NEXT: paddb %xmm3, %xmm0
4928 ; SSE41-NEXT: psrlw $8, %xmm0
4929 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4932 ; AVX1-LABEL: ugt_12_v8i16:
4934 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4935 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
4936 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4937 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
4938 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
4939 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
4940 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
4941 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4942 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
4943 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4944 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
4945 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4948 ; AVX2-LABEL: ugt_12_v8i16:
4950 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4951 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
4952 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4953 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
4954 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
4955 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
4956 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
4957 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4958 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
4959 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4960 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
4961 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4964 ; AVX512VPOPCNTDQ-LABEL: ugt_12_v8i16:
4965 ; AVX512VPOPCNTDQ: # %bb.0:
4966 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4967 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
4968 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
4969 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4970 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
4971 ; AVX512VPOPCNTDQ-NEXT: retq
4973 ; AVX512VPOPCNTDQVL-LABEL: ugt_12_v8i16:
4974 ; AVX512VPOPCNTDQVL: # %bb.0:
4975 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4976 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
4977 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
4978 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4979 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
4980 ; AVX512VPOPCNTDQVL-NEXT: retq
4982 ; BITALG_NOVLX-LABEL: ugt_12_v8i16:
4983 ; BITALG_NOVLX: # %bb.0:
4984 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4985 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
4986 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4987 ; BITALG_NOVLX-NEXT: vzeroupper
4988 ; BITALG_NOVLX-NEXT: retq
4990 ; BITALG-LABEL: ugt_12_v8i16:
4992 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
4993 ; BITALG-NEXT: vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
4994 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
4996 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
4997 %3 = icmp ugt <8 x i16> %2, <i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12>
4998 %4 = sext <8 x i1> %3 to <8 x i16>
5002 define <8 x i16> @ult_13_v8i16(<8 x i16> %0) {
; Per-lane popcount of <8 x i16>, then icmp ult 13, sext'd to an i16 lane mask.
; NOTE(review): the CHECK lines below are autogenerated (see file header);
; regenerate with utils/update_llc_test_checks.py rather than editing by hand.
5003 ; SSE2-LABEL: ult_13_v8i16:
5005 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5006 ; SSE2-NEXT: psrlw $1, %xmm1
5007 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5008 ; SSE2-NEXT: psubb %xmm1, %xmm0
5009 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5010 ; SSE2-NEXT: movdqa %xmm0, %xmm2
5011 ; SSE2-NEXT: pand %xmm1, %xmm2
5012 ; SSE2-NEXT: psrlw $2, %xmm0
5013 ; SSE2-NEXT: pand %xmm1, %xmm0
5014 ; SSE2-NEXT: paddb %xmm2, %xmm0
5015 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5016 ; SSE2-NEXT: psrlw $4, %xmm1
5017 ; SSE2-NEXT: paddb %xmm0, %xmm1
5018 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5019 ; SSE2-NEXT: movdqa %xmm1, %xmm2
5020 ; SSE2-NEXT: psllw $8, %xmm2
5021 ; SSE2-NEXT: paddb %xmm1, %xmm2
5022 ; SSE2-NEXT: psrlw $8, %xmm2
5023 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13,13,13,13,13]
5024 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
5027 ; SSE3-LABEL: ult_13_v8i16:
5029 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5030 ; SSE3-NEXT: psrlw $1, %xmm1
5031 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5032 ; SSE3-NEXT: psubb %xmm1, %xmm0
5033 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5034 ; SSE3-NEXT: movdqa %xmm0, %xmm2
5035 ; SSE3-NEXT: pand %xmm1, %xmm2
5036 ; SSE3-NEXT: psrlw $2, %xmm0
5037 ; SSE3-NEXT: pand %xmm1, %xmm0
5038 ; SSE3-NEXT: paddb %xmm2, %xmm0
5039 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5040 ; SSE3-NEXT: psrlw $4, %xmm1
5041 ; SSE3-NEXT: paddb %xmm0, %xmm1
5042 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5043 ; SSE3-NEXT: movdqa %xmm1, %xmm2
5044 ; SSE3-NEXT: psllw $8, %xmm2
5045 ; SSE3-NEXT: paddb %xmm1, %xmm2
5046 ; SSE3-NEXT: psrlw $8, %xmm2
5047 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13,13,13,13,13]
5048 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
5051 ; SSSE3-LABEL: ult_13_v8i16:
5053 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5054 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
5055 ; SSSE3-NEXT: pand %xmm1, %xmm2
5056 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5057 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
5058 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
5059 ; SSSE3-NEXT: psrlw $4, %xmm0
5060 ; SSSE3-NEXT: pand %xmm1, %xmm0
5061 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
5062 ; SSSE3-NEXT: paddb %xmm4, %xmm3
5063 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
5064 ; SSSE3-NEXT: psllw $8, %xmm1
5065 ; SSSE3-NEXT: paddb %xmm3, %xmm1
5066 ; SSSE3-NEXT: psrlw $8, %xmm1
5067 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13,13,13,13,13]
5068 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
5071 ; SSE41-LABEL: ult_13_v8i16:
5073 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5074 ; SSE41-NEXT: movdqa %xmm0, %xmm2
5075 ; SSE41-NEXT: pand %xmm1, %xmm2
5076 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5077 ; SSE41-NEXT: movdqa %xmm3, %xmm4
5078 ; SSE41-NEXT: pshufb %xmm2, %xmm4
5079 ; SSE41-NEXT: psrlw $4, %xmm0
5080 ; SSE41-NEXT: pand %xmm1, %xmm0
5081 ; SSE41-NEXT: pshufb %xmm0, %xmm3
5082 ; SSE41-NEXT: paddb %xmm4, %xmm3
5083 ; SSE41-NEXT: movdqa %xmm3, %xmm1
5084 ; SSE41-NEXT: psllw $8, %xmm1
5085 ; SSE41-NEXT: paddb %xmm3, %xmm1
5086 ; SSE41-NEXT: psrlw $8, %xmm1
5087 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13,13,13,13,13]
5088 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
5091 ; AVX1-LABEL: ult_13_v8i16:
5093 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5094 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
5095 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5096 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
5097 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
5098 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
5099 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
5100 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5101 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
5102 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
5103 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
5104 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13]
5105 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5108 ; AVX2-LABEL: ult_13_v8i16:
5110 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5111 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
5112 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5113 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
5114 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
5115 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
5116 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
5117 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5118 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
5119 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
5120 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
5121 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13]
5122 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5125 ; AVX512VPOPCNTDQ-LABEL: ult_13_v8i16:
5126 ; AVX512VPOPCNTDQ: # %bb.0:
5127 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5128 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
5129 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
5130 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13]
5131 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5132 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
5133 ; AVX512VPOPCNTDQ-NEXT: retq
5135 ; AVX512VPOPCNTDQVL-LABEL: ult_13_v8i16:
5136 ; AVX512VPOPCNTDQVL: # %bb.0:
5137 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5138 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
5139 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
5140 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13]
5141 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5142 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
5143 ; AVX512VPOPCNTDQVL-NEXT: retq
5145 ; BITALG_NOVLX-LABEL: ult_13_v8i16:
5146 ; BITALG_NOVLX: # %bb.0:
5147 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5148 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
5149 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13]
5150 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5151 ; BITALG_NOVLX-NEXT: vzeroupper
5152 ; BITALG_NOVLX-NEXT: retq
5154 ; BITALG-LABEL: ult_13_v8i16:
5156 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
5157 ; BITALG-NEXT: vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
5158 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
5160 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
5161 %3 = icmp ult <8 x i16> %2, <i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13>
5162 %4 = sext <8 x i1> %3 to <8 x i16>
5166 define <8 x i16> @ugt_13_v8i16(<8 x i16> %0) {
; Per-lane popcount of <8 x i16>, then icmp ugt 13, sext'd to an i16 lane mask.
; NOTE(review): the CHECK lines below are autogenerated (see file header);
; regenerate with utils/update_llc_test_checks.py rather than editing by hand.
5167 ; SSE2-LABEL: ugt_13_v8i16:
5169 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5170 ; SSE2-NEXT: psrlw $1, %xmm1
5171 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5172 ; SSE2-NEXT: psubb %xmm1, %xmm0
5173 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5174 ; SSE2-NEXT: movdqa %xmm0, %xmm2
5175 ; SSE2-NEXT: pand %xmm1, %xmm2
5176 ; SSE2-NEXT: psrlw $2, %xmm0
5177 ; SSE2-NEXT: pand %xmm1, %xmm0
5178 ; SSE2-NEXT: paddb %xmm2, %xmm0
5179 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5180 ; SSE2-NEXT: psrlw $4, %xmm1
5181 ; SSE2-NEXT: paddb %xmm1, %xmm0
5182 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5183 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5184 ; SSE2-NEXT: psllw $8, %xmm1
5185 ; SSE2-NEXT: paddb %xmm1, %xmm0
5186 ; SSE2-NEXT: psrlw $8, %xmm0
5187 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5190 ; SSE3-LABEL: ugt_13_v8i16:
5192 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5193 ; SSE3-NEXT: psrlw $1, %xmm1
5194 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5195 ; SSE3-NEXT: psubb %xmm1, %xmm0
5196 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5197 ; SSE3-NEXT: movdqa %xmm0, %xmm2
5198 ; SSE3-NEXT: pand %xmm1, %xmm2
5199 ; SSE3-NEXT: psrlw $2, %xmm0
5200 ; SSE3-NEXT: pand %xmm1, %xmm0
5201 ; SSE3-NEXT: paddb %xmm2, %xmm0
5202 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5203 ; SSE3-NEXT: psrlw $4, %xmm1
5204 ; SSE3-NEXT: paddb %xmm1, %xmm0
5205 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5206 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5207 ; SSE3-NEXT: psllw $8, %xmm1
5208 ; SSE3-NEXT: paddb %xmm1, %xmm0
5209 ; SSE3-NEXT: psrlw $8, %xmm0
5210 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5213 ; SSSE3-LABEL: ugt_13_v8i16:
5215 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5216 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
5217 ; SSSE3-NEXT: pand %xmm1, %xmm2
5218 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5219 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
5220 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
5221 ; SSSE3-NEXT: psrlw $4, %xmm0
5222 ; SSSE3-NEXT: pand %xmm1, %xmm0
5223 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
5224 ; SSSE3-NEXT: paddb %xmm4, %xmm3
5225 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
5226 ; SSSE3-NEXT: psllw $8, %xmm0
5227 ; SSSE3-NEXT: paddb %xmm3, %xmm0
5228 ; SSSE3-NEXT: psrlw $8, %xmm0
5229 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5232 ; SSE41-LABEL: ugt_13_v8i16:
5234 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5235 ; SSE41-NEXT: movdqa %xmm0, %xmm2
5236 ; SSE41-NEXT: pand %xmm1, %xmm2
5237 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5238 ; SSE41-NEXT: movdqa %xmm3, %xmm4
5239 ; SSE41-NEXT: pshufb %xmm2, %xmm4
5240 ; SSE41-NEXT: psrlw $4, %xmm0
5241 ; SSE41-NEXT: pand %xmm1, %xmm0
5242 ; SSE41-NEXT: pshufb %xmm0, %xmm3
5243 ; SSE41-NEXT: paddb %xmm4, %xmm3
5244 ; SSE41-NEXT: movdqa %xmm3, %xmm0
5245 ; SSE41-NEXT: psllw $8, %xmm0
5246 ; SSE41-NEXT: paddb %xmm3, %xmm0
5247 ; SSE41-NEXT: psrlw $8, %xmm0
5248 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5251 ; AVX1-LABEL: ugt_13_v8i16:
5253 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5254 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
5255 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5256 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
5257 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
5258 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
5259 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
5260 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5261 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
5262 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
5263 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
5264 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
5267 ; AVX2-LABEL: ugt_13_v8i16:
5269 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5270 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
5271 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5272 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
5273 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
5274 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
5275 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
5276 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5277 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
5278 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
5279 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
5280 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
5283 ; AVX512VPOPCNTDQ-LABEL: ugt_13_v8i16:
5284 ; AVX512VPOPCNTDQ: # %bb.0:
5285 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5286 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
5287 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
5288 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
5289 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
5290 ; AVX512VPOPCNTDQ-NEXT: retq
5292 ; AVX512VPOPCNTDQVL-LABEL: ugt_13_v8i16:
5293 ; AVX512VPOPCNTDQVL: # %bb.0:
5294 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5295 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
5296 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
5297 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
5298 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
5299 ; AVX512VPOPCNTDQVL-NEXT: retq
5301 ; BITALG_NOVLX-LABEL: ugt_13_v8i16:
5302 ; BITALG_NOVLX: # %bb.0:
5303 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5304 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
5305 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
5306 ; BITALG_NOVLX-NEXT: vzeroupper
5307 ; BITALG_NOVLX-NEXT: retq
5309 ; BITALG-LABEL: ugt_13_v8i16:
5311 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
5312 ; BITALG-NEXT: vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
5313 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
5315 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
5316 %3 = icmp ugt <8 x i16> %2, <i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13>
5317 %4 = sext <8 x i1> %3 to <8 x i16>
5321 define <8 x i16> @ult_14_v8i16(<8 x i16> %0) {
5322 ; SSE2-LABEL: ult_14_v8i16:
5324 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5325 ; SSE2-NEXT: psrlw $1, %xmm1
5326 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5327 ; SSE2-NEXT: psubb %xmm1, %xmm0
5328 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5329 ; SSE2-NEXT: movdqa %xmm0, %xmm2
5330 ; SSE2-NEXT: pand %xmm1, %xmm2
5331 ; SSE2-NEXT: psrlw $2, %xmm0
5332 ; SSE2-NEXT: pand %xmm1, %xmm0
5333 ; SSE2-NEXT: paddb %xmm2, %xmm0
5334 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5335 ; SSE2-NEXT: psrlw $4, %xmm1
5336 ; SSE2-NEXT: paddb %xmm0, %xmm1
5337 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5338 ; SSE2-NEXT: movdqa %xmm1, %xmm2
5339 ; SSE2-NEXT: psllw $8, %xmm2
5340 ; SSE2-NEXT: paddb %xmm1, %xmm2
5341 ; SSE2-NEXT: psrlw $8, %xmm2
5342 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14,14,14,14,14]
5343 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
5346 ; SSE3-LABEL: ult_14_v8i16:
5348 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5349 ; SSE3-NEXT: psrlw $1, %xmm1
5350 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5351 ; SSE3-NEXT: psubb %xmm1, %xmm0
5352 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5353 ; SSE3-NEXT: movdqa %xmm0, %xmm2
5354 ; SSE3-NEXT: pand %xmm1, %xmm2
5355 ; SSE3-NEXT: psrlw $2, %xmm0
5356 ; SSE3-NEXT: pand %xmm1, %xmm0
5357 ; SSE3-NEXT: paddb %xmm2, %xmm0
5358 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5359 ; SSE3-NEXT: psrlw $4, %xmm1
5360 ; SSE3-NEXT: paddb %xmm0, %xmm1
5361 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5362 ; SSE3-NEXT: movdqa %xmm1, %xmm2
5363 ; SSE3-NEXT: psllw $8, %xmm2
5364 ; SSE3-NEXT: paddb %xmm1, %xmm2
5365 ; SSE3-NEXT: psrlw $8, %xmm2
5366 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14,14,14,14,14]
5367 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
5370 ; SSSE3-LABEL: ult_14_v8i16:
5372 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5373 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
5374 ; SSSE3-NEXT: pand %xmm1, %xmm2
5375 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5376 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
5377 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
5378 ; SSSE3-NEXT: psrlw $4, %xmm0
5379 ; SSSE3-NEXT: pand %xmm1, %xmm0
5380 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
5381 ; SSSE3-NEXT: paddb %xmm4, %xmm3
5382 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
5383 ; SSSE3-NEXT: psllw $8, %xmm1
5384 ; SSSE3-NEXT: paddb %xmm3, %xmm1
5385 ; SSSE3-NEXT: psrlw $8, %xmm1
5386 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14,14,14,14,14]
5387 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
5390 ; SSE41-LABEL: ult_14_v8i16:
5392 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5393 ; SSE41-NEXT: movdqa %xmm0, %xmm2
5394 ; SSE41-NEXT: pand %xmm1, %xmm2
5395 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5396 ; SSE41-NEXT: movdqa %xmm3, %xmm4
5397 ; SSE41-NEXT: pshufb %xmm2, %xmm4
5398 ; SSE41-NEXT: psrlw $4, %xmm0
5399 ; SSE41-NEXT: pand %xmm1, %xmm0
5400 ; SSE41-NEXT: pshufb %xmm0, %xmm3
5401 ; SSE41-NEXT: paddb %xmm4, %xmm3
5402 ; SSE41-NEXT: movdqa %xmm3, %xmm1
5403 ; SSE41-NEXT: psllw $8, %xmm1
5404 ; SSE41-NEXT: paddb %xmm3, %xmm1
5405 ; SSE41-NEXT: psrlw $8, %xmm1
5406 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14,14,14,14,14]
5407 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
5410 ; AVX1-LABEL: ult_14_v8i16:
5412 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5413 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
5414 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5415 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
5416 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
5417 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
5418 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
5419 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5420 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
5421 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
5422 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
5423 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14]
5424 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5427 ; AVX2-LABEL: ult_14_v8i16:
5429 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5430 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
5431 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5432 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
5433 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
5434 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
5435 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
5436 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5437 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
5438 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
5439 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
5440 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14]
5441 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5444 ; AVX512VPOPCNTDQ-LABEL: ult_14_v8i16:
5445 ; AVX512VPOPCNTDQ: # %bb.0:
5446 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5447 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
5448 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
5449 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14]
5450 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5451 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
5452 ; AVX512VPOPCNTDQ-NEXT: retq
5454 ; AVX512VPOPCNTDQVL-LABEL: ult_14_v8i16:
5455 ; AVX512VPOPCNTDQVL: # %bb.0:
5456 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5457 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
5458 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
5459 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14]
5460 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5461 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
5462 ; AVX512VPOPCNTDQVL-NEXT: retq
5464 ; BITALG_NOVLX-LABEL: ult_14_v8i16:
5465 ; BITALG_NOVLX: # %bb.0:
5466 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5467 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
5468 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14]
5469 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5470 ; BITALG_NOVLX-NEXT: vzeroupper
5471 ; BITALG_NOVLX-NEXT: retq
5473 ; BITALG-LABEL: ult_14_v8i16:
5475 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
5476 ; BITALG-NEXT: vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
5477 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
5479 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
5480 %3 = icmp ult <8 x i16> %2, <i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14>
5481 %4 = sext <8 x i1> %3 to <8 x i16>
5485 define <8 x i16> @ugt_14_v8i16(<8 x i16> %0) {
5486 ; SSE2-LABEL: ugt_14_v8i16:
5488 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5489 ; SSE2-NEXT: psrlw $1, %xmm1
5490 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5491 ; SSE2-NEXT: psubb %xmm1, %xmm0
5492 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5493 ; SSE2-NEXT: movdqa %xmm0, %xmm2
5494 ; SSE2-NEXT: pand %xmm1, %xmm2
5495 ; SSE2-NEXT: psrlw $2, %xmm0
5496 ; SSE2-NEXT: pand %xmm1, %xmm0
5497 ; SSE2-NEXT: paddb %xmm2, %xmm0
5498 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5499 ; SSE2-NEXT: psrlw $4, %xmm1
5500 ; SSE2-NEXT: paddb %xmm1, %xmm0
5501 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5502 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5503 ; SSE2-NEXT: psllw $8, %xmm1
5504 ; SSE2-NEXT: paddb %xmm1, %xmm0
5505 ; SSE2-NEXT: psrlw $8, %xmm0
5506 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5509 ; SSE3-LABEL: ugt_14_v8i16:
5511 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5512 ; SSE3-NEXT: psrlw $1, %xmm1
5513 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5514 ; SSE3-NEXT: psubb %xmm1, %xmm0
5515 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5516 ; SSE3-NEXT: movdqa %xmm0, %xmm2
5517 ; SSE3-NEXT: pand %xmm1, %xmm2
5518 ; SSE3-NEXT: psrlw $2, %xmm0
5519 ; SSE3-NEXT: pand %xmm1, %xmm0
5520 ; SSE3-NEXT: paddb %xmm2, %xmm0
5521 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5522 ; SSE3-NEXT: psrlw $4, %xmm1
5523 ; SSE3-NEXT: paddb %xmm1, %xmm0
5524 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5525 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5526 ; SSE3-NEXT: psllw $8, %xmm1
5527 ; SSE3-NEXT: paddb %xmm1, %xmm0
5528 ; SSE3-NEXT: psrlw $8, %xmm0
5529 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5532 ; SSSE3-LABEL: ugt_14_v8i16:
5534 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5535 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
5536 ; SSSE3-NEXT: pand %xmm1, %xmm2
5537 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5538 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
5539 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
5540 ; SSSE3-NEXT: psrlw $4, %xmm0
5541 ; SSSE3-NEXT: pand %xmm1, %xmm0
5542 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
5543 ; SSSE3-NEXT: paddb %xmm4, %xmm3
5544 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
5545 ; SSSE3-NEXT: psllw $8, %xmm0
5546 ; SSSE3-NEXT: paddb %xmm3, %xmm0
5547 ; SSSE3-NEXT: psrlw $8, %xmm0
5548 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5551 ; SSE41-LABEL: ugt_14_v8i16:
5553 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5554 ; SSE41-NEXT: movdqa %xmm0, %xmm2
5555 ; SSE41-NEXT: pand %xmm1, %xmm2
5556 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5557 ; SSE41-NEXT: movdqa %xmm3, %xmm4
5558 ; SSE41-NEXT: pshufb %xmm2, %xmm4
5559 ; SSE41-NEXT: psrlw $4, %xmm0
5560 ; SSE41-NEXT: pand %xmm1, %xmm0
5561 ; SSE41-NEXT: pshufb %xmm0, %xmm3
5562 ; SSE41-NEXT: paddb %xmm4, %xmm3
5563 ; SSE41-NEXT: movdqa %xmm3, %xmm0
5564 ; SSE41-NEXT: psllw $8, %xmm0
5565 ; SSE41-NEXT: paddb %xmm3, %xmm0
5566 ; SSE41-NEXT: psrlw $8, %xmm0
5567 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5570 ; AVX1-LABEL: ugt_14_v8i16:
5572 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5573 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
5574 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5575 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
5576 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
5577 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
5578 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
5579 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5580 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
5581 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
5582 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
5583 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
5586 ; AVX2-LABEL: ugt_14_v8i16:
5588 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5589 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
5590 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5591 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
5592 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
5593 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
5594 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
5595 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5596 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
5597 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
5598 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
5599 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
5602 ; AVX512VPOPCNTDQ-LABEL: ugt_14_v8i16:
5603 ; AVX512VPOPCNTDQ: # %bb.0:
5604 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5605 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
5606 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
5607 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
5608 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
5609 ; AVX512VPOPCNTDQ-NEXT: retq
5611 ; AVX512VPOPCNTDQVL-LABEL: ugt_14_v8i16:
5612 ; AVX512VPOPCNTDQVL: # %bb.0:
5613 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5614 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
5615 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
5616 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
5617 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
5618 ; AVX512VPOPCNTDQVL-NEXT: retq
5620 ; BITALG_NOVLX-LABEL: ugt_14_v8i16:
5621 ; BITALG_NOVLX: # %bb.0:
5622 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5623 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
5624 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
5625 ; BITALG_NOVLX-NEXT: vzeroupper
5626 ; BITALG_NOVLX-NEXT: retq
5628 ; BITALG-LABEL: ugt_14_v8i16:
5630 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
5631 ; BITALG-NEXT: vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
5632 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
5634 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
5635 %3 = icmp ugt <8 x i16> %2, <i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14>
5636 %4 = sext <8 x i1> %3 to <8 x i16>
5640 define <8 x i16> @ult_15_v8i16(<8 x i16> %0) {
5641 ; SSE2-LABEL: ult_15_v8i16:
5643 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5644 ; SSE2-NEXT: psrlw $1, %xmm1
5645 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5646 ; SSE2-NEXT: psubb %xmm1, %xmm0
5647 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5648 ; SSE2-NEXT: movdqa %xmm0, %xmm2
5649 ; SSE2-NEXT: pand %xmm1, %xmm2
5650 ; SSE2-NEXT: psrlw $2, %xmm0
5651 ; SSE2-NEXT: pand %xmm1, %xmm0
5652 ; SSE2-NEXT: paddb %xmm2, %xmm0
5653 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5654 ; SSE2-NEXT: psrlw $4, %xmm1
5655 ; SSE2-NEXT: paddb %xmm0, %xmm1
5656 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5657 ; SSE2-NEXT: movdqa %xmm1, %xmm2
5658 ; SSE2-NEXT: psllw $8, %xmm2
5659 ; SSE2-NEXT: paddb %xmm1, %xmm2
5660 ; SSE2-NEXT: psrlw $8, %xmm2
5661 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15]
5662 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
5665 ; SSE3-LABEL: ult_15_v8i16:
5667 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5668 ; SSE3-NEXT: psrlw $1, %xmm1
5669 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5670 ; SSE3-NEXT: psubb %xmm1, %xmm0
5671 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5672 ; SSE3-NEXT: movdqa %xmm0, %xmm2
5673 ; SSE3-NEXT: pand %xmm1, %xmm2
5674 ; SSE3-NEXT: psrlw $2, %xmm0
5675 ; SSE3-NEXT: pand %xmm1, %xmm0
5676 ; SSE3-NEXT: paddb %xmm2, %xmm0
5677 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5678 ; SSE3-NEXT: psrlw $4, %xmm1
5679 ; SSE3-NEXT: paddb %xmm0, %xmm1
5680 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5681 ; SSE3-NEXT: movdqa %xmm1, %xmm2
5682 ; SSE3-NEXT: psllw $8, %xmm2
5683 ; SSE3-NEXT: paddb %xmm1, %xmm2
5684 ; SSE3-NEXT: psrlw $8, %xmm2
5685 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15]
5686 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
5689 ; SSSE3-LABEL: ult_15_v8i16:
5691 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5692 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
5693 ; SSSE3-NEXT: pand %xmm1, %xmm2
5694 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5695 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
5696 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
5697 ; SSSE3-NEXT: psrlw $4, %xmm0
5698 ; SSSE3-NEXT: pand %xmm1, %xmm0
5699 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
5700 ; SSSE3-NEXT: paddb %xmm4, %xmm3
5701 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
5702 ; SSSE3-NEXT: psllw $8, %xmm1
5703 ; SSSE3-NEXT: paddb %xmm3, %xmm1
5704 ; SSSE3-NEXT: psrlw $8, %xmm1
5705 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15]
5706 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
5709 ; SSE41-LABEL: ult_15_v8i16:
5711 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5712 ; SSE41-NEXT: movdqa %xmm0, %xmm2
5713 ; SSE41-NEXT: pand %xmm1, %xmm2
5714 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5715 ; SSE41-NEXT: movdqa %xmm3, %xmm4
5716 ; SSE41-NEXT: pshufb %xmm2, %xmm4
5717 ; SSE41-NEXT: psrlw $4, %xmm0
5718 ; SSE41-NEXT: pand %xmm1, %xmm0
5719 ; SSE41-NEXT: pshufb %xmm0, %xmm3
5720 ; SSE41-NEXT: paddb %xmm4, %xmm3
5721 ; SSE41-NEXT: movdqa %xmm3, %xmm1
5722 ; SSE41-NEXT: psllw $8, %xmm1
5723 ; SSE41-NEXT: paddb %xmm3, %xmm1
5724 ; SSE41-NEXT: psrlw $8, %xmm1
5725 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15]
5726 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
5729 ; AVX1-LABEL: ult_15_v8i16:
5731 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5732 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
5733 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5734 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
5735 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
5736 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
5737 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
5738 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5739 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
5740 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
5741 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
5742 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15]
5743 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5746 ; AVX2-LABEL: ult_15_v8i16:
5748 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5749 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
5750 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5751 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
5752 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
5753 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
5754 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
5755 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5756 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
5757 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
5758 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
5759 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15]
5760 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5763 ; AVX512VPOPCNTDQ-LABEL: ult_15_v8i16:
5764 ; AVX512VPOPCNTDQ: # %bb.0:
5765 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5766 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
5767 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
5768 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15]
5769 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5770 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
5771 ; AVX512VPOPCNTDQ-NEXT: retq
5773 ; AVX512VPOPCNTDQVL-LABEL: ult_15_v8i16:
5774 ; AVX512VPOPCNTDQVL: # %bb.0:
5775 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5776 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
5777 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
5778 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15]
5779 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5780 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
5781 ; AVX512VPOPCNTDQVL-NEXT: retq
5783 ; BITALG_NOVLX-LABEL: ult_15_v8i16:
5784 ; BITALG_NOVLX: # %bb.0:
5785 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5786 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
5787 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15]
5788 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5789 ; BITALG_NOVLX-NEXT: vzeroupper
5790 ; BITALG_NOVLX-NEXT: retq
5792 ; BITALG-LABEL: ult_15_v8i16:
5794 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
5795 ; BITALG-NEXT: vpcmpltuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
5796 ; BITALG-NEXT: vpmovm2w %k0, %xmm0
5798 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
5799 %3 = icmp ult <8 x i16> %2, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
5800 %4 = sext <8 x i1> %3 to <8 x i16>
5804 define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) {
5805 ; SSE-LABEL: ugt_1_v4i32:
5807 ; SSE-NEXT: pcmpeqd %xmm1, %xmm1
5808 ; SSE-NEXT: movdqa %xmm0, %xmm2
5809 ; SSE-NEXT: paddd %xmm1, %xmm2
5810 ; SSE-NEXT: pand %xmm2, %xmm0
5811 ; SSE-NEXT: pxor %xmm2, %xmm2
5812 ; SSE-NEXT: pcmpeqd %xmm2, %xmm0
5813 ; SSE-NEXT: pxor %xmm1, %xmm0
5816 ; AVX1-LABEL: ugt_1_v4i32:
5818 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
5819 ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm2
5820 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
5821 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
5822 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
5823 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
5826 ; AVX2-LABEL: ugt_1_v4i32:
5828 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
5829 ; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm2
5830 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
5831 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
5832 ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
5833 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
5836 ; AVX512VPOPCNTDQ-LABEL: ugt_1_v4i32:
5837 ; AVX512VPOPCNTDQ: # %bb.0:
5838 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5839 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
5840 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
5841 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
5842 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
5843 ; AVX512VPOPCNTDQ-NEXT: retq
5845 ; AVX512VPOPCNTDQVL-LABEL: ugt_1_v4i32:
5846 ; AVX512VPOPCNTDQVL: # %bb.0:
5847 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
5848 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
5849 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
5850 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
5851 ; AVX512VPOPCNTDQVL-NEXT: retq
5853 ; BITALG_NOVLX-LABEL: ugt_1_v4i32:
5854 ; BITALG_NOVLX: # %bb.0:
5855 ; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
5856 ; BITALG_NOVLX-NEXT: vpaddd %xmm1, %xmm0, %xmm1
5857 ; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
5858 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
5859 ; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
5860 ; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
5861 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
5862 ; BITALG_NOVLX-NEXT: vzeroupper
5863 ; BITALG_NOVLX-NEXT: retq
5865 ; BITALG-LABEL: ugt_1_v4i32:
5867 ; BITALG-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
5868 ; BITALG-NEXT: vpaddd %xmm1, %xmm0, %xmm1
5869 ; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0
5870 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
5871 ; BITALG-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
5872 ; BITALG-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
5874 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
5875 %3 = icmp ugt <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
5876 %4 = sext <4 x i1> %3 to <4 x i32>
5880 define <4 x i32> @ult_2_v4i32(<4 x i32> %0) {
5881 ; SSE-LABEL: ult_2_v4i32:
5883 ; SSE-NEXT: pcmpeqd %xmm1, %xmm1
5884 ; SSE-NEXT: paddd %xmm0, %xmm1
5885 ; SSE-NEXT: pand %xmm1, %xmm0
5886 ; SSE-NEXT: pxor %xmm1, %xmm1
5887 ; SSE-NEXT: pcmpeqd %xmm1, %xmm0
5890 ; AVX1-LABEL: ult_2_v4i32:
5892 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
5893 ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
5894 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
5895 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
5896 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
5899 ; AVX2-LABEL: ult_2_v4i32:
5901 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
5902 ; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm1
5903 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
5904 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
5905 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
5908 ; AVX512VPOPCNTDQ-LABEL: ult_2_v4i32:
5909 ; AVX512VPOPCNTDQ: # %bb.0:
5910 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5911 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
5912 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
5913 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
5914 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
5915 ; AVX512VPOPCNTDQ-NEXT: retq
5917 ; AVX512VPOPCNTDQVL-LABEL: ult_2_v4i32:
5918 ; AVX512VPOPCNTDQVL: # %bb.0:
5919 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
5920 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
5921 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
5922 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
5923 ; AVX512VPOPCNTDQVL-NEXT: retq
5925 ; BITALG_NOVLX-LABEL: ult_2_v4i32:
5926 ; BITALG_NOVLX: # %bb.0:
5927 ; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
5928 ; BITALG_NOVLX-NEXT: vpaddd %xmm1, %xmm0, %xmm1
5929 ; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
5930 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
5931 ; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
5932 ; BITALG_NOVLX-NEXT: retq
5934 ; BITALG-LABEL: ult_2_v4i32:
5936 ; BITALG-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
5937 ; BITALG-NEXT: vpaddd %xmm1, %xmm0, %xmm1
5938 ; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0
5939 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
5940 ; BITALG-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
5942 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
5943 %3 = icmp ult <4 x i32> %2, <i32 2, i32 2, i32 2, i32 2>
5944 %4 = sext <4 x i1> %3 to <4 x i32>
5948 define <4 x i32> @ugt_2_v4i32(<4 x i32> %0) {
5949 ; SSE2-LABEL: ugt_2_v4i32:
5951 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5952 ; SSE2-NEXT: psrlw $1, %xmm1
5953 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5954 ; SSE2-NEXT: psubb %xmm1, %xmm0
5955 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5956 ; SSE2-NEXT: movdqa %xmm0, %xmm2
5957 ; SSE2-NEXT: pand %xmm1, %xmm2
5958 ; SSE2-NEXT: psrlw $2, %xmm0
5959 ; SSE2-NEXT: pand %xmm1, %xmm0
5960 ; SSE2-NEXT: paddb %xmm2, %xmm0
5961 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5962 ; SSE2-NEXT: psrlw $4, %xmm1
5963 ; SSE2-NEXT: paddb %xmm1, %xmm0
5964 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5965 ; SSE2-NEXT: pxor %xmm1, %xmm1
5966 ; SSE2-NEXT: movdqa %xmm0, %xmm2
5967 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
5968 ; SSE2-NEXT: psadbw %xmm1, %xmm2
5969 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
5970 ; SSE2-NEXT: psadbw %xmm1, %xmm0
5971 ; SSE2-NEXT: packuswb %xmm2, %xmm0
5972 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5975 ; SSE3-LABEL: ugt_2_v4i32:
5977 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5978 ; SSE3-NEXT: psrlw $1, %xmm1
5979 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5980 ; SSE3-NEXT: psubb %xmm1, %xmm0
5981 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5982 ; SSE3-NEXT: movdqa %xmm0, %xmm2
5983 ; SSE3-NEXT: pand %xmm1, %xmm2
5984 ; SSE3-NEXT: psrlw $2, %xmm0
5985 ; SSE3-NEXT: pand %xmm1, %xmm0
5986 ; SSE3-NEXT: paddb %xmm2, %xmm0
5987 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5988 ; SSE3-NEXT: psrlw $4, %xmm1
5989 ; SSE3-NEXT: paddb %xmm1, %xmm0
5990 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5991 ; SSE3-NEXT: pxor %xmm1, %xmm1
5992 ; SSE3-NEXT: movdqa %xmm0, %xmm2
5993 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
5994 ; SSE3-NEXT: psadbw %xmm1, %xmm2
5995 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
5996 ; SSE3-NEXT: psadbw %xmm1, %xmm0
5997 ; SSE3-NEXT: packuswb %xmm2, %xmm0
5998 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
6001 ; SSSE3-LABEL: ugt_2_v4i32:
6003 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6004 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
6005 ; SSSE3-NEXT: pand %xmm2, %xmm3
6006 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6007 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
6008 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
6009 ; SSSE3-NEXT: psrlw $4, %xmm0
6010 ; SSSE3-NEXT: pand %xmm2, %xmm0
6011 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
6012 ; SSSE3-NEXT: paddb %xmm4, %xmm1
6013 ; SSSE3-NEXT: pxor %xmm0, %xmm0
6014 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
6015 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
6016 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
6017 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
6018 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
6019 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
6020 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6021 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
6024 ; SSE41-LABEL: ugt_2_v4i32:
6026 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6027 ; SSE41-NEXT: movdqa %xmm0, %xmm2
6028 ; SSE41-NEXT: pand %xmm1, %xmm2
6029 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6030 ; SSE41-NEXT: movdqa %xmm3, %xmm4
6031 ; SSE41-NEXT: pshufb %xmm2, %xmm4
6032 ; SSE41-NEXT: psrlw $4, %xmm0
6033 ; SSE41-NEXT: pand %xmm1, %xmm0
6034 ; SSE41-NEXT: pshufb %xmm0, %xmm3
6035 ; SSE41-NEXT: paddb %xmm4, %xmm3
6036 ; SSE41-NEXT: pxor %xmm1, %xmm1
6037 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
6038 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
6039 ; SSE41-NEXT: psadbw %xmm1, %xmm3
6040 ; SSE41-NEXT: psadbw %xmm1, %xmm0
6041 ; SSE41-NEXT: packuswb %xmm3, %xmm0
6042 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
6045 ; AVX1-LABEL: ugt_2_v4i32:
6047 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6048 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
6049 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6050 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
6051 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
6052 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
6053 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
6054 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
6055 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
6056 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6057 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6058 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6059 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6060 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6061 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
6064 ; AVX2-LABEL: ugt_2_v4i32:
6066 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6067 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
6068 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6069 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
6070 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
6071 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
6072 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
6073 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
6074 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
6075 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6076 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6077 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6078 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6079 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6080 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
6081 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
6084 ; AVX512VPOPCNTDQ-LABEL: ugt_2_v4i32:
6085 ; AVX512VPOPCNTDQ: # %bb.0:
6086 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6087 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
6088 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
6089 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
6090 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
6091 ; AVX512VPOPCNTDQ-NEXT: retq
6093 ; AVX512VPOPCNTDQVL-LABEL: ugt_2_v4i32:
6094 ; AVX512VPOPCNTDQVL: # %bb.0:
6095 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
6096 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
6097 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
6098 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
6099 ; AVX512VPOPCNTDQVL-NEXT: retq
6101 ; BITALG_NOVLX-LABEL: ugt_2_v4i32:
6102 ; BITALG_NOVLX: # %bb.0:
6103 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6104 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
6105 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
6106 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6107 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6108 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6109 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6110 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6111 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
6112 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
6113 ; BITALG_NOVLX-NEXT: vzeroupper
6114 ; BITALG_NOVLX-NEXT: retq
6116 ; BITALG-LABEL: ugt_2_v4i32:
6118 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
6119 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
6120 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6121 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6122 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6123 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6124 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6125 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
6126 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
6127 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
6129 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
6130 %3 = icmp ugt <4 x i32> %2, <i32 2, i32 2, i32 2, i32 2>
6131 %4 = sext <4 x i1> %3 to <4 x i32>
6135 define <4 x i32> @ult_3_v4i32(<4 x i32> %0) {
6136 ; SSE2-LABEL: ult_3_v4i32:
6138 ; SSE2-NEXT: movdqa %xmm0, %xmm1
6139 ; SSE2-NEXT: psrlw $1, %xmm1
6140 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6141 ; SSE2-NEXT: psubb %xmm1, %xmm0
6142 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
6143 ; SSE2-NEXT: movdqa %xmm0, %xmm2
6144 ; SSE2-NEXT: pand %xmm1, %xmm2
6145 ; SSE2-NEXT: psrlw $2, %xmm0
6146 ; SSE2-NEXT: pand %xmm1, %xmm0
6147 ; SSE2-NEXT: paddb %xmm2, %xmm0
6148 ; SSE2-NEXT: movdqa %xmm0, %xmm1
6149 ; SSE2-NEXT: psrlw $4, %xmm1
6150 ; SSE2-NEXT: paddb %xmm0, %xmm1
6151 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6152 ; SSE2-NEXT: pxor %xmm0, %xmm0
6153 ; SSE2-NEXT: movdqa %xmm1, %xmm2
6154 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
6155 ; SSE2-NEXT: psadbw %xmm0, %xmm2
6156 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
6157 ; SSE2-NEXT: psadbw %xmm0, %xmm1
6158 ; SSE2-NEXT: packuswb %xmm2, %xmm1
6159 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3]
6160 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
6163 ; SSE3-LABEL: ult_3_v4i32:
6165 ; SSE3-NEXT: movdqa %xmm0, %xmm1
6166 ; SSE3-NEXT: psrlw $1, %xmm1
6167 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6168 ; SSE3-NEXT: psubb %xmm1, %xmm0
6169 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
6170 ; SSE3-NEXT: movdqa %xmm0, %xmm2
6171 ; SSE3-NEXT: pand %xmm1, %xmm2
6172 ; SSE3-NEXT: psrlw $2, %xmm0
6173 ; SSE3-NEXT: pand %xmm1, %xmm0
6174 ; SSE3-NEXT: paddb %xmm2, %xmm0
6175 ; SSE3-NEXT: movdqa %xmm0, %xmm1
6176 ; SSE3-NEXT: psrlw $4, %xmm1
6177 ; SSE3-NEXT: paddb %xmm0, %xmm1
6178 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6179 ; SSE3-NEXT: pxor %xmm0, %xmm0
6180 ; SSE3-NEXT: movdqa %xmm1, %xmm2
6181 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
6182 ; SSE3-NEXT: psadbw %xmm0, %xmm2
6183 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
6184 ; SSE3-NEXT: psadbw %xmm0, %xmm1
6185 ; SSE3-NEXT: packuswb %xmm2, %xmm1
6186 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3]
6187 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
6190 ; SSSE3-LABEL: ult_3_v4i32:
6192 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6193 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
6194 ; SSSE3-NEXT: pand %xmm1, %xmm2
6195 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6196 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
6197 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
6198 ; SSSE3-NEXT: psrlw $4, %xmm0
6199 ; SSSE3-NEXT: pand %xmm1, %xmm0
6200 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
6201 ; SSSE3-NEXT: paddb %xmm4, %xmm3
6202 ; SSSE3-NEXT: pxor %xmm0, %xmm0
6203 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
6204 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
6205 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
6206 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
6207 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
6208 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
6209 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3]
6210 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
6213 ; SSE41-LABEL: ult_3_v4i32:
6215 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6216 ; SSE41-NEXT: movdqa %xmm0, %xmm2
6217 ; SSE41-NEXT: pand %xmm1, %xmm2
6218 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6219 ; SSE41-NEXT: movdqa %xmm3, %xmm4
6220 ; SSE41-NEXT: pshufb %xmm2, %xmm4
6221 ; SSE41-NEXT: psrlw $4, %xmm0
6222 ; SSE41-NEXT: pand %xmm1, %xmm0
6223 ; SSE41-NEXT: pshufb %xmm0, %xmm3
6224 ; SSE41-NEXT: paddb %xmm4, %xmm3
6225 ; SSE41-NEXT: pxor %xmm0, %xmm0
6226 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
6227 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
6228 ; SSE41-NEXT: psadbw %xmm0, %xmm3
6229 ; SSE41-NEXT: psadbw %xmm0, %xmm1
6230 ; SSE41-NEXT: packuswb %xmm3, %xmm1
6231 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3]
6232 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
6235 ; AVX1-LABEL: ult_3_v4i32:
6237 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6238 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
6239 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6240 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
6241 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
6242 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
6243 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
6244 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
6245 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
6246 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6247 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6248 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6249 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6250 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6251 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3]
6252 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
6255 ; AVX2-LABEL: ult_3_v4i32:
6257 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6258 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
6259 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6260 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
6261 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
6262 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
6263 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
6264 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
6265 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
6266 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6267 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6268 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6269 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6270 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6271 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
6272 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
6275 ; AVX512VPOPCNTDQ-LABEL: ult_3_v4i32:
6276 ; AVX512VPOPCNTDQ: # %bb.0:
6277 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6278 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
6279 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
6280 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
6281 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
6282 ; AVX512VPOPCNTDQ-NEXT: retq
6284 ; AVX512VPOPCNTDQVL-LABEL: ult_3_v4i32:
6285 ; AVX512VPOPCNTDQVL: # %bb.0:
6286 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
6287 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
6288 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
6289 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
6290 ; AVX512VPOPCNTDQVL-NEXT: retq
6292 ; BITALG_NOVLX-LABEL: ult_3_v4i32:
6293 ; BITALG_NOVLX: # %bb.0:
6294 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6295 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
6296 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
6297 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6298 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6299 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6300 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6301 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6302 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
6303 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
6304 ; BITALG_NOVLX-NEXT: vzeroupper
6305 ; BITALG_NOVLX-NEXT: retq
6307 ; BITALG-LABEL: ult_3_v4i32:
6309 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
6310 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
6311 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6312 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6313 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6314 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6315 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6316 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
6317 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
6318 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
6320 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
6321 %3 = icmp ult <4 x i32> %2, <i32 3, i32 3, i32 3, i32 3>
6322 %4 = sext <4 x i1> %3 to <4 x i32>
6326 define <4 x i32> @ugt_3_v4i32(<4 x i32> %0) {
6327 ; SSE2-LABEL: ugt_3_v4i32:
6329 ; SSE2-NEXT: movdqa %xmm0, %xmm1
6330 ; SSE2-NEXT: psrlw $1, %xmm1
6331 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6332 ; SSE2-NEXT: psubb %xmm1, %xmm0
6333 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
6334 ; SSE2-NEXT: movdqa %xmm0, %xmm2
6335 ; SSE2-NEXT: pand %xmm1, %xmm2
6336 ; SSE2-NEXT: psrlw $2, %xmm0
6337 ; SSE2-NEXT: pand %xmm1, %xmm0
6338 ; SSE2-NEXT: paddb %xmm2, %xmm0
6339 ; SSE2-NEXT: movdqa %xmm0, %xmm1
6340 ; SSE2-NEXT: psrlw $4, %xmm1
6341 ; SSE2-NEXT: paddb %xmm1, %xmm0
6342 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
6343 ; SSE2-NEXT: pxor %xmm1, %xmm1
6344 ; SSE2-NEXT: movdqa %xmm0, %xmm2
6345 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
6346 ; SSE2-NEXT: psadbw %xmm1, %xmm2
6347 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
6348 ; SSE2-NEXT: psadbw %xmm1, %xmm0
6349 ; SSE2-NEXT: packuswb %xmm2, %xmm0
6350 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
6353 ; SSE3-LABEL: ugt_3_v4i32:
6355 ; SSE3-NEXT: movdqa %xmm0, %xmm1
6356 ; SSE3-NEXT: psrlw $1, %xmm1
6357 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6358 ; SSE3-NEXT: psubb %xmm1, %xmm0
6359 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
6360 ; SSE3-NEXT: movdqa %xmm0, %xmm2
6361 ; SSE3-NEXT: pand %xmm1, %xmm2
6362 ; SSE3-NEXT: psrlw $2, %xmm0
6363 ; SSE3-NEXT: pand %xmm1, %xmm0
6364 ; SSE3-NEXT: paddb %xmm2, %xmm0
6365 ; SSE3-NEXT: movdqa %xmm0, %xmm1
6366 ; SSE3-NEXT: psrlw $4, %xmm1
6367 ; SSE3-NEXT: paddb %xmm1, %xmm0
6368 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
6369 ; SSE3-NEXT: pxor %xmm1, %xmm1
6370 ; SSE3-NEXT: movdqa %xmm0, %xmm2
6371 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
6372 ; SSE3-NEXT: psadbw %xmm1, %xmm2
6373 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
6374 ; SSE3-NEXT: psadbw %xmm1, %xmm0
6375 ; SSE3-NEXT: packuswb %xmm2, %xmm0
6376 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
6379 ; SSSE3-LABEL: ugt_3_v4i32:
6381 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6382 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
6383 ; SSSE3-NEXT: pand %xmm2, %xmm3
6384 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6385 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
6386 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
6387 ; SSSE3-NEXT: psrlw $4, %xmm0
6388 ; SSSE3-NEXT: pand %xmm2, %xmm0
6389 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
6390 ; SSSE3-NEXT: paddb %xmm4, %xmm1
6391 ; SSSE3-NEXT: pxor %xmm0, %xmm0
6392 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
6393 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
6394 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
6395 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
6396 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
6397 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
6398 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6399 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
6402 ; SSE41-LABEL: ugt_3_v4i32:
6404 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6405 ; SSE41-NEXT: movdqa %xmm0, %xmm2
6406 ; SSE41-NEXT: pand %xmm1, %xmm2
6407 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6408 ; SSE41-NEXT: movdqa %xmm3, %xmm4
6409 ; SSE41-NEXT: pshufb %xmm2, %xmm4
6410 ; SSE41-NEXT: psrlw $4, %xmm0
6411 ; SSE41-NEXT: pand %xmm1, %xmm0
6412 ; SSE41-NEXT: pshufb %xmm0, %xmm3
6413 ; SSE41-NEXT: paddb %xmm4, %xmm3
6414 ; SSE41-NEXT: pxor %xmm1, %xmm1
6415 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
6416 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
6417 ; SSE41-NEXT: psadbw %xmm1, %xmm3
6418 ; SSE41-NEXT: psadbw %xmm1, %xmm0
6419 ; SSE41-NEXT: packuswb %xmm3, %xmm0
6420 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
6423 ; AVX1-LABEL: ugt_3_v4i32:
6425 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6426 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
6427 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6428 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
6429 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
6430 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
6431 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
6432 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
6433 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
6434 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6435 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6436 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6437 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6438 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6439 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
6442 ; AVX2-LABEL: ugt_3_v4i32:
6444 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6445 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
6446 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6447 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
6448 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
6449 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
6450 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
6451 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
6452 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
6453 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6454 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6455 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6456 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6457 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6458 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
6459 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
6462 ; AVX512VPOPCNTDQ-LABEL: ugt_3_v4i32:
6463 ; AVX512VPOPCNTDQ: # %bb.0:
6464 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6465 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
6466 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
6467 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
6468 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
6469 ; AVX512VPOPCNTDQ-NEXT: retq
6471 ; AVX512VPOPCNTDQVL-LABEL: ugt_3_v4i32:
6472 ; AVX512VPOPCNTDQVL: # %bb.0:
6473 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
6474 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
6475 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
6476 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
6477 ; AVX512VPOPCNTDQVL-NEXT: retq
6479 ; BITALG_NOVLX-LABEL: ugt_3_v4i32:
6480 ; BITALG_NOVLX: # %bb.0:
6481 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6482 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
6483 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
6484 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6485 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6486 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6487 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6488 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6489 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
6490 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
6491 ; BITALG_NOVLX-NEXT: vzeroupper
6492 ; BITALG_NOVLX-NEXT: retq
6494 ; BITALG-LABEL: ugt_3_v4i32:
6496 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
6497 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
6498 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6499 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6500 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6501 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6502 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6503 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
6504 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
6505 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
6507 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
6508 %3 = icmp ugt <4 x i32> %2, <i32 3, i32 3, i32 3, i32 3>
6509 %4 = sext <4 x i1> %3 to <4 x i32>
6513 define <4 x i32> @ult_4_v4i32(<4 x i32> %0) {
6514 ; SSE2-LABEL: ult_4_v4i32:
6516 ; SSE2-NEXT: movdqa %xmm0, %xmm1
6517 ; SSE2-NEXT: psrlw $1, %xmm1
6518 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6519 ; SSE2-NEXT: psubb %xmm1, %xmm0
6520 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
6521 ; SSE2-NEXT: movdqa %xmm0, %xmm2
6522 ; SSE2-NEXT: pand %xmm1, %xmm2
6523 ; SSE2-NEXT: psrlw $2, %xmm0
6524 ; SSE2-NEXT: pand %xmm1, %xmm0
6525 ; SSE2-NEXT: paddb %xmm2, %xmm0
6526 ; SSE2-NEXT: movdqa %xmm0, %xmm1
6527 ; SSE2-NEXT: psrlw $4, %xmm1
6528 ; SSE2-NEXT: paddb %xmm0, %xmm1
6529 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6530 ; SSE2-NEXT: pxor %xmm0, %xmm0
6531 ; SSE2-NEXT: movdqa %xmm1, %xmm2
6532 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
6533 ; SSE2-NEXT: psadbw %xmm0, %xmm2
6534 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
6535 ; SSE2-NEXT: psadbw %xmm0, %xmm1
6536 ; SSE2-NEXT: packuswb %xmm2, %xmm1
6537 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4]
6538 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
6541 ; SSE3-LABEL: ult_4_v4i32:
6543 ; SSE3-NEXT: movdqa %xmm0, %xmm1
6544 ; SSE3-NEXT: psrlw $1, %xmm1
6545 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6546 ; SSE3-NEXT: psubb %xmm1, %xmm0
6547 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
6548 ; SSE3-NEXT: movdqa %xmm0, %xmm2
6549 ; SSE3-NEXT: pand %xmm1, %xmm2
6550 ; SSE3-NEXT: psrlw $2, %xmm0
6551 ; SSE3-NEXT: pand %xmm1, %xmm0
6552 ; SSE3-NEXT: paddb %xmm2, %xmm0
6553 ; SSE3-NEXT: movdqa %xmm0, %xmm1
6554 ; SSE3-NEXT: psrlw $4, %xmm1
6555 ; SSE3-NEXT: paddb %xmm0, %xmm1
6556 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6557 ; SSE3-NEXT: pxor %xmm0, %xmm0
6558 ; SSE3-NEXT: movdqa %xmm1, %xmm2
6559 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
6560 ; SSE3-NEXT: psadbw %xmm0, %xmm2
6561 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
6562 ; SSE3-NEXT: psadbw %xmm0, %xmm1
6563 ; SSE3-NEXT: packuswb %xmm2, %xmm1
6564 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4]
6565 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
6568 ; SSSE3-LABEL: ult_4_v4i32:
6570 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6571 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
6572 ; SSSE3-NEXT: pand %xmm1, %xmm2
6573 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6574 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
6575 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
6576 ; SSSE3-NEXT: psrlw $4, %xmm0
6577 ; SSSE3-NEXT: pand %xmm1, %xmm0
6578 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
6579 ; SSSE3-NEXT: paddb %xmm4, %xmm3
6580 ; SSSE3-NEXT: pxor %xmm0, %xmm0
6581 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
6582 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
6583 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
6584 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
6585 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
6586 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
6587 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4]
6588 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
6591 ; SSE41-LABEL: ult_4_v4i32:
6593 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6594 ; SSE41-NEXT: movdqa %xmm0, %xmm2
6595 ; SSE41-NEXT: pand %xmm1, %xmm2
6596 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6597 ; SSE41-NEXT: movdqa %xmm3, %xmm4
6598 ; SSE41-NEXT: pshufb %xmm2, %xmm4
6599 ; SSE41-NEXT: psrlw $4, %xmm0
6600 ; SSE41-NEXT: pand %xmm1, %xmm0
6601 ; SSE41-NEXT: pshufb %xmm0, %xmm3
6602 ; SSE41-NEXT: paddb %xmm4, %xmm3
6603 ; SSE41-NEXT: pxor %xmm0, %xmm0
6604 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
6605 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
6606 ; SSE41-NEXT: psadbw %xmm0, %xmm3
6607 ; SSE41-NEXT: psadbw %xmm0, %xmm1
6608 ; SSE41-NEXT: packuswb %xmm3, %xmm1
6609 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4]
6610 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
6613 ; AVX1-LABEL: ult_4_v4i32:
6615 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6616 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
6617 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6618 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
6619 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
6620 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
6621 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
6622 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
6623 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
6624 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6625 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6626 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6627 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6628 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6629 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4]
6630 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
6633 ; AVX2-LABEL: ult_4_v4i32:
6635 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6636 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
6637 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6638 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
6639 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
6640 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
6641 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
6642 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
6643 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
6644 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6645 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6646 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6647 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6648 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6649 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
6650 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
6653 ; AVX512VPOPCNTDQ-LABEL: ult_4_v4i32:
6654 ; AVX512VPOPCNTDQ: # %bb.0:
6655 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6656 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
6657 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
6658 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
6659 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
6660 ; AVX512VPOPCNTDQ-NEXT: retq
6662 ; AVX512VPOPCNTDQVL-LABEL: ult_4_v4i32:
6663 ; AVX512VPOPCNTDQVL: # %bb.0:
6664 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
6665 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
6666 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
6667 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
6668 ; AVX512VPOPCNTDQVL-NEXT: retq
6670 ; BITALG_NOVLX-LABEL: ult_4_v4i32:
6671 ; BITALG_NOVLX: # %bb.0:
6672 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6673 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
6674 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
6675 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6676 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6677 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6678 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6679 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6680 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
6681 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
6682 ; BITALG_NOVLX-NEXT: vzeroupper
6683 ; BITALG_NOVLX-NEXT: retq
6685 ; BITALG-LABEL: ult_4_v4i32:
6687 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
6688 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
6689 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6690 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6691 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6692 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6693 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6694 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
6695 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
6696 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
6698 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
6699 %3 = icmp ult <4 x i32> %2, <i32 4, i32 4, i32 4, i32 4>
6700 %4 = sext <4 x i1> %3 to <4 x i32>
6704 define <4 x i32> @ugt_4_v4i32(<4 x i32> %0) {
6705 ; SSE2-LABEL: ugt_4_v4i32:
6707 ; SSE2-NEXT: movdqa %xmm0, %xmm1
6708 ; SSE2-NEXT: psrlw $1, %xmm1
6709 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6710 ; SSE2-NEXT: psubb %xmm1, %xmm0
6711 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
6712 ; SSE2-NEXT: movdqa %xmm0, %xmm2
6713 ; SSE2-NEXT: pand %xmm1, %xmm2
6714 ; SSE2-NEXT: psrlw $2, %xmm0
6715 ; SSE2-NEXT: pand %xmm1, %xmm0
6716 ; SSE2-NEXT: paddb %xmm2, %xmm0
6717 ; SSE2-NEXT: movdqa %xmm0, %xmm1
6718 ; SSE2-NEXT: psrlw $4, %xmm1
6719 ; SSE2-NEXT: paddb %xmm1, %xmm0
6720 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
6721 ; SSE2-NEXT: pxor %xmm1, %xmm1
6722 ; SSE2-NEXT: movdqa %xmm0, %xmm2
6723 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
6724 ; SSE2-NEXT: psadbw %xmm1, %xmm2
6725 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
6726 ; SSE2-NEXT: psadbw %xmm1, %xmm0
6727 ; SSE2-NEXT: packuswb %xmm2, %xmm0
6728 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
6731 ; SSE3-LABEL: ugt_4_v4i32:
6733 ; SSE3-NEXT: movdqa %xmm0, %xmm1
6734 ; SSE3-NEXT: psrlw $1, %xmm1
6735 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6736 ; SSE3-NEXT: psubb %xmm1, %xmm0
6737 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
6738 ; SSE3-NEXT: movdqa %xmm0, %xmm2
6739 ; SSE3-NEXT: pand %xmm1, %xmm2
6740 ; SSE3-NEXT: psrlw $2, %xmm0
6741 ; SSE3-NEXT: pand %xmm1, %xmm0
6742 ; SSE3-NEXT: paddb %xmm2, %xmm0
6743 ; SSE3-NEXT: movdqa %xmm0, %xmm1
6744 ; SSE3-NEXT: psrlw $4, %xmm1
6745 ; SSE3-NEXT: paddb %xmm1, %xmm0
6746 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
6747 ; SSE3-NEXT: pxor %xmm1, %xmm1
6748 ; SSE3-NEXT: movdqa %xmm0, %xmm2
6749 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
6750 ; SSE3-NEXT: psadbw %xmm1, %xmm2
6751 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
6752 ; SSE3-NEXT: psadbw %xmm1, %xmm0
6753 ; SSE3-NEXT: packuswb %xmm2, %xmm0
6754 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
6757 ; SSSE3-LABEL: ugt_4_v4i32:
6759 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6760 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
6761 ; SSSE3-NEXT: pand %xmm2, %xmm3
6762 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6763 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
6764 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
6765 ; SSSE3-NEXT: psrlw $4, %xmm0
6766 ; SSSE3-NEXT: pand %xmm2, %xmm0
6767 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
6768 ; SSSE3-NEXT: paddb %xmm4, %xmm1
6769 ; SSSE3-NEXT: pxor %xmm0, %xmm0
6770 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
6771 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
6772 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
6773 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
6774 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
6775 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
6776 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6777 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
6780 ; SSE41-LABEL: ugt_4_v4i32:
6782 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6783 ; SSE41-NEXT: movdqa %xmm0, %xmm2
6784 ; SSE41-NEXT: pand %xmm1, %xmm2
6785 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6786 ; SSE41-NEXT: movdqa %xmm3, %xmm4
6787 ; SSE41-NEXT: pshufb %xmm2, %xmm4
6788 ; SSE41-NEXT: psrlw $4, %xmm0
6789 ; SSE41-NEXT: pand %xmm1, %xmm0
6790 ; SSE41-NEXT: pshufb %xmm0, %xmm3
6791 ; SSE41-NEXT: paddb %xmm4, %xmm3
6792 ; SSE41-NEXT: pxor %xmm1, %xmm1
6793 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
6794 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
6795 ; SSE41-NEXT: psadbw %xmm1, %xmm3
6796 ; SSE41-NEXT: psadbw %xmm1, %xmm0
6797 ; SSE41-NEXT: packuswb %xmm3, %xmm0
6798 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
6801 ; AVX1-LABEL: ugt_4_v4i32:
6803 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6804 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
6805 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6806 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
6807 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
6808 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
6809 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
6810 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
6811 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
6812 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6813 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6814 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6815 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6816 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6817 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
6820 ; AVX2-LABEL: ugt_4_v4i32:
6822 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6823 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
6824 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6825 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
6826 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
6827 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
6828 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
6829 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
6830 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
6831 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6832 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6833 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6834 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6835 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6836 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
6837 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
6840 ; AVX512VPOPCNTDQ-LABEL: ugt_4_v4i32:
6841 ; AVX512VPOPCNTDQ: # %bb.0:
6842 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6843 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
6844 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
6845 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
6846 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
6847 ; AVX512VPOPCNTDQ-NEXT: retq
6849 ; AVX512VPOPCNTDQVL-LABEL: ugt_4_v4i32:
6850 ; AVX512VPOPCNTDQVL: # %bb.0:
6851 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
6852 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
6853 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
6854 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
6855 ; AVX512VPOPCNTDQVL-NEXT: retq
6857 ; BITALG_NOVLX-LABEL: ugt_4_v4i32:
6858 ; BITALG_NOVLX: # %bb.0:
6859 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6860 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
6861 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
6862 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6863 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6864 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6865 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6866 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6867 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
6868 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
6869 ; BITALG_NOVLX-NEXT: vzeroupper
6870 ; BITALG_NOVLX-NEXT: retq
6872 ; BITALG-LABEL: ugt_4_v4i32:
6874 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
6875 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
6876 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6877 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6878 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6879 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6880 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6881 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
6882 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
6883 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
6885 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
6886 %3 = icmp ugt <4 x i32> %2, <i32 4, i32 4, i32 4, i32 4>
6887 %4 = sext <4 x i1> %3 to <4 x i32>
6891 define <4 x i32> @ult_5_v4i32(<4 x i32> %0) {
6892 ; SSE2-LABEL: ult_5_v4i32:
6894 ; SSE2-NEXT: movdqa %xmm0, %xmm1
6895 ; SSE2-NEXT: psrlw $1, %xmm1
6896 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6897 ; SSE2-NEXT: psubb %xmm1, %xmm0
6898 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
6899 ; SSE2-NEXT: movdqa %xmm0, %xmm2
6900 ; SSE2-NEXT: pand %xmm1, %xmm2
6901 ; SSE2-NEXT: psrlw $2, %xmm0
6902 ; SSE2-NEXT: pand %xmm1, %xmm0
6903 ; SSE2-NEXT: paddb %xmm2, %xmm0
6904 ; SSE2-NEXT: movdqa %xmm0, %xmm1
6905 ; SSE2-NEXT: psrlw $4, %xmm1
6906 ; SSE2-NEXT: paddb %xmm0, %xmm1
6907 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6908 ; SSE2-NEXT: pxor %xmm0, %xmm0
6909 ; SSE2-NEXT: movdqa %xmm1, %xmm2
6910 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
6911 ; SSE2-NEXT: psadbw %xmm0, %xmm2
6912 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
6913 ; SSE2-NEXT: psadbw %xmm0, %xmm1
6914 ; SSE2-NEXT: packuswb %xmm2, %xmm1
6915 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5]
6916 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
6919 ; SSE3-LABEL: ult_5_v4i32:
6921 ; SSE3-NEXT: movdqa %xmm0, %xmm1
6922 ; SSE3-NEXT: psrlw $1, %xmm1
6923 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6924 ; SSE3-NEXT: psubb %xmm1, %xmm0
6925 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
6926 ; SSE3-NEXT: movdqa %xmm0, %xmm2
6927 ; SSE3-NEXT: pand %xmm1, %xmm2
6928 ; SSE3-NEXT: psrlw $2, %xmm0
6929 ; SSE3-NEXT: pand %xmm1, %xmm0
6930 ; SSE3-NEXT: paddb %xmm2, %xmm0
6931 ; SSE3-NEXT: movdqa %xmm0, %xmm1
6932 ; SSE3-NEXT: psrlw $4, %xmm1
6933 ; SSE3-NEXT: paddb %xmm0, %xmm1
6934 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6935 ; SSE3-NEXT: pxor %xmm0, %xmm0
6936 ; SSE3-NEXT: movdqa %xmm1, %xmm2
6937 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
6938 ; SSE3-NEXT: psadbw %xmm0, %xmm2
6939 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
6940 ; SSE3-NEXT: psadbw %xmm0, %xmm1
6941 ; SSE3-NEXT: packuswb %xmm2, %xmm1
6942 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5]
6943 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
6946 ; SSSE3-LABEL: ult_5_v4i32:
6948 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6949 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
6950 ; SSSE3-NEXT: pand %xmm1, %xmm2
6951 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6952 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
6953 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
6954 ; SSSE3-NEXT: psrlw $4, %xmm0
6955 ; SSSE3-NEXT: pand %xmm1, %xmm0
6956 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
6957 ; SSSE3-NEXT: paddb %xmm4, %xmm3
6958 ; SSSE3-NEXT: pxor %xmm0, %xmm0
6959 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
6960 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
6961 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
6962 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
6963 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
6964 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
6965 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5]
6966 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
6969 ; SSE41-LABEL: ult_5_v4i32:
6971 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6972 ; SSE41-NEXT: movdqa %xmm0, %xmm2
6973 ; SSE41-NEXT: pand %xmm1, %xmm2
6974 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6975 ; SSE41-NEXT: movdqa %xmm3, %xmm4
6976 ; SSE41-NEXT: pshufb %xmm2, %xmm4
6977 ; SSE41-NEXT: psrlw $4, %xmm0
6978 ; SSE41-NEXT: pand %xmm1, %xmm0
6979 ; SSE41-NEXT: pshufb %xmm0, %xmm3
6980 ; SSE41-NEXT: paddb %xmm4, %xmm3
6981 ; SSE41-NEXT: pxor %xmm0, %xmm0
6982 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
6983 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
6984 ; SSE41-NEXT: psadbw %xmm0, %xmm3
6985 ; SSE41-NEXT: psadbw %xmm0, %xmm1
6986 ; SSE41-NEXT: packuswb %xmm3, %xmm1
6987 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5]
6988 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
6991 ; AVX1-LABEL: ult_5_v4i32:
6993 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6994 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
6995 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6996 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
6997 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
6998 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
6999 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
7000 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
7001 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
7002 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7003 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7004 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7005 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7006 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7007 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5]
7008 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
7011 ; AVX2-LABEL: ult_5_v4i32:
7013 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7014 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
7015 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7016 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
7017 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
7018 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
7019 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
7020 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
7021 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
7022 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7023 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7024 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7025 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7026 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7027 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
7028 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
7031 ; AVX512VPOPCNTDQ-LABEL: ult_5_v4i32:
7032 ; AVX512VPOPCNTDQ: # %bb.0:
7033 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7034 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
7035 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
7036 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
7037 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
7038 ; AVX512VPOPCNTDQ-NEXT: retq
7040 ; AVX512VPOPCNTDQVL-LABEL: ult_5_v4i32:
7041 ; AVX512VPOPCNTDQVL: # %bb.0:
7042 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
7043 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
7044 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
7045 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
7046 ; AVX512VPOPCNTDQVL-NEXT: retq
7048 ; BITALG_NOVLX-LABEL: ult_5_v4i32:
7049 ; BITALG_NOVLX: # %bb.0:
7050 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7051 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
7052 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
7053 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7054 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7055 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7056 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7057 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7058 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
7059 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
7060 ; BITALG_NOVLX-NEXT: vzeroupper
7061 ; BITALG_NOVLX-NEXT: retq
7063 ; BITALG-LABEL: ult_5_v4i32:
7065 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
7066 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
7067 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7068 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7069 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7070 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7071 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7072 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
7073 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
7074 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
7076 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
7077 %3 = icmp ult <4 x i32> %2, <i32 5, i32 5, i32 5, i32 5>
7078 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_5_v4i32: per-lane all-ones/all-zeros mask for "popcount(lane) >u 5".
; CHECK lines below are autogenerated by update_llc_test_checks.py; regenerate
; with that script rather than hand-editing them.
7082 define <4 x i32> @ugt_5_v4i32(<4 x i32> %0) {
7083 ; SSE2-LABEL: ugt_5_v4i32:
7085 ; SSE2-NEXT: movdqa %xmm0, %xmm1
7086 ; SSE2-NEXT: psrlw $1, %xmm1
7087 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7088 ; SSE2-NEXT: psubb %xmm1, %xmm0
7089 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
7090 ; SSE2-NEXT: movdqa %xmm0, %xmm2
7091 ; SSE2-NEXT: pand %xmm1, %xmm2
7092 ; SSE2-NEXT: psrlw $2, %xmm0
7093 ; SSE2-NEXT: pand %xmm1, %xmm0
7094 ; SSE2-NEXT: paddb %xmm2, %xmm0
7095 ; SSE2-NEXT: movdqa %xmm0, %xmm1
7096 ; SSE2-NEXT: psrlw $4, %xmm1
7097 ; SSE2-NEXT: paddb %xmm1, %xmm0
7098 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7099 ; SSE2-NEXT: pxor %xmm1, %xmm1
7100 ; SSE2-NEXT: movdqa %xmm0, %xmm2
7101 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
7102 ; SSE2-NEXT: psadbw %xmm1, %xmm2
7103 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
7104 ; SSE2-NEXT: psadbw %xmm1, %xmm0
7105 ; SSE2-NEXT: packuswb %xmm2, %xmm0
7106 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7109 ; SSE3-LABEL: ugt_5_v4i32:
7111 ; SSE3-NEXT: movdqa %xmm0, %xmm1
7112 ; SSE3-NEXT: psrlw $1, %xmm1
7113 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7114 ; SSE3-NEXT: psubb %xmm1, %xmm0
7115 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
7116 ; SSE3-NEXT: movdqa %xmm0, %xmm2
7117 ; SSE3-NEXT: pand %xmm1, %xmm2
7118 ; SSE3-NEXT: psrlw $2, %xmm0
7119 ; SSE3-NEXT: pand %xmm1, %xmm0
7120 ; SSE3-NEXT: paddb %xmm2, %xmm0
7121 ; SSE3-NEXT: movdqa %xmm0, %xmm1
7122 ; SSE3-NEXT: psrlw $4, %xmm1
7123 ; SSE3-NEXT: paddb %xmm1, %xmm0
7124 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7125 ; SSE3-NEXT: pxor %xmm1, %xmm1
7126 ; SSE3-NEXT: movdqa %xmm0, %xmm2
7127 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
7128 ; SSE3-NEXT: psadbw %xmm1, %xmm2
7129 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
7130 ; SSE3-NEXT: psadbw %xmm1, %xmm0
7131 ; SSE3-NEXT: packuswb %xmm2, %xmm0
7132 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7135 ; SSSE3-LABEL: ugt_5_v4i32:
7137 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7138 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
7139 ; SSSE3-NEXT: pand %xmm2, %xmm3
7140 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7141 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
7142 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
7143 ; SSSE3-NEXT: psrlw $4, %xmm0
7144 ; SSSE3-NEXT: pand %xmm2, %xmm0
7145 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
7146 ; SSSE3-NEXT: paddb %xmm4, %xmm1
7147 ; SSSE3-NEXT: pxor %xmm0, %xmm0
7148 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
7149 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
7150 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
7151 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
7152 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
7153 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
7154 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7155 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
7158 ; SSE41-LABEL: ugt_5_v4i32:
7160 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7161 ; SSE41-NEXT: movdqa %xmm0, %xmm2
7162 ; SSE41-NEXT: pand %xmm1, %xmm2
7163 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7164 ; SSE41-NEXT: movdqa %xmm3, %xmm4
7165 ; SSE41-NEXT: pshufb %xmm2, %xmm4
7166 ; SSE41-NEXT: psrlw $4, %xmm0
7167 ; SSE41-NEXT: pand %xmm1, %xmm0
7168 ; SSE41-NEXT: pshufb %xmm0, %xmm3
7169 ; SSE41-NEXT: paddb %xmm4, %xmm3
7170 ; SSE41-NEXT: pxor %xmm1, %xmm1
7171 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
7172 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
7173 ; SSE41-NEXT: psadbw %xmm1, %xmm3
7174 ; SSE41-NEXT: psadbw %xmm1, %xmm0
7175 ; SSE41-NEXT: packuswb %xmm3, %xmm0
7176 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7179 ; AVX1-LABEL: ugt_5_v4i32:
7181 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7182 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
7183 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7184 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
7185 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
7186 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
7187 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
7188 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
7189 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
7190 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7191 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7192 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7193 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7194 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7195 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
7198 ; AVX2-LABEL: ugt_5_v4i32:
7200 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7201 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
7202 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7203 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
7204 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
7205 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
7206 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
7207 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
7208 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
7209 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7210 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7211 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7212 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7213 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7214 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
7215 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
7218 ; AVX512VPOPCNTDQ-LABEL: ugt_5_v4i32:
7219 ; AVX512VPOPCNTDQ: # %bb.0:
7220 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7221 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
7222 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
7223 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
7224 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
7225 ; AVX512VPOPCNTDQ-NEXT: retq
7227 ; AVX512VPOPCNTDQVL-LABEL: ugt_5_v4i32:
7228 ; AVX512VPOPCNTDQVL: # %bb.0:
7229 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
7230 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
7231 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
7232 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
7233 ; AVX512VPOPCNTDQVL-NEXT: retq
7235 ; BITALG_NOVLX-LABEL: ugt_5_v4i32:
7236 ; BITALG_NOVLX: # %bb.0:
7237 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7238 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
7239 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
7240 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7241 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7242 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7243 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7244 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7245 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
7246 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
7247 ; BITALG_NOVLX-NEXT: vzeroupper
7248 ; BITALG_NOVLX-NEXT: retq
7250 ; BITALG-LABEL: ugt_5_v4i32:
7252 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
7253 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
7254 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7255 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7256 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7257 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7258 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7259 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
7260 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
7261 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; IR under test: vector popcount, unsigned compare against splat 5, sext the i1 mask to i32.
7263 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
7264 %3 = icmp ugt <4 x i32> %2, <i32 5, i32 5, i32 5, i32 5>
7265 %4 = sext <4 x i1> %3 to <4 x i32>
; ult_6_v4i32: per-lane all-ones/all-zeros mask for "popcount(lane) <u 6".
; CHECK lines below are autogenerated by update_llc_test_checks.py; regenerate
; with that script rather than hand-editing them.
7269 define <4 x i32> @ult_6_v4i32(<4 x i32> %0) {
7270 ; SSE2-LABEL: ult_6_v4i32:
7272 ; SSE2-NEXT: movdqa %xmm0, %xmm1
7273 ; SSE2-NEXT: psrlw $1, %xmm1
7274 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7275 ; SSE2-NEXT: psubb %xmm1, %xmm0
7276 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
7277 ; SSE2-NEXT: movdqa %xmm0, %xmm2
7278 ; SSE2-NEXT: pand %xmm1, %xmm2
7279 ; SSE2-NEXT: psrlw $2, %xmm0
7280 ; SSE2-NEXT: pand %xmm1, %xmm0
7281 ; SSE2-NEXT: paddb %xmm2, %xmm0
7282 ; SSE2-NEXT: movdqa %xmm0, %xmm1
7283 ; SSE2-NEXT: psrlw $4, %xmm1
7284 ; SSE2-NEXT: paddb %xmm0, %xmm1
7285 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7286 ; SSE2-NEXT: pxor %xmm0, %xmm0
7287 ; SSE2-NEXT: movdqa %xmm1, %xmm2
7288 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
7289 ; SSE2-NEXT: psadbw %xmm0, %xmm2
7290 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
7291 ; SSE2-NEXT: psadbw %xmm0, %xmm1
7292 ; SSE2-NEXT: packuswb %xmm2, %xmm1
7293 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6]
7294 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
7297 ; SSE3-LABEL: ult_6_v4i32:
7299 ; SSE3-NEXT: movdqa %xmm0, %xmm1
7300 ; SSE3-NEXT: psrlw $1, %xmm1
7301 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7302 ; SSE3-NEXT: psubb %xmm1, %xmm0
7303 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
7304 ; SSE3-NEXT: movdqa %xmm0, %xmm2
7305 ; SSE3-NEXT: pand %xmm1, %xmm2
7306 ; SSE3-NEXT: psrlw $2, %xmm0
7307 ; SSE3-NEXT: pand %xmm1, %xmm0
7308 ; SSE3-NEXT: paddb %xmm2, %xmm0
7309 ; SSE3-NEXT: movdqa %xmm0, %xmm1
7310 ; SSE3-NEXT: psrlw $4, %xmm1
7311 ; SSE3-NEXT: paddb %xmm0, %xmm1
7312 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7313 ; SSE3-NEXT: pxor %xmm0, %xmm0
7314 ; SSE3-NEXT: movdqa %xmm1, %xmm2
7315 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
7316 ; SSE3-NEXT: psadbw %xmm0, %xmm2
7317 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
7318 ; SSE3-NEXT: psadbw %xmm0, %xmm1
7319 ; SSE3-NEXT: packuswb %xmm2, %xmm1
7320 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6]
7321 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
7324 ; SSSE3-LABEL: ult_6_v4i32:
7326 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7327 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
7328 ; SSSE3-NEXT: pand %xmm1, %xmm2
7329 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7330 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
7331 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
7332 ; SSSE3-NEXT: psrlw $4, %xmm0
7333 ; SSSE3-NEXT: pand %xmm1, %xmm0
7334 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
7335 ; SSSE3-NEXT: paddb %xmm4, %xmm3
7336 ; SSSE3-NEXT: pxor %xmm0, %xmm0
7337 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
7338 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
7339 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
7340 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
7341 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
7342 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
7343 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6]
7344 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
7347 ; SSE41-LABEL: ult_6_v4i32:
7349 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7350 ; SSE41-NEXT: movdqa %xmm0, %xmm2
7351 ; SSE41-NEXT: pand %xmm1, %xmm2
7352 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7353 ; SSE41-NEXT: movdqa %xmm3, %xmm4
7354 ; SSE41-NEXT: pshufb %xmm2, %xmm4
7355 ; SSE41-NEXT: psrlw $4, %xmm0
7356 ; SSE41-NEXT: pand %xmm1, %xmm0
7357 ; SSE41-NEXT: pshufb %xmm0, %xmm3
7358 ; SSE41-NEXT: paddb %xmm4, %xmm3
7359 ; SSE41-NEXT: pxor %xmm0, %xmm0
7360 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
7361 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
7362 ; SSE41-NEXT: psadbw %xmm0, %xmm3
7363 ; SSE41-NEXT: psadbw %xmm0, %xmm1
7364 ; SSE41-NEXT: packuswb %xmm3, %xmm1
7365 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6]
7366 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
7369 ; AVX1-LABEL: ult_6_v4i32:
7371 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7372 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
7373 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7374 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
7375 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
7376 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
7377 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
7378 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
7379 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
7380 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7381 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7382 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7383 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7384 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7385 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6]
7386 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
7389 ; AVX2-LABEL: ult_6_v4i32:
7391 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7392 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
7393 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7394 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
7395 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
7396 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
7397 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
7398 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
7399 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
7400 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7401 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7402 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7403 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7404 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7405 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
7406 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
7409 ; AVX512VPOPCNTDQ-LABEL: ult_6_v4i32:
7410 ; AVX512VPOPCNTDQ: # %bb.0:
7411 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7412 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
7413 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
7414 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
7415 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
7416 ; AVX512VPOPCNTDQ-NEXT: retq
7418 ; AVX512VPOPCNTDQVL-LABEL: ult_6_v4i32:
7419 ; AVX512VPOPCNTDQVL: # %bb.0:
7420 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
7421 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
7422 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
7423 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
7424 ; AVX512VPOPCNTDQVL-NEXT: retq
7426 ; BITALG_NOVLX-LABEL: ult_6_v4i32:
7427 ; BITALG_NOVLX: # %bb.0:
7428 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7429 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
7430 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
7431 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7432 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7433 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7434 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7435 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7436 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
7437 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
7438 ; BITALG_NOVLX-NEXT: vzeroupper
7439 ; BITALG_NOVLX-NEXT: retq
7441 ; BITALG-LABEL: ult_6_v4i32:
7443 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
7444 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
7445 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7446 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7447 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7448 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7449 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7450 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
7451 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
7452 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; IR under test: vector popcount, unsigned compare against splat 6, sext the i1 mask to i32.
7454 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
7455 %3 = icmp ult <4 x i32> %2, <i32 6, i32 6, i32 6, i32 6>
7456 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_6_v4i32: per-lane all-ones/all-zeros mask for "popcount(lane) >u 6".
; CHECK lines below are autogenerated by update_llc_test_checks.py; regenerate
; with that script rather than hand-editing them.
7460 define <4 x i32> @ugt_6_v4i32(<4 x i32> %0) {
7461 ; SSE2-LABEL: ugt_6_v4i32:
7463 ; SSE2-NEXT: movdqa %xmm0, %xmm1
7464 ; SSE2-NEXT: psrlw $1, %xmm1
7465 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7466 ; SSE2-NEXT: psubb %xmm1, %xmm0
7467 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
7468 ; SSE2-NEXT: movdqa %xmm0, %xmm2
7469 ; SSE2-NEXT: pand %xmm1, %xmm2
7470 ; SSE2-NEXT: psrlw $2, %xmm0
7471 ; SSE2-NEXT: pand %xmm1, %xmm0
7472 ; SSE2-NEXT: paddb %xmm2, %xmm0
7473 ; SSE2-NEXT: movdqa %xmm0, %xmm1
7474 ; SSE2-NEXT: psrlw $4, %xmm1
7475 ; SSE2-NEXT: paddb %xmm1, %xmm0
7476 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7477 ; SSE2-NEXT: pxor %xmm1, %xmm1
7478 ; SSE2-NEXT: movdqa %xmm0, %xmm2
7479 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
7480 ; SSE2-NEXT: psadbw %xmm1, %xmm2
7481 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
7482 ; SSE2-NEXT: psadbw %xmm1, %xmm0
7483 ; SSE2-NEXT: packuswb %xmm2, %xmm0
7484 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7487 ; SSE3-LABEL: ugt_6_v4i32:
7489 ; SSE3-NEXT: movdqa %xmm0, %xmm1
7490 ; SSE3-NEXT: psrlw $1, %xmm1
7491 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7492 ; SSE3-NEXT: psubb %xmm1, %xmm0
7493 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
7494 ; SSE3-NEXT: movdqa %xmm0, %xmm2
7495 ; SSE3-NEXT: pand %xmm1, %xmm2
7496 ; SSE3-NEXT: psrlw $2, %xmm0
7497 ; SSE3-NEXT: pand %xmm1, %xmm0
7498 ; SSE3-NEXT: paddb %xmm2, %xmm0
7499 ; SSE3-NEXT: movdqa %xmm0, %xmm1
7500 ; SSE3-NEXT: psrlw $4, %xmm1
7501 ; SSE3-NEXT: paddb %xmm1, %xmm0
7502 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7503 ; SSE3-NEXT: pxor %xmm1, %xmm1
7504 ; SSE3-NEXT: movdqa %xmm0, %xmm2
7505 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
7506 ; SSE3-NEXT: psadbw %xmm1, %xmm2
7507 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
7508 ; SSE3-NEXT: psadbw %xmm1, %xmm0
7509 ; SSE3-NEXT: packuswb %xmm2, %xmm0
7510 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7513 ; SSSE3-LABEL: ugt_6_v4i32:
7515 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7516 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
7517 ; SSSE3-NEXT: pand %xmm2, %xmm3
7518 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7519 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
7520 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
7521 ; SSSE3-NEXT: psrlw $4, %xmm0
7522 ; SSSE3-NEXT: pand %xmm2, %xmm0
7523 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
7524 ; SSSE3-NEXT: paddb %xmm4, %xmm1
7525 ; SSSE3-NEXT: pxor %xmm0, %xmm0
7526 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
7527 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
7528 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
7529 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
7530 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
7531 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
7532 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7533 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
7536 ; SSE41-LABEL: ugt_6_v4i32:
7538 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7539 ; SSE41-NEXT: movdqa %xmm0, %xmm2
7540 ; SSE41-NEXT: pand %xmm1, %xmm2
7541 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7542 ; SSE41-NEXT: movdqa %xmm3, %xmm4
7543 ; SSE41-NEXT: pshufb %xmm2, %xmm4
7544 ; SSE41-NEXT: psrlw $4, %xmm0
7545 ; SSE41-NEXT: pand %xmm1, %xmm0
7546 ; SSE41-NEXT: pshufb %xmm0, %xmm3
7547 ; SSE41-NEXT: paddb %xmm4, %xmm3
7548 ; SSE41-NEXT: pxor %xmm1, %xmm1
7549 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
7550 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
7551 ; SSE41-NEXT: psadbw %xmm1, %xmm3
7552 ; SSE41-NEXT: psadbw %xmm1, %xmm0
7553 ; SSE41-NEXT: packuswb %xmm3, %xmm0
7554 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7557 ; AVX1-LABEL: ugt_6_v4i32:
7559 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7560 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
7561 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7562 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
7563 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
7564 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
7565 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
7566 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
7567 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
7568 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7569 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7570 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7571 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7572 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7573 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
7576 ; AVX2-LABEL: ugt_6_v4i32:
7578 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7579 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
7580 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7581 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
7582 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
7583 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
7584 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
7585 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
7586 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
7587 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7588 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7589 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7590 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7591 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7592 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
7593 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
7596 ; AVX512VPOPCNTDQ-LABEL: ugt_6_v4i32:
7597 ; AVX512VPOPCNTDQ: # %bb.0:
7598 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7599 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
7600 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
7601 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
7602 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
7603 ; AVX512VPOPCNTDQ-NEXT: retq
7605 ; AVX512VPOPCNTDQVL-LABEL: ugt_6_v4i32:
7606 ; AVX512VPOPCNTDQVL: # %bb.0:
7607 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
7608 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
7609 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
7610 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
7611 ; AVX512VPOPCNTDQVL-NEXT: retq
7613 ; BITALG_NOVLX-LABEL: ugt_6_v4i32:
7614 ; BITALG_NOVLX: # %bb.0:
7615 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7616 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
7617 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
7618 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7619 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7620 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7621 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7622 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7623 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
7624 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
7625 ; BITALG_NOVLX-NEXT: vzeroupper
7626 ; BITALG_NOVLX-NEXT: retq
7628 ; BITALG-LABEL: ugt_6_v4i32:
7630 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
7631 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
7632 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7633 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7634 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7635 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7636 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7637 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
7638 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
7639 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; IR under test: vector popcount, unsigned compare against splat 6, sext the i1 mask to i32.
7641 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
7642 %3 = icmp ugt <4 x i32> %2, <i32 6, i32 6, i32 6, i32 6>
7643 %4 = sext <4 x i1> %3 to <4 x i32>
7647 define <4 x i32> @ult_7_v4i32(<4 x i32> %0) {
7648 ; SSE2-LABEL: ult_7_v4i32:
7650 ; SSE2-NEXT: movdqa %xmm0, %xmm1
7651 ; SSE2-NEXT: psrlw $1, %xmm1
7652 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7653 ; SSE2-NEXT: psubb %xmm1, %xmm0
7654 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
7655 ; SSE2-NEXT: movdqa %xmm0, %xmm2
7656 ; SSE2-NEXT: pand %xmm1, %xmm2
7657 ; SSE2-NEXT: psrlw $2, %xmm0
7658 ; SSE2-NEXT: pand %xmm1, %xmm0
7659 ; SSE2-NEXT: paddb %xmm2, %xmm0
7660 ; SSE2-NEXT: movdqa %xmm0, %xmm1
7661 ; SSE2-NEXT: psrlw $4, %xmm1
7662 ; SSE2-NEXT: paddb %xmm0, %xmm1
7663 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7664 ; SSE2-NEXT: pxor %xmm0, %xmm0
7665 ; SSE2-NEXT: movdqa %xmm1, %xmm2
7666 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
7667 ; SSE2-NEXT: psadbw %xmm0, %xmm2
7668 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
7669 ; SSE2-NEXT: psadbw %xmm0, %xmm1
7670 ; SSE2-NEXT: packuswb %xmm2, %xmm1
7671 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7]
7672 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
7675 ; SSE3-LABEL: ult_7_v4i32:
7677 ; SSE3-NEXT: movdqa %xmm0, %xmm1
7678 ; SSE3-NEXT: psrlw $1, %xmm1
7679 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7680 ; SSE3-NEXT: psubb %xmm1, %xmm0
7681 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
7682 ; SSE3-NEXT: movdqa %xmm0, %xmm2
7683 ; SSE3-NEXT: pand %xmm1, %xmm2
7684 ; SSE3-NEXT: psrlw $2, %xmm0
7685 ; SSE3-NEXT: pand %xmm1, %xmm0
7686 ; SSE3-NEXT: paddb %xmm2, %xmm0
7687 ; SSE3-NEXT: movdqa %xmm0, %xmm1
7688 ; SSE3-NEXT: psrlw $4, %xmm1
7689 ; SSE3-NEXT: paddb %xmm0, %xmm1
7690 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7691 ; SSE3-NEXT: pxor %xmm0, %xmm0
7692 ; SSE3-NEXT: movdqa %xmm1, %xmm2
7693 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
7694 ; SSE3-NEXT: psadbw %xmm0, %xmm2
7695 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
7696 ; SSE3-NEXT: psadbw %xmm0, %xmm1
7697 ; SSE3-NEXT: packuswb %xmm2, %xmm1
7698 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7]
7699 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
7702 ; SSSE3-LABEL: ult_7_v4i32:
7704 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7705 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
7706 ; SSSE3-NEXT: pand %xmm1, %xmm2
7707 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7708 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
7709 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
7710 ; SSSE3-NEXT: psrlw $4, %xmm0
7711 ; SSSE3-NEXT: pand %xmm1, %xmm0
7712 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
7713 ; SSSE3-NEXT: paddb %xmm4, %xmm3
7714 ; SSSE3-NEXT: pxor %xmm0, %xmm0
7715 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
7716 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
7717 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
7718 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
7719 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
7720 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
7721 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7]
7722 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
7725 ; SSE41-LABEL: ult_7_v4i32:
7727 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7728 ; SSE41-NEXT: movdqa %xmm0, %xmm2
7729 ; SSE41-NEXT: pand %xmm1, %xmm2
7730 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7731 ; SSE41-NEXT: movdqa %xmm3, %xmm4
7732 ; SSE41-NEXT: pshufb %xmm2, %xmm4
7733 ; SSE41-NEXT: psrlw $4, %xmm0
7734 ; SSE41-NEXT: pand %xmm1, %xmm0
7735 ; SSE41-NEXT: pshufb %xmm0, %xmm3
7736 ; SSE41-NEXT: paddb %xmm4, %xmm3
7737 ; SSE41-NEXT: pxor %xmm0, %xmm0
7738 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
7739 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
7740 ; SSE41-NEXT: psadbw %xmm0, %xmm3
7741 ; SSE41-NEXT: psadbw %xmm0, %xmm1
7742 ; SSE41-NEXT: packuswb %xmm3, %xmm1
7743 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7]
7744 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
7747 ; AVX1-LABEL: ult_7_v4i32:
7749 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7750 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
7751 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7752 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
7753 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
7754 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
7755 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
7756 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
7757 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
7758 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7759 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7760 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7761 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7762 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7763 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7]
7764 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
7767 ; AVX2-LABEL: ult_7_v4i32:
7769 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7770 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
7771 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7772 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
7773 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
7774 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
7775 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
7776 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
7777 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
7778 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7779 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7780 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7781 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7782 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7783 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
7784 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
7787 ; AVX512VPOPCNTDQ-LABEL: ult_7_v4i32:
7788 ; AVX512VPOPCNTDQ: # %bb.0:
7789 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7790 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
7791 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
7792 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
7793 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
7794 ; AVX512VPOPCNTDQ-NEXT: retq
7796 ; AVX512VPOPCNTDQVL-LABEL: ult_7_v4i32:
7797 ; AVX512VPOPCNTDQVL: # %bb.0:
7798 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
7799 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
7800 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
7801 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
7802 ; AVX512VPOPCNTDQVL-NEXT: retq
7804 ; BITALG_NOVLX-LABEL: ult_7_v4i32:
7805 ; BITALG_NOVLX: # %bb.0:
7806 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7807 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
7808 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
7809 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7810 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7811 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7812 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7813 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7814 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
7815 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
7816 ; BITALG_NOVLX-NEXT: vzeroupper
7817 ; BITALG_NOVLX-NEXT: retq
7819 ; BITALG-LABEL: ult_7_v4i32:
7821 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
7822 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
7823 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7824 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7825 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7826 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7827 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7828 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
7829 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
7830 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
7832 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
7833 %3 = icmp ult <4 x i32> %2, <i32 7, i32 7, i32 7, i32 7>
7834 %4 = sext <4 x i1> %3 to <4 x i32>
7838 define <4 x i32> @ugt_7_v4i32(<4 x i32> %0) {
7839 ; SSE2-LABEL: ugt_7_v4i32:
7841 ; SSE2-NEXT: movdqa %xmm0, %xmm1
7842 ; SSE2-NEXT: psrlw $1, %xmm1
7843 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7844 ; SSE2-NEXT: psubb %xmm1, %xmm0
7845 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
7846 ; SSE2-NEXT: movdqa %xmm0, %xmm2
7847 ; SSE2-NEXT: pand %xmm1, %xmm2
7848 ; SSE2-NEXT: psrlw $2, %xmm0
7849 ; SSE2-NEXT: pand %xmm1, %xmm0
7850 ; SSE2-NEXT: paddb %xmm2, %xmm0
7851 ; SSE2-NEXT: movdqa %xmm0, %xmm1
7852 ; SSE2-NEXT: psrlw $4, %xmm1
7853 ; SSE2-NEXT: paddb %xmm1, %xmm0
7854 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7855 ; SSE2-NEXT: pxor %xmm1, %xmm1
7856 ; SSE2-NEXT: movdqa %xmm0, %xmm2
7857 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
7858 ; SSE2-NEXT: psadbw %xmm1, %xmm2
7859 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
7860 ; SSE2-NEXT: psadbw %xmm1, %xmm0
7861 ; SSE2-NEXT: packuswb %xmm2, %xmm0
7862 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7865 ; SSE3-LABEL: ugt_7_v4i32:
7867 ; SSE3-NEXT: movdqa %xmm0, %xmm1
7868 ; SSE3-NEXT: psrlw $1, %xmm1
7869 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7870 ; SSE3-NEXT: psubb %xmm1, %xmm0
7871 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
7872 ; SSE3-NEXT: movdqa %xmm0, %xmm2
7873 ; SSE3-NEXT: pand %xmm1, %xmm2
7874 ; SSE3-NEXT: psrlw $2, %xmm0
7875 ; SSE3-NEXT: pand %xmm1, %xmm0
7876 ; SSE3-NEXT: paddb %xmm2, %xmm0
7877 ; SSE3-NEXT: movdqa %xmm0, %xmm1
7878 ; SSE3-NEXT: psrlw $4, %xmm1
7879 ; SSE3-NEXT: paddb %xmm1, %xmm0
7880 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7881 ; SSE3-NEXT: pxor %xmm1, %xmm1
7882 ; SSE3-NEXT: movdqa %xmm0, %xmm2
7883 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
7884 ; SSE3-NEXT: psadbw %xmm1, %xmm2
7885 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
7886 ; SSE3-NEXT: psadbw %xmm1, %xmm0
7887 ; SSE3-NEXT: packuswb %xmm2, %xmm0
7888 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7891 ; SSSE3-LABEL: ugt_7_v4i32:
7893 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7894 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
7895 ; SSSE3-NEXT: pand %xmm2, %xmm3
7896 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7897 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
7898 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
7899 ; SSSE3-NEXT: psrlw $4, %xmm0
7900 ; SSSE3-NEXT: pand %xmm2, %xmm0
7901 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
7902 ; SSSE3-NEXT: paddb %xmm4, %xmm1
7903 ; SSSE3-NEXT: pxor %xmm0, %xmm0
7904 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
7905 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
7906 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
7907 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
7908 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
7909 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
7910 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7911 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
7914 ; SSE41-LABEL: ugt_7_v4i32:
7916 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7917 ; SSE41-NEXT: movdqa %xmm0, %xmm2
7918 ; SSE41-NEXT: pand %xmm1, %xmm2
7919 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7920 ; SSE41-NEXT: movdqa %xmm3, %xmm4
7921 ; SSE41-NEXT: pshufb %xmm2, %xmm4
7922 ; SSE41-NEXT: psrlw $4, %xmm0
7923 ; SSE41-NEXT: pand %xmm1, %xmm0
7924 ; SSE41-NEXT: pshufb %xmm0, %xmm3
7925 ; SSE41-NEXT: paddb %xmm4, %xmm3
7926 ; SSE41-NEXT: pxor %xmm1, %xmm1
7927 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
7928 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
7929 ; SSE41-NEXT: psadbw %xmm1, %xmm3
7930 ; SSE41-NEXT: psadbw %xmm1, %xmm0
7931 ; SSE41-NEXT: packuswb %xmm3, %xmm0
7932 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7935 ; AVX1-LABEL: ugt_7_v4i32:
7937 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7938 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
7939 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7940 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
7941 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
7942 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
7943 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
7944 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
7945 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
7946 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7947 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7948 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7949 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7950 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7951 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
7954 ; AVX2-LABEL: ugt_7_v4i32:
7956 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7957 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
7958 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7959 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
7960 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
7961 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
7962 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
7963 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
7964 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
7965 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7966 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7967 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7968 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7969 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7970 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
7971 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
7974 ; AVX512VPOPCNTDQ-LABEL: ugt_7_v4i32:
7975 ; AVX512VPOPCNTDQ: # %bb.0:
7976 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7977 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
7978 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
7979 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
7980 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
7981 ; AVX512VPOPCNTDQ-NEXT: retq
7983 ; AVX512VPOPCNTDQVL-LABEL: ugt_7_v4i32:
7984 ; AVX512VPOPCNTDQVL: # %bb.0:
7985 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
7986 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
7987 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
7988 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
7989 ; AVX512VPOPCNTDQVL-NEXT: retq
7991 ; BITALG_NOVLX-LABEL: ugt_7_v4i32:
7992 ; BITALG_NOVLX: # %bb.0:
7993 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7994 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
7995 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
7996 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7997 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7998 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7999 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8000 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8001 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
8002 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
8003 ; BITALG_NOVLX-NEXT: vzeroupper
8004 ; BITALG_NOVLX-NEXT: retq
8006 ; BITALG-LABEL: ugt_7_v4i32:
8008 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
8009 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
8010 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8011 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8012 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8013 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8014 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8015 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
8016 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
8017 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
8019 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
8020 %3 = icmp ugt <4 x i32> %2, <i32 7, i32 7, i32 7, i32 7>
8021 %4 = sext <4 x i1> %3 to <4 x i32>
8025 define <4 x i32> @ult_8_v4i32(<4 x i32> %0) {
8026 ; SSE2-LABEL: ult_8_v4i32:
8028 ; SSE2-NEXT: movdqa %xmm0, %xmm1
8029 ; SSE2-NEXT: psrlw $1, %xmm1
8030 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8031 ; SSE2-NEXT: psubb %xmm1, %xmm0
8032 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8033 ; SSE2-NEXT: movdqa %xmm0, %xmm2
8034 ; SSE2-NEXT: pand %xmm1, %xmm2
8035 ; SSE2-NEXT: psrlw $2, %xmm0
8036 ; SSE2-NEXT: pand %xmm1, %xmm0
8037 ; SSE2-NEXT: paddb %xmm2, %xmm0
8038 ; SSE2-NEXT: movdqa %xmm0, %xmm1
8039 ; SSE2-NEXT: psrlw $4, %xmm1
8040 ; SSE2-NEXT: paddb %xmm0, %xmm1
8041 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8042 ; SSE2-NEXT: pxor %xmm0, %xmm0
8043 ; SSE2-NEXT: movdqa %xmm1, %xmm2
8044 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
8045 ; SSE2-NEXT: psadbw %xmm0, %xmm2
8046 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
8047 ; SSE2-NEXT: psadbw %xmm0, %xmm1
8048 ; SSE2-NEXT: packuswb %xmm2, %xmm1
8049 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
8050 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
8053 ; SSE3-LABEL: ult_8_v4i32:
8055 ; SSE3-NEXT: movdqa %xmm0, %xmm1
8056 ; SSE3-NEXT: psrlw $1, %xmm1
8057 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8058 ; SSE3-NEXT: psubb %xmm1, %xmm0
8059 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8060 ; SSE3-NEXT: movdqa %xmm0, %xmm2
8061 ; SSE3-NEXT: pand %xmm1, %xmm2
8062 ; SSE3-NEXT: psrlw $2, %xmm0
8063 ; SSE3-NEXT: pand %xmm1, %xmm0
8064 ; SSE3-NEXT: paddb %xmm2, %xmm0
8065 ; SSE3-NEXT: movdqa %xmm0, %xmm1
8066 ; SSE3-NEXT: psrlw $4, %xmm1
8067 ; SSE3-NEXT: paddb %xmm0, %xmm1
8068 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8069 ; SSE3-NEXT: pxor %xmm0, %xmm0
8070 ; SSE3-NEXT: movdqa %xmm1, %xmm2
8071 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
8072 ; SSE3-NEXT: psadbw %xmm0, %xmm2
8073 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
8074 ; SSE3-NEXT: psadbw %xmm0, %xmm1
8075 ; SSE3-NEXT: packuswb %xmm2, %xmm1
8076 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
8077 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
8080 ; SSSE3-LABEL: ult_8_v4i32:
8082 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8083 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
8084 ; SSSE3-NEXT: pand %xmm1, %xmm2
8085 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8086 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
8087 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
8088 ; SSSE3-NEXT: psrlw $4, %xmm0
8089 ; SSSE3-NEXT: pand %xmm1, %xmm0
8090 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
8091 ; SSSE3-NEXT: paddb %xmm4, %xmm3
8092 ; SSSE3-NEXT: pxor %xmm0, %xmm0
8093 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
8094 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
8095 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
8096 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
8097 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
8098 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
8099 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
8100 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
8103 ; SSE41-LABEL: ult_8_v4i32:
8105 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8106 ; SSE41-NEXT: movdqa %xmm0, %xmm2
8107 ; SSE41-NEXT: pand %xmm1, %xmm2
8108 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8109 ; SSE41-NEXT: movdqa %xmm3, %xmm4
8110 ; SSE41-NEXT: pshufb %xmm2, %xmm4
8111 ; SSE41-NEXT: psrlw $4, %xmm0
8112 ; SSE41-NEXT: pand %xmm1, %xmm0
8113 ; SSE41-NEXT: pshufb %xmm0, %xmm3
8114 ; SSE41-NEXT: paddb %xmm4, %xmm3
8115 ; SSE41-NEXT: pxor %xmm0, %xmm0
8116 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
8117 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
8118 ; SSE41-NEXT: psadbw %xmm0, %xmm3
8119 ; SSE41-NEXT: psadbw %xmm0, %xmm1
8120 ; SSE41-NEXT: packuswb %xmm3, %xmm1
8121 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
8122 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
8125 ; AVX1-LABEL: ult_8_v4i32:
8127 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8128 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
8129 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8130 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
8131 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
8132 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
8133 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
8134 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
8135 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
8136 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8137 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8138 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8139 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8140 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8141 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8]
8142 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8145 ; AVX2-LABEL: ult_8_v4i32:
8147 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8148 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
8149 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8150 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
8151 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
8152 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
8153 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
8154 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
8155 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
8156 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8157 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8158 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8159 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8160 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8161 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
8162 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8165 ; AVX512VPOPCNTDQ-LABEL: ult_8_v4i32:
8166 ; AVX512VPOPCNTDQ: # %bb.0:
8167 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8168 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
8169 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
8170 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8171 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
8172 ; AVX512VPOPCNTDQ-NEXT: retq
8174 ; AVX512VPOPCNTDQVL-LABEL: ult_8_v4i32:
8175 ; AVX512VPOPCNTDQVL: # %bb.0:
8176 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
8177 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
8178 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
8179 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
8180 ; AVX512VPOPCNTDQVL-NEXT: retq
8182 ; BITALG_NOVLX-LABEL: ult_8_v4i32:
8183 ; BITALG_NOVLX: # %bb.0:
8184 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8185 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
8186 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
8187 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8188 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8189 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8190 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8191 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8192 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
8193 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8194 ; BITALG_NOVLX-NEXT: vzeroupper
8195 ; BITALG_NOVLX-NEXT: retq
8197 ; BITALG-LABEL: ult_8_v4i32:
8199 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
8200 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
8201 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8202 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8203 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8204 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8205 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8206 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
8207 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
8208 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
8210 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
8211 %3 = icmp ult <4 x i32> %2, <i32 8, i32 8, i32 8, i32 8>
8212 %4 = sext <4 x i1> %3 to <4 x i32>
8216 define <4 x i32> @ugt_8_v4i32(<4 x i32> %0) {
8217 ; SSE2-LABEL: ugt_8_v4i32:
8219 ; SSE2-NEXT: movdqa %xmm0, %xmm1
8220 ; SSE2-NEXT: psrlw $1, %xmm1
8221 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8222 ; SSE2-NEXT: psubb %xmm1, %xmm0
8223 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8224 ; SSE2-NEXT: movdqa %xmm0, %xmm2
8225 ; SSE2-NEXT: pand %xmm1, %xmm2
8226 ; SSE2-NEXT: psrlw $2, %xmm0
8227 ; SSE2-NEXT: pand %xmm1, %xmm0
8228 ; SSE2-NEXT: paddb %xmm2, %xmm0
8229 ; SSE2-NEXT: movdqa %xmm0, %xmm1
8230 ; SSE2-NEXT: psrlw $4, %xmm1
8231 ; SSE2-NEXT: paddb %xmm1, %xmm0
8232 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8233 ; SSE2-NEXT: pxor %xmm1, %xmm1
8234 ; SSE2-NEXT: movdqa %xmm0, %xmm2
8235 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
8236 ; SSE2-NEXT: psadbw %xmm1, %xmm2
8237 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
8238 ; SSE2-NEXT: psadbw %xmm1, %xmm0
8239 ; SSE2-NEXT: packuswb %xmm2, %xmm0
8240 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8243 ; SSE3-LABEL: ugt_8_v4i32:
8245 ; SSE3-NEXT: movdqa %xmm0, %xmm1
8246 ; SSE3-NEXT: psrlw $1, %xmm1
8247 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8248 ; SSE3-NEXT: psubb %xmm1, %xmm0
8249 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8250 ; SSE3-NEXT: movdqa %xmm0, %xmm2
8251 ; SSE3-NEXT: pand %xmm1, %xmm2
8252 ; SSE3-NEXT: psrlw $2, %xmm0
8253 ; SSE3-NEXT: pand %xmm1, %xmm0
8254 ; SSE3-NEXT: paddb %xmm2, %xmm0
8255 ; SSE3-NEXT: movdqa %xmm0, %xmm1
8256 ; SSE3-NEXT: psrlw $4, %xmm1
8257 ; SSE3-NEXT: paddb %xmm1, %xmm0
8258 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8259 ; SSE3-NEXT: pxor %xmm1, %xmm1
8260 ; SSE3-NEXT: movdqa %xmm0, %xmm2
8261 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
8262 ; SSE3-NEXT: psadbw %xmm1, %xmm2
8263 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
8264 ; SSE3-NEXT: psadbw %xmm1, %xmm0
8265 ; SSE3-NEXT: packuswb %xmm2, %xmm0
8266 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8269 ; SSSE3-LABEL: ugt_8_v4i32:
8271 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8272 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
8273 ; SSSE3-NEXT: pand %xmm2, %xmm3
8274 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8275 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
8276 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
8277 ; SSSE3-NEXT: psrlw $4, %xmm0
8278 ; SSSE3-NEXT: pand %xmm2, %xmm0
8279 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
8280 ; SSSE3-NEXT: paddb %xmm4, %xmm1
8281 ; SSSE3-NEXT: pxor %xmm0, %xmm0
8282 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
8283 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
8284 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
8285 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
8286 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
8287 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
8288 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8289 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
8292 ; SSE41-LABEL: ugt_8_v4i32:
8294 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8295 ; SSE41-NEXT: movdqa %xmm0, %xmm2
8296 ; SSE41-NEXT: pand %xmm1, %xmm2
8297 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8298 ; SSE41-NEXT: movdqa %xmm3, %xmm4
8299 ; SSE41-NEXT: pshufb %xmm2, %xmm4
8300 ; SSE41-NEXT: psrlw $4, %xmm0
8301 ; SSE41-NEXT: pand %xmm1, %xmm0
8302 ; SSE41-NEXT: pshufb %xmm0, %xmm3
8303 ; SSE41-NEXT: paddb %xmm4, %xmm3
8304 ; SSE41-NEXT: pxor %xmm1, %xmm1
8305 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
8306 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
8307 ; SSE41-NEXT: psadbw %xmm1, %xmm3
8308 ; SSE41-NEXT: psadbw %xmm1, %xmm0
8309 ; SSE41-NEXT: packuswb %xmm3, %xmm0
8310 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8313 ; AVX1-LABEL: ugt_8_v4i32:
8315 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8316 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
8317 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8318 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
8319 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
8320 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
8321 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
8322 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
8323 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
8324 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8325 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8326 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8327 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8328 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8329 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
8332 ; AVX2-LABEL: ugt_8_v4i32:
8334 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8335 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
8336 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8337 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
8338 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
8339 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
8340 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
8341 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
8342 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
8343 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8344 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8345 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8346 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8347 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8348 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
8349 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
8352 ; AVX512VPOPCNTDQ-LABEL: ugt_8_v4i32:
8353 ; AVX512VPOPCNTDQ: # %bb.0:
8354 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8355 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
8356 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
8357 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
8358 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
8359 ; AVX512VPOPCNTDQ-NEXT: retq
8361 ; AVX512VPOPCNTDQVL-LABEL: ugt_8_v4i32:
8362 ; AVX512VPOPCNTDQVL: # %bb.0:
8363 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
8364 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
8365 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
8366 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
8367 ; AVX512VPOPCNTDQVL-NEXT: retq
8369 ; BITALG_NOVLX-LABEL: ugt_8_v4i32:
8370 ; BITALG_NOVLX: # %bb.0:
8371 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8372 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
8373 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
8374 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8375 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8376 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8377 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8378 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8379 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
8380 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
8381 ; BITALG_NOVLX-NEXT: vzeroupper
8382 ; BITALG_NOVLX-NEXT: retq
8384 ; BITALG-LABEL: ugt_8_v4i32:
8386 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
8387 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
8388 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8389 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8390 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8391 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8392 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8393 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
8394 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
8395 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
8397 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
8398 %3 = icmp ugt <4 x i32> %2, <i32 8, i32 8, i32 8, i32 8>
8399 %4 = sext <4 x i1> %3 to <4 x i32>
8403 define <4 x i32> @ult_9_v4i32(<4 x i32> %0) {
8404 ; SSE2-LABEL: ult_9_v4i32:
8406 ; SSE2-NEXT: movdqa %xmm0, %xmm1
8407 ; SSE2-NEXT: psrlw $1, %xmm1
8408 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8409 ; SSE2-NEXT: psubb %xmm1, %xmm0
8410 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8411 ; SSE2-NEXT: movdqa %xmm0, %xmm2
8412 ; SSE2-NEXT: pand %xmm1, %xmm2
8413 ; SSE2-NEXT: psrlw $2, %xmm0
8414 ; SSE2-NEXT: pand %xmm1, %xmm0
8415 ; SSE2-NEXT: paddb %xmm2, %xmm0
8416 ; SSE2-NEXT: movdqa %xmm0, %xmm1
8417 ; SSE2-NEXT: psrlw $4, %xmm1
8418 ; SSE2-NEXT: paddb %xmm0, %xmm1
8419 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8420 ; SSE2-NEXT: pxor %xmm0, %xmm0
8421 ; SSE2-NEXT: movdqa %xmm1, %xmm2
8422 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
8423 ; SSE2-NEXT: psadbw %xmm0, %xmm2
8424 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
8425 ; SSE2-NEXT: psadbw %xmm0, %xmm1
8426 ; SSE2-NEXT: packuswb %xmm2, %xmm1
8427 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9]
8428 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
8431 ; SSE3-LABEL: ult_9_v4i32:
8433 ; SSE3-NEXT: movdqa %xmm0, %xmm1
8434 ; SSE3-NEXT: psrlw $1, %xmm1
8435 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8436 ; SSE3-NEXT: psubb %xmm1, %xmm0
8437 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8438 ; SSE3-NEXT: movdqa %xmm0, %xmm2
8439 ; SSE3-NEXT: pand %xmm1, %xmm2
8440 ; SSE3-NEXT: psrlw $2, %xmm0
8441 ; SSE3-NEXT: pand %xmm1, %xmm0
8442 ; SSE3-NEXT: paddb %xmm2, %xmm0
8443 ; SSE3-NEXT: movdqa %xmm0, %xmm1
8444 ; SSE3-NEXT: psrlw $4, %xmm1
8445 ; SSE3-NEXT: paddb %xmm0, %xmm1
8446 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8447 ; SSE3-NEXT: pxor %xmm0, %xmm0
8448 ; SSE3-NEXT: movdqa %xmm1, %xmm2
8449 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
8450 ; SSE3-NEXT: psadbw %xmm0, %xmm2
8451 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
8452 ; SSE3-NEXT: psadbw %xmm0, %xmm1
8453 ; SSE3-NEXT: packuswb %xmm2, %xmm1
8454 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9]
8455 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
8458 ; SSSE3-LABEL: ult_9_v4i32:
8460 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8461 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
8462 ; SSSE3-NEXT: pand %xmm1, %xmm2
8463 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8464 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
8465 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
8466 ; SSSE3-NEXT: psrlw $4, %xmm0
8467 ; SSSE3-NEXT: pand %xmm1, %xmm0
8468 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
8469 ; SSSE3-NEXT: paddb %xmm4, %xmm3
8470 ; SSSE3-NEXT: pxor %xmm0, %xmm0
8471 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
8472 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
8473 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
8474 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
8475 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
8476 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
8477 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9]
8478 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
8481 ; SSE41-LABEL: ult_9_v4i32:
8483 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8484 ; SSE41-NEXT: movdqa %xmm0, %xmm2
8485 ; SSE41-NEXT: pand %xmm1, %xmm2
8486 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8487 ; SSE41-NEXT: movdqa %xmm3, %xmm4
8488 ; SSE41-NEXT: pshufb %xmm2, %xmm4
8489 ; SSE41-NEXT: psrlw $4, %xmm0
8490 ; SSE41-NEXT: pand %xmm1, %xmm0
8491 ; SSE41-NEXT: pshufb %xmm0, %xmm3
8492 ; SSE41-NEXT: paddb %xmm4, %xmm3
8493 ; SSE41-NEXT: pxor %xmm0, %xmm0
8494 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
8495 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
8496 ; SSE41-NEXT: psadbw %xmm0, %xmm3
8497 ; SSE41-NEXT: psadbw %xmm0, %xmm1
8498 ; SSE41-NEXT: packuswb %xmm3, %xmm1
8499 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9]
8500 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
8503 ; AVX1-LABEL: ult_9_v4i32:
8505 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8506 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
8507 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8508 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
8509 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
8510 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
8511 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
8512 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
8513 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
8514 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8515 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8516 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8517 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8518 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8519 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9]
8520 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8523 ; AVX2-LABEL: ult_9_v4i32:
8525 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8526 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
8527 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8528 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
8529 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
8530 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
8531 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
8532 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
8533 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
8534 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8535 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8536 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8537 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8538 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8539 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
8540 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8543 ; AVX512VPOPCNTDQ-LABEL: ult_9_v4i32:
8544 ; AVX512VPOPCNTDQ: # %bb.0:
8545 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8546 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
8547 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
8548 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8549 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
8550 ; AVX512VPOPCNTDQ-NEXT: retq
8552 ; AVX512VPOPCNTDQVL-LABEL: ult_9_v4i32:
8553 ; AVX512VPOPCNTDQVL: # %bb.0:
8554 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
8555 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
8556 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
8557 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
8558 ; AVX512VPOPCNTDQVL-NEXT: retq
8560 ; BITALG_NOVLX-LABEL: ult_9_v4i32:
8561 ; BITALG_NOVLX: # %bb.0:
8562 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8563 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
8564 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
8565 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8566 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8567 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8568 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8569 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8570 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
8571 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8572 ; BITALG_NOVLX-NEXT: vzeroupper
8573 ; BITALG_NOVLX-NEXT: retq
8575 ; BITALG-LABEL: ult_9_v4i32:
8577 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
8578 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
8579 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8580 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8581 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8582 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8583 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8584 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
8585 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
8586 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
8588 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
8589 %3 = icmp ult <4 x i32> %2, <i32 9, i32 9, i32 9, i32 9>
8590 %4 = sext <4 x i1> %3 to <4 x i32>
8594 define <4 x i32> @ugt_9_v4i32(<4 x i32> %0) {
8595 ; SSE2-LABEL: ugt_9_v4i32:
8597 ; SSE2-NEXT: movdqa %xmm0, %xmm1
8598 ; SSE2-NEXT: psrlw $1, %xmm1
8599 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8600 ; SSE2-NEXT: psubb %xmm1, %xmm0
8601 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8602 ; SSE2-NEXT: movdqa %xmm0, %xmm2
8603 ; SSE2-NEXT: pand %xmm1, %xmm2
8604 ; SSE2-NEXT: psrlw $2, %xmm0
8605 ; SSE2-NEXT: pand %xmm1, %xmm0
8606 ; SSE2-NEXT: paddb %xmm2, %xmm0
8607 ; SSE2-NEXT: movdqa %xmm0, %xmm1
8608 ; SSE2-NEXT: psrlw $4, %xmm1
8609 ; SSE2-NEXT: paddb %xmm1, %xmm0
8610 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8611 ; SSE2-NEXT: pxor %xmm1, %xmm1
8612 ; SSE2-NEXT: movdqa %xmm0, %xmm2
8613 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
8614 ; SSE2-NEXT: psadbw %xmm1, %xmm2
8615 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
8616 ; SSE2-NEXT: psadbw %xmm1, %xmm0
8617 ; SSE2-NEXT: packuswb %xmm2, %xmm0
8618 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8621 ; SSE3-LABEL: ugt_9_v4i32:
8623 ; SSE3-NEXT: movdqa %xmm0, %xmm1
8624 ; SSE3-NEXT: psrlw $1, %xmm1
8625 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8626 ; SSE3-NEXT: psubb %xmm1, %xmm0
8627 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8628 ; SSE3-NEXT: movdqa %xmm0, %xmm2
8629 ; SSE3-NEXT: pand %xmm1, %xmm2
8630 ; SSE3-NEXT: psrlw $2, %xmm0
8631 ; SSE3-NEXT: pand %xmm1, %xmm0
8632 ; SSE3-NEXT: paddb %xmm2, %xmm0
8633 ; SSE3-NEXT: movdqa %xmm0, %xmm1
8634 ; SSE3-NEXT: psrlw $4, %xmm1
8635 ; SSE3-NEXT: paddb %xmm1, %xmm0
8636 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8637 ; SSE3-NEXT: pxor %xmm1, %xmm1
8638 ; SSE3-NEXT: movdqa %xmm0, %xmm2
8639 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
8640 ; SSE3-NEXT: psadbw %xmm1, %xmm2
8641 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
8642 ; SSE3-NEXT: psadbw %xmm1, %xmm0
8643 ; SSE3-NEXT: packuswb %xmm2, %xmm0
8644 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8647 ; SSSE3-LABEL: ugt_9_v4i32:
8649 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8650 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
8651 ; SSSE3-NEXT: pand %xmm2, %xmm3
8652 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8653 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
8654 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
8655 ; SSSE3-NEXT: psrlw $4, %xmm0
8656 ; SSSE3-NEXT: pand %xmm2, %xmm0
8657 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
8658 ; SSSE3-NEXT: paddb %xmm4, %xmm1
8659 ; SSSE3-NEXT: pxor %xmm0, %xmm0
8660 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
8661 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
8662 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
8663 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
8664 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
8665 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
8666 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8667 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
8670 ; SSE41-LABEL: ugt_9_v4i32:
8672 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8673 ; SSE41-NEXT: movdqa %xmm0, %xmm2
8674 ; SSE41-NEXT: pand %xmm1, %xmm2
8675 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8676 ; SSE41-NEXT: movdqa %xmm3, %xmm4
8677 ; SSE41-NEXT: pshufb %xmm2, %xmm4
8678 ; SSE41-NEXT: psrlw $4, %xmm0
8679 ; SSE41-NEXT: pand %xmm1, %xmm0
8680 ; SSE41-NEXT: pshufb %xmm0, %xmm3
8681 ; SSE41-NEXT: paddb %xmm4, %xmm3
8682 ; SSE41-NEXT: pxor %xmm1, %xmm1
8683 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
8684 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
8685 ; SSE41-NEXT: psadbw %xmm1, %xmm3
8686 ; SSE41-NEXT: psadbw %xmm1, %xmm0
8687 ; SSE41-NEXT: packuswb %xmm3, %xmm0
8688 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8691 ; AVX1-LABEL: ugt_9_v4i32:
8693 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8694 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
8695 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8696 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
8697 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
8698 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
8699 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
8700 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
8701 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
8702 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8703 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8704 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8705 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8706 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8707 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
8710 ; AVX2-LABEL: ugt_9_v4i32:
8712 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8713 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
8714 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8715 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
8716 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
8717 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
8718 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
8719 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
8720 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
8721 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8722 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8723 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8724 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8725 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8726 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
8727 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
8730 ; AVX512VPOPCNTDQ-LABEL: ugt_9_v4i32:
8731 ; AVX512VPOPCNTDQ: # %bb.0:
8732 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8733 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
8734 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
8735 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
8736 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
8737 ; AVX512VPOPCNTDQ-NEXT: retq
8739 ; AVX512VPOPCNTDQVL-LABEL: ugt_9_v4i32:
8740 ; AVX512VPOPCNTDQVL: # %bb.0:
8741 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
8742 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
8743 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
8744 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
8745 ; AVX512VPOPCNTDQVL-NEXT: retq
8747 ; BITALG_NOVLX-LABEL: ugt_9_v4i32:
8748 ; BITALG_NOVLX: # %bb.0:
8749 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8750 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
8751 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
8752 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8753 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8754 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8755 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8756 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8757 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
8758 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
8759 ; BITALG_NOVLX-NEXT: vzeroupper
8760 ; BITALG_NOVLX-NEXT: retq
8762 ; BITALG-LABEL: ugt_9_v4i32:
8764 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
8765 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
8766 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8767 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8768 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8769 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8770 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8771 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
8772 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
8773 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
8775 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
8776 %3 = icmp ugt <4 x i32> %2, <i32 9, i32 9, i32 9, i32 9>
8777 %4 = sext <4 x i1> %3 to <4 x i32>
8781 define <4 x i32> @ult_10_v4i32(<4 x i32> %0) {
8782 ; SSE2-LABEL: ult_10_v4i32:
8784 ; SSE2-NEXT: movdqa %xmm0, %xmm1
8785 ; SSE2-NEXT: psrlw $1, %xmm1
8786 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8787 ; SSE2-NEXT: psubb %xmm1, %xmm0
8788 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8789 ; SSE2-NEXT: movdqa %xmm0, %xmm2
8790 ; SSE2-NEXT: pand %xmm1, %xmm2
8791 ; SSE2-NEXT: psrlw $2, %xmm0
8792 ; SSE2-NEXT: pand %xmm1, %xmm0
8793 ; SSE2-NEXT: paddb %xmm2, %xmm0
8794 ; SSE2-NEXT: movdqa %xmm0, %xmm1
8795 ; SSE2-NEXT: psrlw $4, %xmm1
8796 ; SSE2-NEXT: paddb %xmm0, %xmm1
8797 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8798 ; SSE2-NEXT: pxor %xmm0, %xmm0
8799 ; SSE2-NEXT: movdqa %xmm1, %xmm2
8800 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
8801 ; SSE2-NEXT: psadbw %xmm0, %xmm2
8802 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
8803 ; SSE2-NEXT: psadbw %xmm0, %xmm1
8804 ; SSE2-NEXT: packuswb %xmm2, %xmm1
8805 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10]
8806 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
8809 ; SSE3-LABEL: ult_10_v4i32:
8811 ; SSE3-NEXT: movdqa %xmm0, %xmm1
8812 ; SSE3-NEXT: psrlw $1, %xmm1
8813 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8814 ; SSE3-NEXT: psubb %xmm1, %xmm0
8815 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8816 ; SSE3-NEXT: movdqa %xmm0, %xmm2
8817 ; SSE3-NEXT: pand %xmm1, %xmm2
8818 ; SSE3-NEXT: psrlw $2, %xmm0
8819 ; SSE3-NEXT: pand %xmm1, %xmm0
8820 ; SSE3-NEXT: paddb %xmm2, %xmm0
8821 ; SSE3-NEXT: movdqa %xmm0, %xmm1
8822 ; SSE3-NEXT: psrlw $4, %xmm1
8823 ; SSE3-NEXT: paddb %xmm0, %xmm1
8824 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8825 ; SSE3-NEXT: pxor %xmm0, %xmm0
8826 ; SSE3-NEXT: movdqa %xmm1, %xmm2
8827 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
8828 ; SSE3-NEXT: psadbw %xmm0, %xmm2
8829 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
8830 ; SSE3-NEXT: psadbw %xmm0, %xmm1
8831 ; SSE3-NEXT: packuswb %xmm2, %xmm1
8832 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10]
8833 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
8836 ; SSSE3-LABEL: ult_10_v4i32:
8838 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8839 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
8840 ; SSSE3-NEXT: pand %xmm1, %xmm2
8841 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8842 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
8843 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
8844 ; SSSE3-NEXT: psrlw $4, %xmm0
8845 ; SSSE3-NEXT: pand %xmm1, %xmm0
8846 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
8847 ; SSSE3-NEXT: paddb %xmm4, %xmm3
8848 ; SSSE3-NEXT: pxor %xmm0, %xmm0
8849 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
8850 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
8851 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
8852 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
8853 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
8854 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
8855 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10]
8856 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
8859 ; SSE41-LABEL: ult_10_v4i32:
8861 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8862 ; SSE41-NEXT: movdqa %xmm0, %xmm2
8863 ; SSE41-NEXT: pand %xmm1, %xmm2
8864 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8865 ; SSE41-NEXT: movdqa %xmm3, %xmm4
8866 ; SSE41-NEXT: pshufb %xmm2, %xmm4
8867 ; SSE41-NEXT: psrlw $4, %xmm0
8868 ; SSE41-NEXT: pand %xmm1, %xmm0
8869 ; SSE41-NEXT: pshufb %xmm0, %xmm3
8870 ; SSE41-NEXT: paddb %xmm4, %xmm3
8871 ; SSE41-NEXT: pxor %xmm0, %xmm0
8872 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
8873 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
8874 ; SSE41-NEXT: psadbw %xmm0, %xmm3
8875 ; SSE41-NEXT: psadbw %xmm0, %xmm1
8876 ; SSE41-NEXT: packuswb %xmm3, %xmm1
8877 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10]
8878 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
8881 ; AVX1-LABEL: ult_10_v4i32:
8883 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8884 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
8885 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8886 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
8887 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
8888 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
8889 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
8890 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
8891 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
8892 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8893 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8894 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8895 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8896 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8897 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10]
8898 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8901 ; AVX2-LABEL: ult_10_v4i32:
8903 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8904 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
8905 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8906 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
8907 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
8908 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
8909 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
8910 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
8911 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
8912 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8913 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8914 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8915 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8916 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8917 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
8918 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8921 ; AVX512VPOPCNTDQ-LABEL: ult_10_v4i32:
8922 ; AVX512VPOPCNTDQ: # %bb.0:
8923 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8924 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
8925 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
8926 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8927 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
8928 ; AVX512VPOPCNTDQ-NEXT: retq
8930 ; AVX512VPOPCNTDQVL-LABEL: ult_10_v4i32:
8931 ; AVX512VPOPCNTDQVL: # %bb.0:
8932 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
8933 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
8934 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
8935 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
8936 ; AVX512VPOPCNTDQVL-NEXT: retq
8938 ; BITALG_NOVLX-LABEL: ult_10_v4i32:
8939 ; BITALG_NOVLX: # %bb.0:
8940 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8941 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
8942 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
8943 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8944 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8945 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8946 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8947 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8948 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
8949 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8950 ; BITALG_NOVLX-NEXT: vzeroupper
8951 ; BITALG_NOVLX-NEXT: retq
8953 ; BITALG-LABEL: ult_10_v4i32:
8955 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
8956 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
8957 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8958 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8959 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8960 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8961 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8962 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
8963 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
8964 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
8966 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
8967 %3 = icmp ult <4 x i32> %2, <i32 10, i32 10, i32 10, i32 10>
8968 %4 = sext <4 x i1> %3 to <4 x i32>
8972 define <4 x i32> @ugt_10_v4i32(<4 x i32> %0) {
8973 ; SSE2-LABEL: ugt_10_v4i32:
8975 ; SSE2-NEXT: movdqa %xmm0, %xmm1
8976 ; SSE2-NEXT: psrlw $1, %xmm1
8977 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8978 ; SSE2-NEXT: psubb %xmm1, %xmm0
8979 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8980 ; SSE2-NEXT: movdqa %xmm0, %xmm2
8981 ; SSE2-NEXT: pand %xmm1, %xmm2
8982 ; SSE2-NEXT: psrlw $2, %xmm0
8983 ; SSE2-NEXT: pand %xmm1, %xmm0
8984 ; SSE2-NEXT: paddb %xmm2, %xmm0
8985 ; SSE2-NEXT: movdqa %xmm0, %xmm1
8986 ; SSE2-NEXT: psrlw $4, %xmm1
8987 ; SSE2-NEXT: paddb %xmm1, %xmm0
8988 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8989 ; SSE2-NEXT: pxor %xmm1, %xmm1
8990 ; SSE2-NEXT: movdqa %xmm0, %xmm2
8991 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
8992 ; SSE2-NEXT: psadbw %xmm1, %xmm2
8993 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
8994 ; SSE2-NEXT: psadbw %xmm1, %xmm0
8995 ; SSE2-NEXT: packuswb %xmm2, %xmm0
8996 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8999 ; SSE3-LABEL: ugt_10_v4i32:
9001 ; SSE3-NEXT: movdqa %xmm0, %xmm1
9002 ; SSE3-NEXT: psrlw $1, %xmm1
9003 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9004 ; SSE3-NEXT: psubb %xmm1, %xmm0
9005 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
9006 ; SSE3-NEXT: movdqa %xmm0, %xmm2
9007 ; SSE3-NEXT: pand %xmm1, %xmm2
9008 ; SSE3-NEXT: psrlw $2, %xmm0
9009 ; SSE3-NEXT: pand %xmm1, %xmm0
9010 ; SSE3-NEXT: paddb %xmm2, %xmm0
9011 ; SSE3-NEXT: movdqa %xmm0, %xmm1
9012 ; SSE3-NEXT: psrlw $4, %xmm1
9013 ; SSE3-NEXT: paddb %xmm1, %xmm0
9014 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
9015 ; SSE3-NEXT: pxor %xmm1, %xmm1
9016 ; SSE3-NEXT: movdqa %xmm0, %xmm2
9017 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
9018 ; SSE3-NEXT: psadbw %xmm1, %xmm2
9019 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
9020 ; SSE3-NEXT: psadbw %xmm1, %xmm0
9021 ; SSE3-NEXT: packuswb %xmm2, %xmm0
9022 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
9025 ; SSSE3-LABEL: ugt_10_v4i32:
9027 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9028 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
9029 ; SSSE3-NEXT: pand %xmm2, %xmm3
9030 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9031 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
9032 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
9033 ; SSSE3-NEXT: psrlw $4, %xmm0
9034 ; SSSE3-NEXT: pand %xmm2, %xmm0
9035 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
9036 ; SSSE3-NEXT: paddb %xmm4, %xmm1
9037 ; SSSE3-NEXT: pxor %xmm0, %xmm0
9038 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
9039 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
9040 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
9041 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
9042 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
9043 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
9044 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9045 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
9048 ; SSE41-LABEL: ugt_10_v4i32:
9050 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9051 ; SSE41-NEXT: movdqa %xmm0, %xmm2
9052 ; SSE41-NEXT: pand %xmm1, %xmm2
9053 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9054 ; SSE41-NEXT: movdqa %xmm3, %xmm4
9055 ; SSE41-NEXT: pshufb %xmm2, %xmm4
9056 ; SSE41-NEXT: psrlw $4, %xmm0
9057 ; SSE41-NEXT: pand %xmm1, %xmm0
9058 ; SSE41-NEXT: pshufb %xmm0, %xmm3
9059 ; SSE41-NEXT: paddb %xmm4, %xmm3
9060 ; SSE41-NEXT: pxor %xmm1, %xmm1
9061 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
9062 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
9063 ; SSE41-NEXT: psadbw %xmm1, %xmm3
9064 ; SSE41-NEXT: psadbw %xmm1, %xmm0
9065 ; SSE41-NEXT: packuswb %xmm3, %xmm0
9066 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
9069 ; AVX1-LABEL: ugt_10_v4i32:
9071 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9072 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
9073 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9074 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
9075 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
9076 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
9077 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
9078 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
9079 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
9080 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9081 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9082 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9083 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9084 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9085 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
9088 ; AVX2-LABEL: ugt_10_v4i32:
9090 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9091 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
9092 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9093 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
9094 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
9095 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
9096 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
9097 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
9098 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
9099 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9100 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9101 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9102 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9103 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9104 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
9105 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
9108 ; AVX512VPOPCNTDQ-LABEL: ugt_10_v4i32:
9109 ; AVX512VPOPCNTDQ: # %bb.0:
9110 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
9111 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
9112 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
9113 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
9114 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
9115 ; AVX512VPOPCNTDQ-NEXT: retq
9117 ; AVX512VPOPCNTDQVL-LABEL: ugt_10_v4i32:
9118 ; AVX512VPOPCNTDQVL: # %bb.0:
9119 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
9120 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
9121 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
9122 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
9123 ; AVX512VPOPCNTDQVL-NEXT: retq
9125 ; BITALG_NOVLX-LABEL: ugt_10_v4i32:
9126 ; BITALG_NOVLX: # %bb.0:
9127 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
9128 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
9129 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
9130 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9131 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9132 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9133 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9134 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9135 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
9136 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
9137 ; BITALG_NOVLX-NEXT: vzeroupper
9138 ; BITALG_NOVLX-NEXT: retq
9140 ; BITALG-LABEL: ugt_10_v4i32:
9142 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
9143 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
9144 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9145 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9146 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9147 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9148 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9149 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
9150 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
9151 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
9153 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
9154 %3 = icmp ugt <4 x i32> %2, <i32 10, i32 10, i32 10, i32 10>
9155 %4 = sext <4 x i1> %3 to <4 x i32>
9159 define <4 x i32> @ult_11_v4i32(<4 x i32> %0) {
9160 ; SSE2-LABEL: ult_11_v4i32:
9162 ; SSE2-NEXT: movdqa %xmm0, %xmm1
9163 ; SSE2-NEXT: psrlw $1, %xmm1
9164 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9165 ; SSE2-NEXT: psubb %xmm1, %xmm0
9166 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
9167 ; SSE2-NEXT: movdqa %xmm0, %xmm2
9168 ; SSE2-NEXT: pand %xmm1, %xmm2
9169 ; SSE2-NEXT: psrlw $2, %xmm0
9170 ; SSE2-NEXT: pand %xmm1, %xmm0
9171 ; SSE2-NEXT: paddb %xmm2, %xmm0
9172 ; SSE2-NEXT: movdqa %xmm0, %xmm1
9173 ; SSE2-NEXT: psrlw $4, %xmm1
9174 ; SSE2-NEXT: paddb %xmm0, %xmm1
9175 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9176 ; SSE2-NEXT: pxor %xmm0, %xmm0
9177 ; SSE2-NEXT: movdqa %xmm1, %xmm2
9178 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
9179 ; SSE2-NEXT: psadbw %xmm0, %xmm2
9180 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
9181 ; SSE2-NEXT: psadbw %xmm0, %xmm1
9182 ; SSE2-NEXT: packuswb %xmm2, %xmm1
9183 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11]
9184 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
9187 ; SSE3-LABEL: ult_11_v4i32:
9189 ; SSE3-NEXT: movdqa %xmm0, %xmm1
9190 ; SSE3-NEXT: psrlw $1, %xmm1
9191 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9192 ; SSE3-NEXT: psubb %xmm1, %xmm0
9193 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
9194 ; SSE3-NEXT: movdqa %xmm0, %xmm2
9195 ; SSE3-NEXT: pand %xmm1, %xmm2
9196 ; SSE3-NEXT: psrlw $2, %xmm0
9197 ; SSE3-NEXT: pand %xmm1, %xmm0
9198 ; SSE3-NEXT: paddb %xmm2, %xmm0
9199 ; SSE3-NEXT: movdqa %xmm0, %xmm1
9200 ; SSE3-NEXT: psrlw $4, %xmm1
9201 ; SSE3-NEXT: paddb %xmm0, %xmm1
9202 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9203 ; SSE3-NEXT: pxor %xmm0, %xmm0
9204 ; SSE3-NEXT: movdqa %xmm1, %xmm2
9205 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
9206 ; SSE3-NEXT: psadbw %xmm0, %xmm2
9207 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
9208 ; SSE3-NEXT: psadbw %xmm0, %xmm1
9209 ; SSE3-NEXT: packuswb %xmm2, %xmm1
9210 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11]
9211 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
9214 ; SSSE3-LABEL: ult_11_v4i32:
9216 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9217 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
9218 ; SSSE3-NEXT: pand %xmm1, %xmm2
9219 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9220 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
9221 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
9222 ; SSSE3-NEXT: psrlw $4, %xmm0
9223 ; SSSE3-NEXT: pand %xmm1, %xmm0
9224 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
9225 ; SSSE3-NEXT: paddb %xmm4, %xmm3
9226 ; SSSE3-NEXT: pxor %xmm0, %xmm0
9227 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
9228 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
9229 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
9230 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
9231 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
9232 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
9233 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11]
9234 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
9237 ; SSE41-LABEL: ult_11_v4i32:
9239 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9240 ; SSE41-NEXT: movdqa %xmm0, %xmm2
9241 ; SSE41-NEXT: pand %xmm1, %xmm2
9242 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9243 ; SSE41-NEXT: movdqa %xmm3, %xmm4
9244 ; SSE41-NEXT: pshufb %xmm2, %xmm4
9245 ; SSE41-NEXT: psrlw $4, %xmm0
9246 ; SSE41-NEXT: pand %xmm1, %xmm0
9247 ; SSE41-NEXT: pshufb %xmm0, %xmm3
9248 ; SSE41-NEXT: paddb %xmm4, %xmm3
9249 ; SSE41-NEXT: pxor %xmm0, %xmm0
9250 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
9251 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
9252 ; SSE41-NEXT: psadbw %xmm0, %xmm3
9253 ; SSE41-NEXT: psadbw %xmm0, %xmm1
9254 ; SSE41-NEXT: packuswb %xmm3, %xmm1
9255 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11]
9256 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
9259 ; AVX1-LABEL: ult_11_v4i32:
9261 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9262 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
9263 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9264 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
9265 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
9266 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
9267 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
9268 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
9269 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
9270 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9271 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9272 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9273 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9274 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9275 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11]
9276 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
9279 ; AVX2-LABEL: ult_11_v4i32:
9281 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9282 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
9283 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9284 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
9285 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
9286 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
9287 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
9288 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
9289 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
9290 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9291 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9292 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9293 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9294 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9295 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
9296 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
9299 ; AVX512VPOPCNTDQ-LABEL: ult_11_v4i32:
9300 ; AVX512VPOPCNTDQ: # %bb.0:
9301 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
9302 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
9303 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
9304 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
9305 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
9306 ; AVX512VPOPCNTDQ-NEXT: retq
9308 ; AVX512VPOPCNTDQVL-LABEL: ult_11_v4i32:
9309 ; AVX512VPOPCNTDQVL: # %bb.0:
9310 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
9311 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
9312 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
9313 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
9314 ; AVX512VPOPCNTDQVL-NEXT: retq
9316 ; BITALG_NOVLX-LABEL: ult_11_v4i32:
9317 ; BITALG_NOVLX: # %bb.0:
9318 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
9319 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
9320 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
9321 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9322 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9323 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9324 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9325 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9326 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
9327 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
9328 ; BITALG_NOVLX-NEXT: vzeroupper
9329 ; BITALG_NOVLX-NEXT: retq
9331 ; BITALG-LABEL: ult_11_v4i32:
9333 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
9334 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
9335 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9336 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9337 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9338 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9339 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9340 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
9341 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
9342 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
9344 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
9345 %3 = icmp ult <4 x i32> %2, <i32 11, i32 11, i32 11, i32 11>
9346 %4 = sext <4 x i1> %3 to <4 x i32>
9350 define <4 x i32> @ugt_11_v4i32(<4 x i32> %0) {
9351 ; SSE2-LABEL: ugt_11_v4i32:
9353 ; SSE2-NEXT: movdqa %xmm0, %xmm1
9354 ; SSE2-NEXT: psrlw $1, %xmm1
9355 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9356 ; SSE2-NEXT: psubb %xmm1, %xmm0
9357 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
9358 ; SSE2-NEXT: movdqa %xmm0, %xmm2
9359 ; SSE2-NEXT: pand %xmm1, %xmm2
9360 ; SSE2-NEXT: psrlw $2, %xmm0
9361 ; SSE2-NEXT: pand %xmm1, %xmm0
9362 ; SSE2-NEXT: paddb %xmm2, %xmm0
9363 ; SSE2-NEXT: movdqa %xmm0, %xmm1
9364 ; SSE2-NEXT: psrlw $4, %xmm1
9365 ; SSE2-NEXT: paddb %xmm1, %xmm0
9366 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
9367 ; SSE2-NEXT: pxor %xmm1, %xmm1
9368 ; SSE2-NEXT: movdqa %xmm0, %xmm2
9369 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
9370 ; SSE2-NEXT: psadbw %xmm1, %xmm2
9371 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
9372 ; SSE2-NEXT: psadbw %xmm1, %xmm0
9373 ; SSE2-NEXT: packuswb %xmm2, %xmm0
9374 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
9377 ; SSE3-LABEL: ugt_11_v4i32:
9379 ; SSE3-NEXT: movdqa %xmm0, %xmm1
9380 ; SSE3-NEXT: psrlw $1, %xmm1
9381 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9382 ; SSE3-NEXT: psubb %xmm1, %xmm0
9383 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
9384 ; SSE3-NEXT: movdqa %xmm0, %xmm2
9385 ; SSE3-NEXT: pand %xmm1, %xmm2
9386 ; SSE3-NEXT: psrlw $2, %xmm0
9387 ; SSE3-NEXT: pand %xmm1, %xmm0
9388 ; SSE3-NEXT: paddb %xmm2, %xmm0
9389 ; SSE3-NEXT: movdqa %xmm0, %xmm1
9390 ; SSE3-NEXT: psrlw $4, %xmm1
9391 ; SSE3-NEXT: paddb %xmm1, %xmm0
9392 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
9393 ; SSE3-NEXT: pxor %xmm1, %xmm1
9394 ; SSE3-NEXT: movdqa %xmm0, %xmm2
9395 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
9396 ; SSE3-NEXT: psadbw %xmm1, %xmm2
9397 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
9398 ; SSE3-NEXT: psadbw %xmm1, %xmm0
9399 ; SSE3-NEXT: packuswb %xmm2, %xmm0
9400 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
9403 ; SSSE3-LABEL: ugt_11_v4i32:
9405 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9406 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
9407 ; SSSE3-NEXT: pand %xmm2, %xmm3
9408 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9409 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
9410 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
9411 ; SSSE3-NEXT: psrlw $4, %xmm0
9412 ; SSSE3-NEXT: pand %xmm2, %xmm0
9413 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
9414 ; SSSE3-NEXT: paddb %xmm4, %xmm1
9415 ; SSSE3-NEXT: pxor %xmm0, %xmm0
9416 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
9417 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
9418 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
9419 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
9420 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
9421 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
9422 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9423 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
9426 ; SSE41-LABEL: ugt_11_v4i32:
9428 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9429 ; SSE41-NEXT: movdqa %xmm0, %xmm2
9430 ; SSE41-NEXT: pand %xmm1, %xmm2
9431 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9432 ; SSE41-NEXT: movdqa %xmm3, %xmm4
9433 ; SSE41-NEXT: pshufb %xmm2, %xmm4
9434 ; SSE41-NEXT: psrlw $4, %xmm0
9435 ; SSE41-NEXT: pand %xmm1, %xmm0
9436 ; SSE41-NEXT: pshufb %xmm0, %xmm3
9437 ; SSE41-NEXT: paddb %xmm4, %xmm3
9438 ; SSE41-NEXT: pxor %xmm1, %xmm1
9439 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
9440 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
9441 ; SSE41-NEXT: psadbw %xmm1, %xmm3
9442 ; SSE41-NEXT: psadbw %xmm1, %xmm0
9443 ; SSE41-NEXT: packuswb %xmm3, %xmm0
9444 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
9447 ; AVX1-LABEL: ugt_11_v4i32:
9449 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9450 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
9451 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9452 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
9453 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
9454 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
9455 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
9456 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
9457 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
9458 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9459 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9460 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9461 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9462 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9463 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
9466 ; AVX2-LABEL: ugt_11_v4i32:
9468 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9469 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
9470 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9471 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
9472 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
9473 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
9474 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
9475 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
9476 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
9477 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9478 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9479 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9480 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9481 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9482 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
9483 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
9486 ; AVX512VPOPCNTDQ-LABEL: ugt_11_v4i32:
9487 ; AVX512VPOPCNTDQ: # %bb.0:
9488 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
9489 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
9490 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
9491 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
9492 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
9493 ; AVX512VPOPCNTDQ-NEXT: retq
9495 ; AVX512VPOPCNTDQVL-LABEL: ugt_11_v4i32:
9496 ; AVX512VPOPCNTDQVL: # %bb.0:
9497 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
9498 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
9499 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
9500 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
9501 ; AVX512VPOPCNTDQVL-NEXT: retq
9503 ; BITALG_NOVLX-LABEL: ugt_11_v4i32:
9504 ; BITALG_NOVLX: # %bb.0:
9505 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
9506 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
9507 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
9508 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9509 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9510 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9511 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9512 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9513 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
9514 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
9515 ; BITALG_NOVLX-NEXT: vzeroupper
9516 ; BITALG_NOVLX-NEXT: retq
9518 ; BITALG-LABEL: ugt_11_v4i32:
9520 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
9521 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
9522 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9523 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9524 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9525 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9526 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9527 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
9528 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
9529 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
9531 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
9532 %3 = icmp ugt <4 x i32> %2, <i32 11, i32 11, i32 11, i32 11>
9533 %4 = sext <4 x i1> %3 to <4 x i32>
9537 define <4 x i32> @ult_12_v4i32(<4 x i32> %0) {
9538 ; SSE2-LABEL: ult_12_v4i32:
9540 ; SSE2-NEXT: movdqa %xmm0, %xmm1
9541 ; SSE2-NEXT: psrlw $1, %xmm1
9542 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9543 ; SSE2-NEXT: psubb %xmm1, %xmm0
9544 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
9545 ; SSE2-NEXT: movdqa %xmm0, %xmm2
9546 ; SSE2-NEXT: pand %xmm1, %xmm2
9547 ; SSE2-NEXT: psrlw $2, %xmm0
9548 ; SSE2-NEXT: pand %xmm1, %xmm0
9549 ; SSE2-NEXT: paddb %xmm2, %xmm0
9550 ; SSE2-NEXT: movdqa %xmm0, %xmm1
9551 ; SSE2-NEXT: psrlw $4, %xmm1
9552 ; SSE2-NEXT: paddb %xmm0, %xmm1
9553 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9554 ; SSE2-NEXT: pxor %xmm0, %xmm0
9555 ; SSE2-NEXT: movdqa %xmm1, %xmm2
9556 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
9557 ; SSE2-NEXT: psadbw %xmm0, %xmm2
9558 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
9559 ; SSE2-NEXT: psadbw %xmm0, %xmm1
9560 ; SSE2-NEXT: packuswb %xmm2, %xmm1
9561 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12]
9562 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
9565 ; SSE3-LABEL: ult_12_v4i32:
9567 ; SSE3-NEXT: movdqa %xmm0, %xmm1
9568 ; SSE3-NEXT: psrlw $1, %xmm1
9569 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9570 ; SSE3-NEXT: psubb %xmm1, %xmm0
9571 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
9572 ; SSE3-NEXT: movdqa %xmm0, %xmm2
9573 ; SSE3-NEXT: pand %xmm1, %xmm2
9574 ; SSE3-NEXT: psrlw $2, %xmm0
9575 ; SSE3-NEXT: pand %xmm1, %xmm0
9576 ; SSE3-NEXT: paddb %xmm2, %xmm0
9577 ; SSE3-NEXT: movdqa %xmm0, %xmm1
9578 ; SSE3-NEXT: psrlw $4, %xmm1
9579 ; SSE3-NEXT: paddb %xmm0, %xmm1
9580 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9581 ; SSE3-NEXT: pxor %xmm0, %xmm0
9582 ; SSE3-NEXT: movdqa %xmm1, %xmm2
9583 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
9584 ; SSE3-NEXT: psadbw %xmm0, %xmm2
9585 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
9586 ; SSE3-NEXT: psadbw %xmm0, %xmm1
9587 ; SSE3-NEXT: packuswb %xmm2, %xmm1
9588 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12]
9589 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
9592 ; SSSE3-LABEL: ult_12_v4i32:
9594 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9595 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
9596 ; SSSE3-NEXT: pand %xmm1, %xmm2
9597 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9598 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
9599 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
9600 ; SSSE3-NEXT: psrlw $4, %xmm0
9601 ; SSSE3-NEXT: pand %xmm1, %xmm0
9602 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
9603 ; SSSE3-NEXT: paddb %xmm4, %xmm3
9604 ; SSSE3-NEXT: pxor %xmm0, %xmm0
9605 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
9606 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
9607 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
9608 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
9609 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
9610 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
9611 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12]
9612 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
9615 ; SSE41-LABEL: ult_12_v4i32:
9617 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9618 ; SSE41-NEXT: movdqa %xmm0, %xmm2
9619 ; SSE41-NEXT: pand %xmm1, %xmm2
9620 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9621 ; SSE41-NEXT: movdqa %xmm3, %xmm4
9622 ; SSE41-NEXT: pshufb %xmm2, %xmm4
9623 ; SSE41-NEXT: psrlw $4, %xmm0
9624 ; SSE41-NEXT: pand %xmm1, %xmm0
9625 ; SSE41-NEXT: pshufb %xmm0, %xmm3
9626 ; SSE41-NEXT: paddb %xmm4, %xmm3
9627 ; SSE41-NEXT: pxor %xmm0, %xmm0
9628 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
9629 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
9630 ; SSE41-NEXT: psadbw %xmm0, %xmm3
9631 ; SSE41-NEXT: psadbw %xmm0, %xmm1
9632 ; SSE41-NEXT: packuswb %xmm3, %xmm1
9633 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12]
9634 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
9637 ; AVX1-LABEL: ult_12_v4i32:
9639 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9640 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
9641 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9642 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
9643 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
9644 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
9645 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
9646 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
9647 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
9648 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9649 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9650 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9651 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9652 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9653 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12]
9654 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
9657 ; AVX2-LABEL: ult_12_v4i32:
9659 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9660 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
9661 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9662 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
9663 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
9664 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
9665 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
9666 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
9667 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
9668 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9669 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9670 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9671 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9672 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9673 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
9674 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
9677 ; AVX512VPOPCNTDQ-LABEL: ult_12_v4i32:
9678 ; AVX512VPOPCNTDQ: # %bb.0:
9679 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
9680 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
9681 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
9682 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
9683 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
9684 ; AVX512VPOPCNTDQ-NEXT: retq
9686 ; AVX512VPOPCNTDQVL-LABEL: ult_12_v4i32:
9687 ; AVX512VPOPCNTDQVL: # %bb.0:
9688 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
9689 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
9690 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
9691 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
9692 ; AVX512VPOPCNTDQVL-NEXT: retq
9694 ; BITALG_NOVLX-LABEL: ult_12_v4i32:
9695 ; BITALG_NOVLX: # %bb.0:
9696 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
9697 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
9698 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
9699 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9700 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9701 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9702 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9703 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9704 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
9705 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
9706 ; BITALG_NOVLX-NEXT: vzeroupper
9707 ; BITALG_NOVLX-NEXT: retq
9709 ; BITALG-LABEL: ult_12_v4i32:
9711 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
9712 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
9713 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9714 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9715 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9716 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9717 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9718 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
9719 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
9720 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
9722 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
9723 %3 = icmp ult <4 x i32> %2, <i32 12, i32 12, i32 12, i32 12>
9724 %4 = sext <4 x i1> %3 to <4 x i32>
9728 define <4 x i32> @ugt_12_v4i32(<4 x i32> %0) {
9729 ; SSE2-LABEL: ugt_12_v4i32:
9731 ; SSE2-NEXT: movdqa %xmm0, %xmm1
9732 ; SSE2-NEXT: psrlw $1, %xmm1
9733 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9734 ; SSE2-NEXT: psubb %xmm1, %xmm0
9735 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
9736 ; SSE2-NEXT: movdqa %xmm0, %xmm2
9737 ; SSE2-NEXT: pand %xmm1, %xmm2
9738 ; SSE2-NEXT: psrlw $2, %xmm0
9739 ; SSE2-NEXT: pand %xmm1, %xmm0
9740 ; SSE2-NEXT: paddb %xmm2, %xmm0
9741 ; SSE2-NEXT: movdqa %xmm0, %xmm1
9742 ; SSE2-NEXT: psrlw $4, %xmm1
9743 ; SSE2-NEXT: paddb %xmm1, %xmm0
9744 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
9745 ; SSE2-NEXT: pxor %xmm1, %xmm1
9746 ; SSE2-NEXT: movdqa %xmm0, %xmm2
9747 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
9748 ; SSE2-NEXT: psadbw %xmm1, %xmm2
9749 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
9750 ; SSE2-NEXT: psadbw %xmm1, %xmm0
9751 ; SSE2-NEXT: packuswb %xmm2, %xmm0
9752 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
9755 ; SSE3-LABEL: ugt_12_v4i32:
9757 ; SSE3-NEXT: movdqa %xmm0, %xmm1
9758 ; SSE3-NEXT: psrlw $1, %xmm1
9759 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9760 ; SSE3-NEXT: psubb %xmm1, %xmm0
9761 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
9762 ; SSE3-NEXT: movdqa %xmm0, %xmm2
9763 ; SSE3-NEXT: pand %xmm1, %xmm2
9764 ; SSE3-NEXT: psrlw $2, %xmm0
9765 ; SSE3-NEXT: pand %xmm1, %xmm0
9766 ; SSE3-NEXT: paddb %xmm2, %xmm0
9767 ; SSE3-NEXT: movdqa %xmm0, %xmm1
9768 ; SSE3-NEXT: psrlw $4, %xmm1
9769 ; SSE3-NEXT: paddb %xmm1, %xmm0
9770 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
9771 ; SSE3-NEXT: pxor %xmm1, %xmm1
9772 ; SSE3-NEXT: movdqa %xmm0, %xmm2
9773 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
9774 ; SSE3-NEXT: psadbw %xmm1, %xmm2
9775 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
9776 ; SSE3-NEXT: psadbw %xmm1, %xmm0
9777 ; SSE3-NEXT: packuswb %xmm2, %xmm0
9778 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
9781 ; SSSE3-LABEL: ugt_12_v4i32:
9783 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9784 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
9785 ; SSSE3-NEXT: pand %xmm2, %xmm3
9786 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9787 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
9788 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
9789 ; SSSE3-NEXT: psrlw $4, %xmm0
9790 ; SSSE3-NEXT: pand %xmm2, %xmm0
9791 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
9792 ; SSSE3-NEXT: paddb %xmm4, %xmm1
9793 ; SSSE3-NEXT: pxor %xmm0, %xmm0
9794 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
9795 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
9796 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
9797 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
9798 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
9799 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
9800 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9801 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
9804 ; SSE41-LABEL: ugt_12_v4i32:
9806 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9807 ; SSE41-NEXT: movdqa %xmm0, %xmm2
9808 ; SSE41-NEXT: pand %xmm1, %xmm2
9809 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9810 ; SSE41-NEXT: movdqa %xmm3, %xmm4
9811 ; SSE41-NEXT: pshufb %xmm2, %xmm4
9812 ; SSE41-NEXT: psrlw $4, %xmm0
9813 ; SSE41-NEXT: pand %xmm1, %xmm0
9814 ; SSE41-NEXT: pshufb %xmm0, %xmm3
9815 ; SSE41-NEXT: paddb %xmm4, %xmm3
9816 ; SSE41-NEXT: pxor %xmm1, %xmm1
9817 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
9818 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
9819 ; SSE41-NEXT: psadbw %xmm1, %xmm3
9820 ; SSE41-NEXT: psadbw %xmm1, %xmm0
9821 ; SSE41-NEXT: packuswb %xmm3, %xmm0
9822 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
9825 ; AVX1-LABEL: ugt_12_v4i32:
9827 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9828 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
9829 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9830 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
9831 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
9832 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
9833 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
9834 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
9835 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
9836 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9837 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9838 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9839 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9840 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9841 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
9844 ; AVX2-LABEL: ugt_12_v4i32:
9846 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9847 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
9848 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9849 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
9850 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
9851 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
9852 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
9853 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
9854 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
9855 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9856 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9857 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9858 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9859 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9860 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
9861 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
9864 ; AVX512VPOPCNTDQ-LABEL: ugt_12_v4i32:
9865 ; AVX512VPOPCNTDQ: # %bb.0:
9866 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
9867 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
9868 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
9869 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
9870 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
9871 ; AVX512VPOPCNTDQ-NEXT: retq
9873 ; AVX512VPOPCNTDQVL-LABEL: ugt_12_v4i32:
9874 ; AVX512VPOPCNTDQVL: # %bb.0:
9875 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
9876 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
9877 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
9878 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
9879 ; AVX512VPOPCNTDQVL-NEXT: retq
9881 ; BITALG_NOVLX-LABEL: ugt_12_v4i32:
9882 ; BITALG_NOVLX: # %bb.0:
9883 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
9884 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
9885 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
9886 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9887 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9888 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9889 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9890 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9891 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
9892 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
9893 ; BITALG_NOVLX-NEXT: vzeroupper
9894 ; BITALG_NOVLX-NEXT: retq
9896 ; BITALG-LABEL: ugt_12_v4i32:
9898 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
9899 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
9900 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9901 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9902 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9903 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9904 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9905 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
9906 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
9907 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
9909 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
9910 %3 = icmp ugt <4 x i32> %2, <i32 12, i32 12, i32 12, i32 12>
9911 %4 = sext <4 x i1> %3 to <4 x i32>
9915 define <4 x i32> @ult_13_v4i32(<4 x i32> %0) {
9916 ; SSE2-LABEL: ult_13_v4i32:
9918 ; SSE2-NEXT: movdqa %xmm0, %xmm1
9919 ; SSE2-NEXT: psrlw $1, %xmm1
9920 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9921 ; SSE2-NEXT: psubb %xmm1, %xmm0
9922 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
9923 ; SSE2-NEXT: movdqa %xmm0, %xmm2
9924 ; SSE2-NEXT: pand %xmm1, %xmm2
9925 ; SSE2-NEXT: psrlw $2, %xmm0
9926 ; SSE2-NEXT: pand %xmm1, %xmm0
9927 ; SSE2-NEXT: paddb %xmm2, %xmm0
9928 ; SSE2-NEXT: movdqa %xmm0, %xmm1
9929 ; SSE2-NEXT: psrlw $4, %xmm1
9930 ; SSE2-NEXT: paddb %xmm0, %xmm1
9931 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9932 ; SSE2-NEXT: pxor %xmm0, %xmm0
9933 ; SSE2-NEXT: movdqa %xmm1, %xmm2
9934 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
9935 ; SSE2-NEXT: psadbw %xmm0, %xmm2
9936 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
9937 ; SSE2-NEXT: psadbw %xmm0, %xmm1
9938 ; SSE2-NEXT: packuswb %xmm2, %xmm1
9939 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13]
9940 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
9943 ; SSE3-LABEL: ult_13_v4i32:
9945 ; SSE3-NEXT: movdqa %xmm0, %xmm1
9946 ; SSE3-NEXT: psrlw $1, %xmm1
9947 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9948 ; SSE3-NEXT: psubb %xmm1, %xmm0
9949 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
9950 ; SSE3-NEXT: movdqa %xmm0, %xmm2
9951 ; SSE3-NEXT: pand %xmm1, %xmm2
9952 ; SSE3-NEXT: psrlw $2, %xmm0
9953 ; SSE3-NEXT: pand %xmm1, %xmm0
9954 ; SSE3-NEXT: paddb %xmm2, %xmm0
9955 ; SSE3-NEXT: movdqa %xmm0, %xmm1
9956 ; SSE3-NEXT: psrlw $4, %xmm1
9957 ; SSE3-NEXT: paddb %xmm0, %xmm1
9958 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9959 ; SSE3-NEXT: pxor %xmm0, %xmm0
9960 ; SSE3-NEXT: movdqa %xmm1, %xmm2
9961 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
9962 ; SSE3-NEXT: psadbw %xmm0, %xmm2
9963 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
9964 ; SSE3-NEXT: psadbw %xmm0, %xmm1
9965 ; SSE3-NEXT: packuswb %xmm2, %xmm1
9966 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13]
9967 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
9970 ; SSSE3-LABEL: ult_13_v4i32:
9972 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9973 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
9974 ; SSSE3-NEXT: pand %xmm1, %xmm2
9975 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9976 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
9977 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
9978 ; SSSE3-NEXT: psrlw $4, %xmm0
9979 ; SSSE3-NEXT: pand %xmm1, %xmm0
9980 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
9981 ; SSSE3-NEXT: paddb %xmm4, %xmm3
9982 ; SSSE3-NEXT: pxor %xmm0, %xmm0
9983 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
9984 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
9985 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
9986 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
9987 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
9988 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
9989 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13]
9990 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
9993 ; SSE41-LABEL: ult_13_v4i32:
9995 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9996 ; SSE41-NEXT: movdqa %xmm0, %xmm2
9997 ; SSE41-NEXT: pand %xmm1, %xmm2
9998 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9999 ; SSE41-NEXT: movdqa %xmm3, %xmm4
10000 ; SSE41-NEXT: pshufb %xmm2, %xmm4
10001 ; SSE41-NEXT: psrlw $4, %xmm0
10002 ; SSE41-NEXT: pand %xmm1, %xmm0
10003 ; SSE41-NEXT: pshufb %xmm0, %xmm3
10004 ; SSE41-NEXT: paddb %xmm4, %xmm3
10005 ; SSE41-NEXT: pxor %xmm0, %xmm0
10006 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
10007 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
10008 ; SSE41-NEXT: psadbw %xmm0, %xmm3
10009 ; SSE41-NEXT: psadbw %xmm0, %xmm1
10010 ; SSE41-NEXT: packuswb %xmm3, %xmm1
10011 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13]
10012 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
10015 ; AVX1-LABEL: ult_13_v4i32:
10017 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10018 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
10019 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10020 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
10021 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
10022 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
10023 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
10024 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
10025 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
10026 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10027 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10028 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10029 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10030 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10031 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13]
10032 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10035 ; AVX2-LABEL: ult_13_v4i32:
10037 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10038 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
10039 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10040 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
10041 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
10042 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
10043 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
10044 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
10045 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
10046 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10047 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10048 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10049 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10050 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10051 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
10052 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10055 ; AVX512VPOPCNTDQ-LABEL: ult_13_v4i32:
10056 ; AVX512VPOPCNTDQ: # %bb.0:
10057 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10058 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
10059 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
10060 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10061 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
10062 ; AVX512VPOPCNTDQ-NEXT: retq
10064 ; AVX512VPOPCNTDQVL-LABEL: ult_13_v4i32:
10065 ; AVX512VPOPCNTDQVL: # %bb.0:
10066 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
10067 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
10068 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
10069 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
10070 ; AVX512VPOPCNTDQVL-NEXT: retq
10072 ; BITALG_NOVLX-LABEL: ult_13_v4i32:
10073 ; BITALG_NOVLX: # %bb.0:
10074 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10075 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
10076 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
10077 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10078 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10079 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10080 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10081 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10082 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
10083 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10084 ; BITALG_NOVLX-NEXT: vzeroupper
10085 ; BITALG_NOVLX-NEXT: retq
10087 ; BITALG-LABEL: ult_13_v4i32:
10089 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
10090 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
10091 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10092 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10093 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10094 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10095 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10096 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
10097 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
10098 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
10099 ; BITALG-NEXT: retq
10100 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
10101 %3 = icmp ult <4 x i32> %2, <i32 13, i32 13, i32 13, i32 13>
10102 %4 = sext <4 x i1> %3 to <4 x i32>
10106 define <4 x i32> @ugt_13_v4i32(<4 x i32> %0) {
10107 ; SSE2-LABEL: ugt_13_v4i32:
10109 ; SSE2-NEXT: movdqa %xmm0, %xmm1
10110 ; SSE2-NEXT: psrlw $1, %xmm1
10111 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10112 ; SSE2-NEXT: psubb %xmm1, %xmm0
10113 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10114 ; SSE2-NEXT: movdqa %xmm0, %xmm2
10115 ; SSE2-NEXT: pand %xmm1, %xmm2
10116 ; SSE2-NEXT: psrlw $2, %xmm0
10117 ; SSE2-NEXT: pand %xmm1, %xmm0
10118 ; SSE2-NEXT: paddb %xmm2, %xmm0
10119 ; SSE2-NEXT: movdqa %xmm0, %xmm1
10120 ; SSE2-NEXT: psrlw $4, %xmm1
10121 ; SSE2-NEXT: paddb %xmm1, %xmm0
10122 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10123 ; SSE2-NEXT: pxor %xmm1, %xmm1
10124 ; SSE2-NEXT: movdqa %xmm0, %xmm2
10125 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
10126 ; SSE2-NEXT: psadbw %xmm1, %xmm2
10127 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
10128 ; SSE2-NEXT: psadbw %xmm1, %xmm0
10129 ; SSE2-NEXT: packuswb %xmm2, %xmm0
10130 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10133 ; SSE3-LABEL: ugt_13_v4i32:
10135 ; SSE3-NEXT: movdqa %xmm0, %xmm1
10136 ; SSE3-NEXT: psrlw $1, %xmm1
10137 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10138 ; SSE3-NEXT: psubb %xmm1, %xmm0
10139 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10140 ; SSE3-NEXT: movdqa %xmm0, %xmm2
10141 ; SSE3-NEXT: pand %xmm1, %xmm2
10142 ; SSE3-NEXT: psrlw $2, %xmm0
10143 ; SSE3-NEXT: pand %xmm1, %xmm0
10144 ; SSE3-NEXT: paddb %xmm2, %xmm0
10145 ; SSE3-NEXT: movdqa %xmm0, %xmm1
10146 ; SSE3-NEXT: psrlw $4, %xmm1
10147 ; SSE3-NEXT: paddb %xmm1, %xmm0
10148 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10149 ; SSE3-NEXT: pxor %xmm1, %xmm1
10150 ; SSE3-NEXT: movdqa %xmm0, %xmm2
10151 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
10152 ; SSE3-NEXT: psadbw %xmm1, %xmm2
10153 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
10154 ; SSE3-NEXT: psadbw %xmm1, %xmm0
10155 ; SSE3-NEXT: packuswb %xmm2, %xmm0
10156 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10159 ; SSSE3-LABEL: ugt_13_v4i32:
10161 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10162 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
10163 ; SSSE3-NEXT: pand %xmm2, %xmm3
10164 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10165 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
10166 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
10167 ; SSSE3-NEXT: psrlw $4, %xmm0
10168 ; SSSE3-NEXT: pand %xmm2, %xmm0
10169 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
10170 ; SSSE3-NEXT: paddb %xmm4, %xmm1
10171 ; SSSE3-NEXT: pxor %xmm0, %xmm0
10172 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
10173 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
10174 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
10175 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
10176 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
10177 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
10178 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10179 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
10182 ; SSE41-LABEL: ugt_13_v4i32:
10184 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10185 ; SSE41-NEXT: movdqa %xmm0, %xmm2
10186 ; SSE41-NEXT: pand %xmm1, %xmm2
10187 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10188 ; SSE41-NEXT: movdqa %xmm3, %xmm4
10189 ; SSE41-NEXT: pshufb %xmm2, %xmm4
10190 ; SSE41-NEXT: psrlw $4, %xmm0
10191 ; SSE41-NEXT: pand %xmm1, %xmm0
10192 ; SSE41-NEXT: pshufb %xmm0, %xmm3
10193 ; SSE41-NEXT: paddb %xmm4, %xmm3
10194 ; SSE41-NEXT: pxor %xmm1, %xmm1
10195 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
10196 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
10197 ; SSE41-NEXT: psadbw %xmm1, %xmm3
10198 ; SSE41-NEXT: psadbw %xmm1, %xmm0
10199 ; SSE41-NEXT: packuswb %xmm3, %xmm0
10200 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10203 ; AVX1-LABEL: ugt_13_v4i32:
10205 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10206 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
10207 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10208 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
10209 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
10210 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
10211 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
10212 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
10213 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
10214 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10215 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10216 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10217 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10218 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10219 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
10222 ; AVX2-LABEL: ugt_13_v4i32:
10224 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10225 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
10226 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10227 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
10228 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
10229 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
10230 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
10231 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
10232 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
10233 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10234 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10235 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10236 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10237 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10238 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
10239 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
10242 ; AVX512VPOPCNTDQ-LABEL: ugt_13_v4i32:
10243 ; AVX512VPOPCNTDQ: # %bb.0:
10244 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10245 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
10246 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
10247 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
10248 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
10249 ; AVX512VPOPCNTDQ-NEXT: retq
10251 ; AVX512VPOPCNTDQVL-LABEL: ugt_13_v4i32:
10252 ; AVX512VPOPCNTDQVL: # %bb.0:
10253 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
10254 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
10255 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
10256 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
10257 ; AVX512VPOPCNTDQVL-NEXT: retq
10259 ; BITALG_NOVLX-LABEL: ugt_13_v4i32:
10260 ; BITALG_NOVLX: # %bb.0:
10261 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10262 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
10263 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
10264 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10265 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10266 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10267 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10268 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10269 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
10270 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
10271 ; BITALG_NOVLX-NEXT: vzeroupper
10272 ; BITALG_NOVLX-NEXT: retq
10274 ; BITALG-LABEL: ugt_13_v4i32:
10276 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
10277 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
10278 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10279 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10280 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10281 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10282 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10283 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
10284 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
10285 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
10286 ; BITALG-NEXT: retq
10287 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
10288 %3 = icmp ugt <4 x i32> %2, <i32 13, i32 13, i32 13, i32 13>
10289 %4 = sext <4 x i1> %3 to <4 x i32>
10293 define <4 x i32> @ult_14_v4i32(<4 x i32> %0) {
10294 ; SSE2-LABEL: ult_14_v4i32:
10296 ; SSE2-NEXT: movdqa %xmm0, %xmm1
10297 ; SSE2-NEXT: psrlw $1, %xmm1
10298 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10299 ; SSE2-NEXT: psubb %xmm1, %xmm0
10300 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10301 ; SSE2-NEXT: movdqa %xmm0, %xmm2
10302 ; SSE2-NEXT: pand %xmm1, %xmm2
10303 ; SSE2-NEXT: psrlw $2, %xmm0
10304 ; SSE2-NEXT: pand %xmm1, %xmm0
10305 ; SSE2-NEXT: paddb %xmm2, %xmm0
10306 ; SSE2-NEXT: movdqa %xmm0, %xmm1
10307 ; SSE2-NEXT: psrlw $4, %xmm1
10308 ; SSE2-NEXT: paddb %xmm0, %xmm1
10309 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10310 ; SSE2-NEXT: pxor %xmm0, %xmm0
10311 ; SSE2-NEXT: movdqa %xmm1, %xmm2
10312 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
10313 ; SSE2-NEXT: psadbw %xmm0, %xmm2
10314 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
10315 ; SSE2-NEXT: psadbw %xmm0, %xmm1
10316 ; SSE2-NEXT: packuswb %xmm2, %xmm1
10317 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14]
10318 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
10321 ; SSE3-LABEL: ult_14_v4i32:
10323 ; SSE3-NEXT: movdqa %xmm0, %xmm1
10324 ; SSE3-NEXT: psrlw $1, %xmm1
10325 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10326 ; SSE3-NEXT: psubb %xmm1, %xmm0
10327 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10328 ; SSE3-NEXT: movdqa %xmm0, %xmm2
10329 ; SSE3-NEXT: pand %xmm1, %xmm2
10330 ; SSE3-NEXT: psrlw $2, %xmm0
10331 ; SSE3-NEXT: pand %xmm1, %xmm0
10332 ; SSE3-NEXT: paddb %xmm2, %xmm0
10333 ; SSE3-NEXT: movdqa %xmm0, %xmm1
10334 ; SSE3-NEXT: psrlw $4, %xmm1
10335 ; SSE3-NEXT: paddb %xmm0, %xmm1
10336 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10337 ; SSE3-NEXT: pxor %xmm0, %xmm0
10338 ; SSE3-NEXT: movdqa %xmm1, %xmm2
10339 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
10340 ; SSE3-NEXT: psadbw %xmm0, %xmm2
10341 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
10342 ; SSE3-NEXT: psadbw %xmm0, %xmm1
10343 ; SSE3-NEXT: packuswb %xmm2, %xmm1
10344 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14]
10345 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
10348 ; SSSE3-LABEL: ult_14_v4i32:
10350 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10351 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
10352 ; SSSE3-NEXT: pand %xmm1, %xmm2
10353 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10354 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
10355 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
10356 ; SSSE3-NEXT: psrlw $4, %xmm0
10357 ; SSSE3-NEXT: pand %xmm1, %xmm0
10358 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
10359 ; SSSE3-NEXT: paddb %xmm4, %xmm3
10360 ; SSSE3-NEXT: pxor %xmm0, %xmm0
10361 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
10362 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
10363 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
10364 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
10365 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
10366 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
10367 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14]
10368 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
10371 ; SSE41-LABEL: ult_14_v4i32:
10373 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10374 ; SSE41-NEXT: movdqa %xmm0, %xmm2
10375 ; SSE41-NEXT: pand %xmm1, %xmm2
10376 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10377 ; SSE41-NEXT: movdqa %xmm3, %xmm4
10378 ; SSE41-NEXT: pshufb %xmm2, %xmm4
10379 ; SSE41-NEXT: psrlw $4, %xmm0
10380 ; SSE41-NEXT: pand %xmm1, %xmm0
10381 ; SSE41-NEXT: pshufb %xmm0, %xmm3
10382 ; SSE41-NEXT: paddb %xmm4, %xmm3
10383 ; SSE41-NEXT: pxor %xmm0, %xmm0
10384 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
10385 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
10386 ; SSE41-NEXT: psadbw %xmm0, %xmm3
10387 ; SSE41-NEXT: psadbw %xmm0, %xmm1
10388 ; SSE41-NEXT: packuswb %xmm3, %xmm1
10389 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14]
10390 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
10393 ; AVX1-LABEL: ult_14_v4i32:
10395 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10396 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
10397 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10398 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
10399 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
10400 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
10401 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
10402 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
10403 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
10404 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10405 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10406 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10407 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10408 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10409 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14]
10410 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10413 ; AVX2-LABEL: ult_14_v4i32:
10415 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10416 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
10417 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10418 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
10419 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
10420 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
10421 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
10422 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
10423 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
10424 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10425 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10426 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10427 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10428 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10429 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
10430 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10433 ; AVX512VPOPCNTDQ-LABEL: ult_14_v4i32:
10434 ; AVX512VPOPCNTDQ: # %bb.0:
10435 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10436 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
10437 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
10438 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10439 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
10440 ; AVX512VPOPCNTDQ-NEXT: retq
10442 ; AVX512VPOPCNTDQVL-LABEL: ult_14_v4i32:
10443 ; AVX512VPOPCNTDQVL: # %bb.0:
10444 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
10445 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
10446 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
10447 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
10448 ; AVX512VPOPCNTDQVL-NEXT: retq
10450 ; BITALG_NOVLX-LABEL: ult_14_v4i32:
10451 ; BITALG_NOVLX: # %bb.0:
10452 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10453 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
10454 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
10455 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10456 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10457 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10458 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10459 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10460 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
10461 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10462 ; BITALG_NOVLX-NEXT: vzeroupper
10463 ; BITALG_NOVLX-NEXT: retq
10465 ; BITALG-LABEL: ult_14_v4i32:
10467 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
10468 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
10469 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10470 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10471 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10472 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10473 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10474 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
10475 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
10476 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
10477 ; BITALG-NEXT: retq
10478 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
10479 %3 = icmp ult <4 x i32> %2, <i32 14, i32 14, i32 14, i32 14>
10480 %4 = sext <4 x i1> %3 to <4 x i32>
10484 define <4 x i32> @ugt_14_v4i32(<4 x i32> %0) {
10485 ; SSE2-LABEL: ugt_14_v4i32:
10487 ; SSE2-NEXT: movdqa %xmm0, %xmm1
10488 ; SSE2-NEXT: psrlw $1, %xmm1
10489 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10490 ; SSE2-NEXT: psubb %xmm1, %xmm0
10491 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10492 ; SSE2-NEXT: movdqa %xmm0, %xmm2
10493 ; SSE2-NEXT: pand %xmm1, %xmm2
10494 ; SSE2-NEXT: psrlw $2, %xmm0
10495 ; SSE2-NEXT: pand %xmm1, %xmm0
10496 ; SSE2-NEXT: paddb %xmm2, %xmm0
10497 ; SSE2-NEXT: movdqa %xmm0, %xmm1
10498 ; SSE2-NEXT: psrlw $4, %xmm1
10499 ; SSE2-NEXT: paddb %xmm1, %xmm0
10500 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10501 ; SSE2-NEXT: pxor %xmm1, %xmm1
10502 ; SSE2-NEXT: movdqa %xmm0, %xmm2
10503 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
10504 ; SSE2-NEXT: psadbw %xmm1, %xmm2
10505 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
10506 ; SSE2-NEXT: psadbw %xmm1, %xmm0
10507 ; SSE2-NEXT: packuswb %xmm2, %xmm0
10508 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10511 ; SSE3-LABEL: ugt_14_v4i32:
10513 ; SSE3-NEXT: movdqa %xmm0, %xmm1
10514 ; SSE3-NEXT: psrlw $1, %xmm1
10515 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10516 ; SSE3-NEXT: psubb %xmm1, %xmm0
10517 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10518 ; SSE3-NEXT: movdqa %xmm0, %xmm2
10519 ; SSE3-NEXT: pand %xmm1, %xmm2
10520 ; SSE3-NEXT: psrlw $2, %xmm0
10521 ; SSE3-NEXT: pand %xmm1, %xmm0
10522 ; SSE3-NEXT: paddb %xmm2, %xmm0
10523 ; SSE3-NEXT: movdqa %xmm0, %xmm1
10524 ; SSE3-NEXT: psrlw $4, %xmm1
10525 ; SSE3-NEXT: paddb %xmm1, %xmm0
10526 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10527 ; SSE3-NEXT: pxor %xmm1, %xmm1
10528 ; SSE3-NEXT: movdqa %xmm0, %xmm2
10529 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
10530 ; SSE3-NEXT: psadbw %xmm1, %xmm2
10531 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
10532 ; SSE3-NEXT: psadbw %xmm1, %xmm0
10533 ; SSE3-NEXT: packuswb %xmm2, %xmm0
10534 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10537 ; SSSE3-LABEL: ugt_14_v4i32:
10539 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10540 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
10541 ; SSSE3-NEXT: pand %xmm2, %xmm3
10542 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10543 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
10544 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
10545 ; SSSE3-NEXT: psrlw $4, %xmm0
10546 ; SSSE3-NEXT: pand %xmm2, %xmm0
10547 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
10548 ; SSSE3-NEXT: paddb %xmm4, %xmm1
10549 ; SSSE3-NEXT: pxor %xmm0, %xmm0
10550 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
10551 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
10552 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
10553 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
10554 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
10555 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
10556 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10557 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
10560 ; SSE41-LABEL: ugt_14_v4i32:
10562 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10563 ; SSE41-NEXT: movdqa %xmm0, %xmm2
10564 ; SSE41-NEXT: pand %xmm1, %xmm2
10565 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10566 ; SSE41-NEXT: movdqa %xmm3, %xmm4
10567 ; SSE41-NEXT: pshufb %xmm2, %xmm4
10568 ; SSE41-NEXT: psrlw $4, %xmm0
10569 ; SSE41-NEXT: pand %xmm1, %xmm0
10570 ; SSE41-NEXT: pshufb %xmm0, %xmm3
10571 ; SSE41-NEXT: paddb %xmm4, %xmm3
10572 ; SSE41-NEXT: pxor %xmm1, %xmm1
10573 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
10574 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
10575 ; SSE41-NEXT: psadbw %xmm1, %xmm3
10576 ; SSE41-NEXT: psadbw %xmm1, %xmm0
10577 ; SSE41-NEXT: packuswb %xmm3, %xmm0
10578 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10581 ; AVX1-LABEL: ugt_14_v4i32:
10583 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10584 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
10585 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10586 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
10587 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
10588 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
10589 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
10590 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
10591 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
10592 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10593 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10594 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10595 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10596 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10597 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
10600 ; AVX2-LABEL: ugt_14_v4i32:
10602 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10603 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
10604 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10605 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
10606 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
10607 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
10608 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
10609 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
10610 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
10611 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10612 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10613 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10614 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10615 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10616 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
10617 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
10620 ; AVX512VPOPCNTDQ-LABEL: ugt_14_v4i32:
10621 ; AVX512VPOPCNTDQ: # %bb.0:
10622 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10623 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
10624 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
10625 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
10626 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
10627 ; AVX512VPOPCNTDQ-NEXT: retq
10629 ; AVX512VPOPCNTDQVL-LABEL: ugt_14_v4i32:
10630 ; AVX512VPOPCNTDQVL: # %bb.0:
10631 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
10632 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
10633 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
10634 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
10635 ; AVX512VPOPCNTDQVL-NEXT: retq
10637 ; BITALG_NOVLX-LABEL: ugt_14_v4i32:
10638 ; BITALG_NOVLX: # %bb.0:
10639 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10640 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
10641 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
10642 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10643 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10644 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10645 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10646 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10647 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
10648 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
10649 ; BITALG_NOVLX-NEXT: vzeroupper
10650 ; BITALG_NOVLX-NEXT: retq
10652 ; BITALG-LABEL: ugt_14_v4i32:
10654 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
10655 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
10656 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10657 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10658 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10659 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10660 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10661 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
10662 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
10663 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
10664 ; BITALG-NEXT: retq
10665 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
10666 %3 = icmp ugt <4 x i32> %2, <i32 14, i32 14, i32 14, i32 14>
10667 %4 = sext <4 x i1> %3 to <4 x i32>
10671 define <4 x i32> @ult_15_v4i32(<4 x i32> %0) {
10672 ; SSE2-LABEL: ult_15_v4i32:
10674 ; SSE2-NEXT: movdqa %xmm0, %xmm1
10675 ; SSE2-NEXT: psrlw $1, %xmm1
10676 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10677 ; SSE2-NEXT: psubb %xmm1, %xmm0
10678 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10679 ; SSE2-NEXT: movdqa %xmm0, %xmm2
10680 ; SSE2-NEXT: pand %xmm1, %xmm2
10681 ; SSE2-NEXT: psrlw $2, %xmm0
10682 ; SSE2-NEXT: pand %xmm1, %xmm0
10683 ; SSE2-NEXT: paddb %xmm2, %xmm0
10684 ; SSE2-NEXT: movdqa %xmm0, %xmm1
10685 ; SSE2-NEXT: psrlw $4, %xmm1
10686 ; SSE2-NEXT: paddb %xmm0, %xmm1
10687 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10688 ; SSE2-NEXT: pxor %xmm0, %xmm0
10689 ; SSE2-NEXT: movdqa %xmm1, %xmm2
10690 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
10691 ; SSE2-NEXT: psadbw %xmm0, %xmm2
10692 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
10693 ; SSE2-NEXT: psadbw %xmm0, %xmm1
10694 ; SSE2-NEXT: packuswb %xmm2, %xmm1
10695 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15]
10696 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
10699 ; SSE3-LABEL: ult_15_v4i32:
10701 ; SSE3-NEXT: movdqa %xmm0, %xmm1
10702 ; SSE3-NEXT: psrlw $1, %xmm1
10703 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10704 ; SSE3-NEXT: psubb %xmm1, %xmm0
10705 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10706 ; SSE3-NEXT: movdqa %xmm0, %xmm2
10707 ; SSE3-NEXT: pand %xmm1, %xmm2
10708 ; SSE3-NEXT: psrlw $2, %xmm0
10709 ; SSE3-NEXT: pand %xmm1, %xmm0
10710 ; SSE3-NEXT: paddb %xmm2, %xmm0
10711 ; SSE3-NEXT: movdqa %xmm0, %xmm1
10712 ; SSE3-NEXT: psrlw $4, %xmm1
10713 ; SSE3-NEXT: paddb %xmm0, %xmm1
10714 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10715 ; SSE3-NEXT: pxor %xmm0, %xmm0
10716 ; SSE3-NEXT: movdqa %xmm1, %xmm2
10717 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
10718 ; SSE3-NEXT: psadbw %xmm0, %xmm2
10719 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
10720 ; SSE3-NEXT: psadbw %xmm0, %xmm1
10721 ; SSE3-NEXT: packuswb %xmm2, %xmm1
10722 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15]
10723 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
10726 ; SSSE3-LABEL: ult_15_v4i32:
10728 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10729 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
10730 ; SSSE3-NEXT: pand %xmm1, %xmm2
10731 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10732 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
10733 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
10734 ; SSSE3-NEXT: psrlw $4, %xmm0
10735 ; SSSE3-NEXT: pand %xmm1, %xmm0
10736 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
10737 ; SSSE3-NEXT: paddb %xmm4, %xmm3
10738 ; SSSE3-NEXT: pxor %xmm0, %xmm0
10739 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
10740 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
10741 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
10742 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
10743 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
10744 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
10745 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15]
10746 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
10749 ; SSE41-LABEL: ult_15_v4i32:
10751 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10752 ; SSE41-NEXT: movdqa %xmm0, %xmm2
10753 ; SSE41-NEXT: pand %xmm1, %xmm2
10754 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10755 ; SSE41-NEXT: movdqa %xmm3, %xmm4
10756 ; SSE41-NEXT: pshufb %xmm2, %xmm4
10757 ; SSE41-NEXT: psrlw $4, %xmm0
10758 ; SSE41-NEXT: pand %xmm1, %xmm0
10759 ; SSE41-NEXT: pshufb %xmm0, %xmm3
10760 ; SSE41-NEXT: paddb %xmm4, %xmm3
10761 ; SSE41-NEXT: pxor %xmm0, %xmm0
10762 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
10763 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
10764 ; SSE41-NEXT: psadbw %xmm0, %xmm3
10765 ; SSE41-NEXT: psadbw %xmm0, %xmm1
10766 ; SSE41-NEXT: packuswb %xmm3, %xmm1
10767 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15]
10768 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
10771 ; AVX1-LABEL: ult_15_v4i32:
10773 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10774 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
10775 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10776 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
10777 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
10778 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
10779 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
10780 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
10781 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
10782 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10783 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10784 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10785 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10786 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10787 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15]
10788 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10791 ; AVX2-LABEL: ult_15_v4i32:
10793 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10794 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
10795 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10796 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
10797 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
10798 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
10799 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
10800 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
10801 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
10802 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10803 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10804 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10805 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10806 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10807 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
10808 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10811 ; AVX512VPOPCNTDQ-LABEL: ult_15_v4i32:
10812 ; AVX512VPOPCNTDQ: # %bb.0:
10813 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10814 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
10815 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
10816 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10817 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
10818 ; AVX512VPOPCNTDQ-NEXT: retq
10820 ; AVX512VPOPCNTDQVL-LABEL: ult_15_v4i32:
10821 ; AVX512VPOPCNTDQVL: # %bb.0:
10822 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
10823 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
10824 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
10825 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
10826 ; AVX512VPOPCNTDQVL-NEXT: retq
10828 ; BITALG_NOVLX-LABEL: ult_15_v4i32:
10829 ; BITALG_NOVLX: # %bb.0:
10830 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10831 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
10832 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
10833 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10834 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10835 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10836 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10837 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10838 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
10839 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10840 ; BITALG_NOVLX-NEXT: vzeroupper
10841 ; BITALG_NOVLX-NEXT: retq
10843 ; BITALG-LABEL: ult_15_v4i32:
10845 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
10846 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
10847 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10848 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10849 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10850 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10851 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10852 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
10853 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
10854 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
10855 ; BITALG-NEXT: retq
10856 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
10857 %3 = icmp ult <4 x i32> %2, <i32 15, i32 15, i32 15, i32 15>
10858 %4 = sext <4 x i1> %3 to <4 x i32>
10862 define <4 x i32> @ugt_15_v4i32(<4 x i32> %0) {
10863 ; SSE2-LABEL: ugt_15_v4i32:
10865 ; SSE2-NEXT: movdqa %xmm0, %xmm1
10866 ; SSE2-NEXT: psrlw $1, %xmm1
10867 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10868 ; SSE2-NEXT: psubb %xmm1, %xmm0
10869 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10870 ; SSE2-NEXT: movdqa %xmm0, %xmm2
10871 ; SSE2-NEXT: pand %xmm1, %xmm2
10872 ; SSE2-NEXT: psrlw $2, %xmm0
10873 ; SSE2-NEXT: pand %xmm1, %xmm0
10874 ; SSE2-NEXT: paddb %xmm2, %xmm0
10875 ; SSE2-NEXT: movdqa %xmm0, %xmm1
10876 ; SSE2-NEXT: psrlw $4, %xmm1
10877 ; SSE2-NEXT: paddb %xmm1, %xmm0
10878 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10879 ; SSE2-NEXT: pxor %xmm1, %xmm1
10880 ; SSE2-NEXT: movdqa %xmm0, %xmm2
10881 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
10882 ; SSE2-NEXT: psadbw %xmm1, %xmm2
10883 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
10884 ; SSE2-NEXT: psadbw %xmm1, %xmm0
10885 ; SSE2-NEXT: packuswb %xmm2, %xmm0
10886 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10889 ; SSE3-LABEL: ugt_15_v4i32:
10891 ; SSE3-NEXT: movdqa %xmm0, %xmm1
10892 ; SSE3-NEXT: psrlw $1, %xmm1
10893 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10894 ; SSE3-NEXT: psubb %xmm1, %xmm0
10895 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10896 ; SSE3-NEXT: movdqa %xmm0, %xmm2
10897 ; SSE3-NEXT: pand %xmm1, %xmm2
10898 ; SSE3-NEXT: psrlw $2, %xmm0
10899 ; SSE3-NEXT: pand %xmm1, %xmm0
10900 ; SSE3-NEXT: paddb %xmm2, %xmm0
10901 ; SSE3-NEXT: movdqa %xmm0, %xmm1
10902 ; SSE3-NEXT: psrlw $4, %xmm1
10903 ; SSE3-NEXT: paddb %xmm1, %xmm0
10904 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10905 ; SSE3-NEXT: pxor %xmm1, %xmm1
10906 ; SSE3-NEXT: movdqa %xmm0, %xmm2
10907 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
10908 ; SSE3-NEXT: psadbw %xmm1, %xmm2
10909 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
10910 ; SSE3-NEXT: psadbw %xmm1, %xmm0
10911 ; SSE3-NEXT: packuswb %xmm2, %xmm0
10912 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10915 ; SSSE3-LABEL: ugt_15_v4i32:
10917 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10918 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
10919 ; SSSE3-NEXT: pand %xmm2, %xmm3
10920 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10921 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
10922 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
10923 ; SSSE3-NEXT: psrlw $4, %xmm0
10924 ; SSSE3-NEXT: pand %xmm2, %xmm0
10925 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
10926 ; SSSE3-NEXT: paddb %xmm4, %xmm1
10927 ; SSSE3-NEXT: pxor %xmm0, %xmm0
10928 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
10929 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
10930 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
10931 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
10932 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
10933 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
10934 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10935 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
10938 ; SSE41-LABEL: ugt_15_v4i32:
10940 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10941 ; SSE41-NEXT: movdqa %xmm0, %xmm2
10942 ; SSE41-NEXT: pand %xmm1, %xmm2
10943 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10944 ; SSE41-NEXT: movdqa %xmm3, %xmm4
10945 ; SSE41-NEXT: pshufb %xmm2, %xmm4
10946 ; SSE41-NEXT: psrlw $4, %xmm0
10947 ; SSE41-NEXT: pand %xmm1, %xmm0
10948 ; SSE41-NEXT: pshufb %xmm0, %xmm3
10949 ; SSE41-NEXT: paddb %xmm4, %xmm3
10950 ; SSE41-NEXT: pxor %xmm1, %xmm1
10951 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
10952 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
10953 ; SSE41-NEXT: psadbw %xmm1, %xmm3
10954 ; SSE41-NEXT: psadbw %xmm1, %xmm0
10955 ; SSE41-NEXT: packuswb %xmm3, %xmm0
10956 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10959 ; AVX1-LABEL: ugt_15_v4i32:
10961 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10962 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
10963 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10964 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
10965 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
10966 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
10967 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
10968 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
10969 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
10970 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10971 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10972 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10973 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10974 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10975 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
10978 ; AVX2-LABEL: ugt_15_v4i32:
10980 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10981 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
10982 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10983 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
10984 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
10985 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
10986 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
10987 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
10988 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
10989 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10990 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10991 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10992 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10993 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10994 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
10995 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
10998 ; AVX512VPOPCNTDQ-LABEL: ugt_15_v4i32:
10999 ; AVX512VPOPCNTDQ: # %bb.0:
11000 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11001 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
11002 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
11003 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
11004 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
11005 ; AVX512VPOPCNTDQ-NEXT: retq
11007 ; AVX512VPOPCNTDQVL-LABEL: ugt_15_v4i32:
11008 ; AVX512VPOPCNTDQVL: # %bb.0:
11009 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
11010 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
11011 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
11012 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
11013 ; AVX512VPOPCNTDQVL-NEXT: retq
11015 ; BITALG_NOVLX-LABEL: ugt_15_v4i32:
11016 ; BITALG_NOVLX: # %bb.0:
11017 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11018 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
11019 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
11020 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11021 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11022 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11023 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11024 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11025 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
11026 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
11027 ; BITALG_NOVLX-NEXT: vzeroupper
11028 ; BITALG_NOVLX-NEXT: retq
11030 ; BITALG-LABEL: ugt_15_v4i32:
11032 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
11033 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
11034 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11035 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11036 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11037 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11038 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11039 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
11040 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
11041 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
11042 ; BITALG-NEXT: retq
11043 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
11044 %3 = icmp ugt <4 x i32> %2, <i32 15, i32 15, i32 15, i32 15>
11045 %4 = sext <4 x i1> %3 to <4 x i32>
11049 define <4 x i32> @ult_16_v4i32(<4 x i32> %0) {
11050 ; SSE2-LABEL: ult_16_v4i32:
11052 ; SSE2-NEXT: movdqa %xmm0, %xmm1
11053 ; SSE2-NEXT: psrlw $1, %xmm1
11054 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11055 ; SSE2-NEXT: psubb %xmm1, %xmm0
11056 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11057 ; SSE2-NEXT: movdqa %xmm0, %xmm2
11058 ; SSE2-NEXT: pand %xmm1, %xmm2
11059 ; SSE2-NEXT: psrlw $2, %xmm0
11060 ; SSE2-NEXT: pand %xmm1, %xmm0
11061 ; SSE2-NEXT: paddb %xmm2, %xmm0
11062 ; SSE2-NEXT: movdqa %xmm0, %xmm1
11063 ; SSE2-NEXT: psrlw $4, %xmm1
11064 ; SSE2-NEXT: paddb %xmm0, %xmm1
11065 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11066 ; SSE2-NEXT: pxor %xmm0, %xmm0
11067 ; SSE2-NEXT: movdqa %xmm1, %xmm2
11068 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
11069 ; SSE2-NEXT: psadbw %xmm0, %xmm2
11070 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
11071 ; SSE2-NEXT: psadbw %xmm0, %xmm1
11072 ; SSE2-NEXT: packuswb %xmm2, %xmm1
11073 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16]
11074 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
11077 ; SSE3-LABEL: ult_16_v4i32:
11079 ; SSE3-NEXT: movdqa %xmm0, %xmm1
11080 ; SSE3-NEXT: psrlw $1, %xmm1
11081 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11082 ; SSE3-NEXT: psubb %xmm1, %xmm0
11083 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11084 ; SSE3-NEXT: movdqa %xmm0, %xmm2
11085 ; SSE3-NEXT: pand %xmm1, %xmm2
11086 ; SSE3-NEXT: psrlw $2, %xmm0
11087 ; SSE3-NEXT: pand %xmm1, %xmm0
11088 ; SSE3-NEXT: paddb %xmm2, %xmm0
11089 ; SSE3-NEXT: movdqa %xmm0, %xmm1
11090 ; SSE3-NEXT: psrlw $4, %xmm1
11091 ; SSE3-NEXT: paddb %xmm0, %xmm1
11092 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11093 ; SSE3-NEXT: pxor %xmm0, %xmm0
11094 ; SSE3-NEXT: movdqa %xmm1, %xmm2
11095 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
11096 ; SSE3-NEXT: psadbw %xmm0, %xmm2
11097 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
11098 ; SSE3-NEXT: psadbw %xmm0, %xmm1
11099 ; SSE3-NEXT: packuswb %xmm2, %xmm1
11100 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16]
11101 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
11104 ; SSSE3-LABEL: ult_16_v4i32:
11106 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11107 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
11108 ; SSSE3-NEXT: pand %xmm1, %xmm2
11109 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11110 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
11111 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
11112 ; SSSE3-NEXT: psrlw $4, %xmm0
11113 ; SSSE3-NEXT: pand %xmm1, %xmm0
11114 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
11115 ; SSSE3-NEXT: paddb %xmm4, %xmm3
11116 ; SSSE3-NEXT: pxor %xmm0, %xmm0
11117 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
11118 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
11119 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
11120 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
11121 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
11122 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
11123 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16]
11124 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
11127 ; SSE41-LABEL: ult_16_v4i32:
11129 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11130 ; SSE41-NEXT: movdqa %xmm0, %xmm2
11131 ; SSE41-NEXT: pand %xmm1, %xmm2
11132 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11133 ; SSE41-NEXT: movdqa %xmm3, %xmm4
11134 ; SSE41-NEXT: pshufb %xmm2, %xmm4
11135 ; SSE41-NEXT: psrlw $4, %xmm0
11136 ; SSE41-NEXT: pand %xmm1, %xmm0
11137 ; SSE41-NEXT: pshufb %xmm0, %xmm3
11138 ; SSE41-NEXT: paddb %xmm4, %xmm3
11139 ; SSE41-NEXT: pxor %xmm0, %xmm0
11140 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
11141 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
11142 ; SSE41-NEXT: psadbw %xmm0, %xmm3
11143 ; SSE41-NEXT: psadbw %xmm0, %xmm1
11144 ; SSE41-NEXT: packuswb %xmm3, %xmm1
11145 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16]
11146 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
11149 ; AVX1-LABEL: ult_16_v4i32:
11151 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11152 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
11153 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11154 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
11155 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
11156 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
11157 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
11158 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
11159 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
11160 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11161 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11162 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11163 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11164 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11165 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16]
11166 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11169 ; AVX2-LABEL: ult_16_v4i32:
11171 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11172 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
11173 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11174 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
11175 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
11176 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
11177 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
11178 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
11179 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
11180 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11181 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11182 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11183 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11184 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11185 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
11186 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11189 ; AVX512VPOPCNTDQ-LABEL: ult_16_v4i32:
11190 ; AVX512VPOPCNTDQ: # %bb.0:
11191 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11192 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
11193 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
11194 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11195 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
11196 ; AVX512VPOPCNTDQ-NEXT: retq
11198 ; AVX512VPOPCNTDQVL-LABEL: ult_16_v4i32:
11199 ; AVX512VPOPCNTDQVL: # %bb.0:
11200 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
11201 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
11202 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
11203 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
11204 ; AVX512VPOPCNTDQVL-NEXT: retq
11206 ; BITALG_NOVLX-LABEL: ult_16_v4i32:
11207 ; BITALG_NOVLX: # %bb.0:
11208 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11209 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
11210 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
11211 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11212 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11213 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11214 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11215 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11216 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
11217 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11218 ; BITALG_NOVLX-NEXT: vzeroupper
11219 ; BITALG_NOVLX-NEXT: retq
11221 ; BITALG-LABEL: ult_16_v4i32:
11223 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
11224 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
11225 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11226 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11227 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11228 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11229 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11230 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
11231 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
11232 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
11233 ; BITALG-NEXT: retq
11234 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
11235 %3 = icmp ult <4 x i32> %2, <i32 16, i32 16, i32 16, i32 16>
11236 %4 = sext <4 x i1> %3 to <4 x i32>
11240 define <4 x i32> @ugt_16_v4i32(<4 x i32> %0) {
11241 ; SSE2-LABEL: ugt_16_v4i32:
11243 ; SSE2-NEXT: movdqa %xmm0, %xmm1
11244 ; SSE2-NEXT: psrlw $1, %xmm1
11245 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11246 ; SSE2-NEXT: psubb %xmm1, %xmm0
11247 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11248 ; SSE2-NEXT: movdqa %xmm0, %xmm2
11249 ; SSE2-NEXT: pand %xmm1, %xmm2
11250 ; SSE2-NEXT: psrlw $2, %xmm0
11251 ; SSE2-NEXT: pand %xmm1, %xmm0
11252 ; SSE2-NEXT: paddb %xmm2, %xmm0
11253 ; SSE2-NEXT: movdqa %xmm0, %xmm1
11254 ; SSE2-NEXT: psrlw $4, %xmm1
11255 ; SSE2-NEXT: paddb %xmm1, %xmm0
11256 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11257 ; SSE2-NEXT: pxor %xmm1, %xmm1
11258 ; SSE2-NEXT: movdqa %xmm0, %xmm2
11259 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
11260 ; SSE2-NEXT: psadbw %xmm1, %xmm2
11261 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
11262 ; SSE2-NEXT: psadbw %xmm1, %xmm0
11263 ; SSE2-NEXT: packuswb %xmm2, %xmm0
11264 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11267 ; SSE3-LABEL: ugt_16_v4i32:
11269 ; SSE3-NEXT: movdqa %xmm0, %xmm1
11270 ; SSE3-NEXT: psrlw $1, %xmm1
11271 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11272 ; SSE3-NEXT: psubb %xmm1, %xmm0
11273 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11274 ; SSE3-NEXT: movdqa %xmm0, %xmm2
11275 ; SSE3-NEXT: pand %xmm1, %xmm2
11276 ; SSE3-NEXT: psrlw $2, %xmm0
11277 ; SSE3-NEXT: pand %xmm1, %xmm0
11278 ; SSE3-NEXT: paddb %xmm2, %xmm0
11279 ; SSE3-NEXT: movdqa %xmm0, %xmm1
11280 ; SSE3-NEXT: psrlw $4, %xmm1
11281 ; SSE3-NEXT: paddb %xmm1, %xmm0
11282 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11283 ; SSE3-NEXT: pxor %xmm1, %xmm1
11284 ; SSE3-NEXT: movdqa %xmm0, %xmm2
11285 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
11286 ; SSE3-NEXT: psadbw %xmm1, %xmm2
11287 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
11288 ; SSE3-NEXT: psadbw %xmm1, %xmm0
11289 ; SSE3-NEXT: packuswb %xmm2, %xmm0
11290 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11293 ; SSSE3-LABEL: ugt_16_v4i32:
11295 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11296 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
11297 ; SSSE3-NEXT: pand %xmm2, %xmm3
11298 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11299 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
11300 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
11301 ; SSSE3-NEXT: psrlw $4, %xmm0
11302 ; SSSE3-NEXT: pand %xmm2, %xmm0
11303 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
11304 ; SSSE3-NEXT: paddb %xmm4, %xmm1
11305 ; SSSE3-NEXT: pxor %xmm0, %xmm0
11306 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
11307 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
11308 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
11309 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
11310 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
11311 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
11312 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11313 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
11316 ; SSE41-LABEL: ugt_16_v4i32:
11318 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11319 ; SSE41-NEXT: movdqa %xmm0, %xmm2
11320 ; SSE41-NEXT: pand %xmm1, %xmm2
11321 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11322 ; SSE41-NEXT: movdqa %xmm3, %xmm4
11323 ; SSE41-NEXT: pshufb %xmm2, %xmm4
11324 ; SSE41-NEXT: psrlw $4, %xmm0
11325 ; SSE41-NEXT: pand %xmm1, %xmm0
11326 ; SSE41-NEXT: pshufb %xmm0, %xmm3
11327 ; SSE41-NEXT: paddb %xmm4, %xmm3
11328 ; SSE41-NEXT: pxor %xmm1, %xmm1
11329 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
11330 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
11331 ; SSE41-NEXT: psadbw %xmm1, %xmm3
11332 ; SSE41-NEXT: psadbw %xmm1, %xmm0
11333 ; SSE41-NEXT: packuswb %xmm3, %xmm0
11334 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11337 ; AVX1-LABEL: ugt_16_v4i32:
11339 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11340 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
11341 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11342 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
11343 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
11344 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
11345 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
11346 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
11347 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
11348 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11349 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11350 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11351 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11352 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11353 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
11356 ; AVX2-LABEL: ugt_16_v4i32:
11358 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11359 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
11360 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11361 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
11362 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
11363 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
11364 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
11365 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
11366 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
11367 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11368 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11369 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11370 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11371 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11372 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
11373 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
11376 ; AVX512VPOPCNTDQ-LABEL: ugt_16_v4i32:
11377 ; AVX512VPOPCNTDQ: # %bb.0:
11378 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11379 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
11380 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
11381 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
11382 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
11383 ; AVX512VPOPCNTDQ-NEXT: retq
11385 ; AVX512VPOPCNTDQVL-LABEL: ugt_16_v4i32:
11386 ; AVX512VPOPCNTDQVL: # %bb.0:
11387 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
11388 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
11389 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
11390 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
11391 ; AVX512VPOPCNTDQVL-NEXT: retq
11393 ; BITALG_NOVLX-LABEL: ugt_16_v4i32:
11394 ; BITALG_NOVLX: # %bb.0:
11395 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11396 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
11397 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
11398 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11399 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11400 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11401 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11402 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11403 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
11404 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
11405 ; BITALG_NOVLX-NEXT: vzeroupper
11406 ; BITALG_NOVLX-NEXT: retq
11408 ; BITALG-LABEL: ugt_16_v4i32:
11410 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
11411 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
11412 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11413 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11414 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11415 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11416 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11417 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
11418 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
11419 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
11420 ; BITALG-NEXT: retq
11421 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
11422 %3 = icmp ugt <4 x i32> %2, <i32 16, i32 16, i32 16, i32 16>
11423 %4 = sext <4 x i1> %3 to <4 x i32>
11427 define <4 x i32> @ult_17_v4i32(<4 x i32> %0) {
11428 ; SSE2-LABEL: ult_17_v4i32:
11430 ; SSE2-NEXT: movdqa %xmm0, %xmm1
11431 ; SSE2-NEXT: psrlw $1, %xmm1
11432 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11433 ; SSE2-NEXT: psubb %xmm1, %xmm0
11434 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11435 ; SSE2-NEXT: movdqa %xmm0, %xmm2
11436 ; SSE2-NEXT: pand %xmm1, %xmm2
11437 ; SSE2-NEXT: psrlw $2, %xmm0
11438 ; SSE2-NEXT: pand %xmm1, %xmm0
11439 ; SSE2-NEXT: paddb %xmm2, %xmm0
11440 ; SSE2-NEXT: movdqa %xmm0, %xmm1
11441 ; SSE2-NEXT: psrlw $4, %xmm1
11442 ; SSE2-NEXT: paddb %xmm0, %xmm1
11443 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11444 ; SSE2-NEXT: pxor %xmm0, %xmm0
11445 ; SSE2-NEXT: movdqa %xmm1, %xmm2
11446 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
11447 ; SSE2-NEXT: psadbw %xmm0, %xmm2
11448 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
11449 ; SSE2-NEXT: psadbw %xmm0, %xmm1
11450 ; SSE2-NEXT: packuswb %xmm2, %xmm1
11451 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [17,17,17,17]
11452 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
11455 ; SSE3-LABEL: ult_17_v4i32:
11457 ; SSE3-NEXT: movdqa %xmm0, %xmm1
11458 ; SSE3-NEXT: psrlw $1, %xmm1
11459 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11460 ; SSE3-NEXT: psubb %xmm1, %xmm0
11461 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11462 ; SSE3-NEXT: movdqa %xmm0, %xmm2
11463 ; SSE3-NEXT: pand %xmm1, %xmm2
11464 ; SSE3-NEXT: psrlw $2, %xmm0
11465 ; SSE3-NEXT: pand %xmm1, %xmm0
11466 ; SSE3-NEXT: paddb %xmm2, %xmm0
11467 ; SSE3-NEXT: movdqa %xmm0, %xmm1
11468 ; SSE3-NEXT: psrlw $4, %xmm1
11469 ; SSE3-NEXT: paddb %xmm0, %xmm1
11470 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11471 ; SSE3-NEXT: pxor %xmm0, %xmm0
11472 ; SSE3-NEXT: movdqa %xmm1, %xmm2
11473 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
11474 ; SSE3-NEXT: psadbw %xmm0, %xmm2
11475 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
11476 ; SSE3-NEXT: psadbw %xmm0, %xmm1
11477 ; SSE3-NEXT: packuswb %xmm2, %xmm1
11478 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [17,17,17,17]
11479 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
11482 ; SSSE3-LABEL: ult_17_v4i32:
11484 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11485 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
11486 ; SSSE3-NEXT: pand %xmm1, %xmm2
11487 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11488 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
11489 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
11490 ; SSSE3-NEXT: psrlw $4, %xmm0
11491 ; SSSE3-NEXT: pand %xmm1, %xmm0
11492 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
11493 ; SSSE3-NEXT: paddb %xmm4, %xmm3
11494 ; SSSE3-NEXT: pxor %xmm0, %xmm0
11495 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
11496 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
11497 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
11498 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
11499 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
11500 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
11501 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [17,17,17,17]
11502 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
11505 ; SSE41-LABEL: ult_17_v4i32:
11507 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11508 ; SSE41-NEXT: movdqa %xmm0, %xmm2
11509 ; SSE41-NEXT: pand %xmm1, %xmm2
11510 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11511 ; SSE41-NEXT: movdqa %xmm3, %xmm4
11512 ; SSE41-NEXT: pshufb %xmm2, %xmm4
11513 ; SSE41-NEXT: psrlw $4, %xmm0
11514 ; SSE41-NEXT: pand %xmm1, %xmm0
11515 ; SSE41-NEXT: pshufb %xmm0, %xmm3
11516 ; SSE41-NEXT: paddb %xmm4, %xmm3
11517 ; SSE41-NEXT: pxor %xmm0, %xmm0
11518 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
11519 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
11520 ; SSE41-NEXT: psadbw %xmm0, %xmm3
11521 ; SSE41-NEXT: psadbw %xmm0, %xmm1
11522 ; SSE41-NEXT: packuswb %xmm3, %xmm1
11523 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [17,17,17,17]
11524 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
11527 ; AVX1-LABEL: ult_17_v4i32:
11529 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11530 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
11531 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11532 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
11533 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
11534 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
11535 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
11536 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
11537 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
11538 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11539 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11540 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11541 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11542 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11543 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17,17,17]
11544 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11547 ; AVX2-LABEL: ult_17_v4i32:
11549 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11550 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
11551 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11552 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
11553 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
11554 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
11555 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
11556 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
11557 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
11558 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11559 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11560 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11561 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11562 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11563 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
11564 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11567 ; AVX512VPOPCNTDQ-LABEL: ult_17_v4i32:
11568 ; AVX512VPOPCNTDQ: # %bb.0:
11569 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11570 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
11571 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
11572 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11573 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
11574 ; AVX512VPOPCNTDQ-NEXT: retq
11576 ; AVX512VPOPCNTDQVL-LABEL: ult_17_v4i32:
11577 ; AVX512VPOPCNTDQVL: # %bb.0:
11578 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
11579 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
11580 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
11581 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
11582 ; AVX512VPOPCNTDQVL-NEXT: retq
11584 ; BITALG_NOVLX-LABEL: ult_17_v4i32:
11585 ; BITALG_NOVLX: # %bb.0:
11586 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11587 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
11588 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
11589 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11590 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11591 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11592 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11593 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11594 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
11595 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11596 ; BITALG_NOVLX-NEXT: vzeroupper
11597 ; BITALG_NOVLX-NEXT: retq
11599 ; BITALG-LABEL: ult_17_v4i32:
11601 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
11602 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
11603 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11604 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11605 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11606 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11607 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11608 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
11609 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
11610 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
11611 ; BITALG-NEXT: retq
11612 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
11613 %3 = icmp ult <4 x i32> %2, <i32 17, i32 17, i32 17, i32 17>
11614 %4 = sext <4 x i1> %3 to <4 x i32>
11618 define <4 x i32> @ugt_17_v4i32(<4 x i32> %0) {
11619 ; SSE2-LABEL: ugt_17_v4i32:
11621 ; SSE2-NEXT: movdqa %xmm0, %xmm1
11622 ; SSE2-NEXT: psrlw $1, %xmm1
11623 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11624 ; SSE2-NEXT: psubb %xmm1, %xmm0
11625 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11626 ; SSE2-NEXT: movdqa %xmm0, %xmm2
11627 ; SSE2-NEXT: pand %xmm1, %xmm2
11628 ; SSE2-NEXT: psrlw $2, %xmm0
11629 ; SSE2-NEXT: pand %xmm1, %xmm0
11630 ; SSE2-NEXT: paddb %xmm2, %xmm0
11631 ; SSE2-NEXT: movdqa %xmm0, %xmm1
11632 ; SSE2-NEXT: psrlw $4, %xmm1
11633 ; SSE2-NEXT: paddb %xmm1, %xmm0
11634 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11635 ; SSE2-NEXT: pxor %xmm1, %xmm1
11636 ; SSE2-NEXT: movdqa %xmm0, %xmm2
11637 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
11638 ; SSE2-NEXT: psadbw %xmm1, %xmm2
11639 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
11640 ; SSE2-NEXT: psadbw %xmm1, %xmm0
11641 ; SSE2-NEXT: packuswb %xmm2, %xmm0
11642 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11645 ; SSE3-LABEL: ugt_17_v4i32:
11647 ; SSE3-NEXT: movdqa %xmm0, %xmm1
11648 ; SSE3-NEXT: psrlw $1, %xmm1
11649 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11650 ; SSE3-NEXT: psubb %xmm1, %xmm0
11651 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11652 ; SSE3-NEXT: movdqa %xmm0, %xmm2
11653 ; SSE3-NEXT: pand %xmm1, %xmm2
11654 ; SSE3-NEXT: psrlw $2, %xmm0
11655 ; SSE3-NEXT: pand %xmm1, %xmm0
11656 ; SSE3-NEXT: paddb %xmm2, %xmm0
11657 ; SSE3-NEXT: movdqa %xmm0, %xmm1
11658 ; SSE3-NEXT: psrlw $4, %xmm1
11659 ; SSE3-NEXT: paddb %xmm1, %xmm0
11660 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11661 ; SSE3-NEXT: pxor %xmm1, %xmm1
11662 ; SSE3-NEXT: movdqa %xmm0, %xmm2
11663 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
11664 ; SSE3-NEXT: psadbw %xmm1, %xmm2
11665 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
11666 ; SSE3-NEXT: psadbw %xmm1, %xmm0
11667 ; SSE3-NEXT: packuswb %xmm2, %xmm0
11668 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11671 ; SSSE3-LABEL: ugt_17_v4i32:
11673 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11674 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
11675 ; SSSE3-NEXT: pand %xmm2, %xmm3
11676 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11677 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
11678 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
11679 ; SSSE3-NEXT: psrlw $4, %xmm0
11680 ; SSSE3-NEXT: pand %xmm2, %xmm0
11681 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
11682 ; SSSE3-NEXT: paddb %xmm4, %xmm1
11683 ; SSSE3-NEXT: pxor %xmm0, %xmm0
11684 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
11685 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
11686 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
11687 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
11688 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
11689 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
11690 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11691 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
11694 ; SSE41-LABEL: ugt_17_v4i32:
11696 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11697 ; SSE41-NEXT: movdqa %xmm0, %xmm2
11698 ; SSE41-NEXT: pand %xmm1, %xmm2
11699 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11700 ; SSE41-NEXT: movdqa %xmm3, %xmm4
11701 ; SSE41-NEXT: pshufb %xmm2, %xmm4
11702 ; SSE41-NEXT: psrlw $4, %xmm0
11703 ; SSE41-NEXT: pand %xmm1, %xmm0
11704 ; SSE41-NEXT: pshufb %xmm0, %xmm3
11705 ; SSE41-NEXT: paddb %xmm4, %xmm3
11706 ; SSE41-NEXT: pxor %xmm1, %xmm1
11707 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
11708 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
11709 ; SSE41-NEXT: psadbw %xmm1, %xmm3
11710 ; SSE41-NEXT: psadbw %xmm1, %xmm0
11711 ; SSE41-NEXT: packuswb %xmm3, %xmm0
11712 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11715 ; AVX1-LABEL: ugt_17_v4i32:
11717 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11718 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
11719 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11720 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
11721 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
11722 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
11723 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
11724 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
11725 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
11726 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11727 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11728 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11729 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11730 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11731 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
11734 ; AVX2-LABEL: ugt_17_v4i32:
11736 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11737 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
11738 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11739 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
11740 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
11741 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
11742 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
11743 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
11744 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
11745 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11746 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11747 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11748 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11749 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11750 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
11751 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
11754 ; AVX512VPOPCNTDQ-LABEL: ugt_17_v4i32:
11755 ; AVX512VPOPCNTDQ: # %bb.0:
11756 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11757 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
11758 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
11759 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
11760 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
11761 ; AVX512VPOPCNTDQ-NEXT: retq
11763 ; AVX512VPOPCNTDQVL-LABEL: ugt_17_v4i32:
11764 ; AVX512VPOPCNTDQVL: # %bb.0:
11765 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
11766 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
11767 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
11768 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
11769 ; AVX512VPOPCNTDQVL-NEXT: retq
11771 ; BITALG_NOVLX-LABEL: ugt_17_v4i32:
11772 ; BITALG_NOVLX: # %bb.0:
11773 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11774 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
11775 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
11776 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11777 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11778 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11779 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11780 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11781 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
11782 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
11783 ; BITALG_NOVLX-NEXT: vzeroupper
11784 ; BITALG_NOVLX-NEXT: retq
11786 ; BITALG-LABEL: ugt_17_v4i32:
11788 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
11789 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
11790 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11791 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11792 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11793 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11794 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11795 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
11796 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
11797 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
11798 ; BITALG-NEXT: retq
11799 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
11800 %3 = icmp ugt <4 x i32> %2, <i32 17, i32 17, i32 17, i32 17>
11801 %4 = sext <4 x i1> %3 to <4 x i32>
11805 define <4 x i32> @ult_18_v4i32(<4 x i32> %0) {
11806 ; SSE2-LABEL: ult_18_v4i32:
11808 ; SSE2-NEXT: movdqa %xmm0, %xmm1
11809 ; SSE2-NEXT: psrlw $1, %xmm1
11810 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11811 ; SSE2-NEXT: psubb %xmm1, %xmm0
11812 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11813 ; SSE2-NEXT: movdqa %xmm0, %xmm2
11814 ; SSE2-NEXT: pand %xmm1, %xmm2
11815 ; SSE2-NEXT: psrlw $2, %xmm0
11816 ; SSE2-NEXT: pand %xmm1, %xmm0
11817 ; SSE2-NEXT: paddb %xmm2, %xmm0
11818 ; SSE2-NEXT: movdqa %xmm0, %xmm1
11819 ; SSE2-NEXT: psrlw $4, %xmm1
11820 ; SSE2-NEXT: paddb %xmm0, %xmm1
11821 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11822 ; SSE2-NEXT: pxor %xmm0, %xmm0
11823 ; SSE2-NEXT: movdqa %xmm1, %xmm2
11824 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
11825 ; SSE2-NEXT: psadbw %xmm0, %xmm2
11826 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
11827 ; SSE2-NEXT: psadbw %xmm0, %xmm1
11828 ; SSE2-NEXT: packuswb %xmm2, %xmm1
11829 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [18,18,18,18]
11830 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
11833 ; SSE3-LABEL: ult_18_v4i32:
11835 ; SSE3-NEXT: movdqa %xmm0, %xmm1
11836 ; SSE3-NEXT: psrlw $1, %xmm1
11837 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11838 ; SSE3-NEXT: psubb %xmm1, %xmm0
11839 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11840 ; SSE3-NEXT: movdqa %xmm0, %xmm2
11841 ; SSE3-NEXT: pand %xmm1, %xmm2
11842 ; SSE3-NEXT: psrlw $2, %xmm0
11843 ; SSE3-NEXT: pand %xmm1, %xmm0
11844 ; SSE3-NEXT: paddb %xmm2, %xmm0
11845 ; SSE3-NEXT: movdqa %xmm0, %xmm1
11846 ; SSE3-NEXT: psrlw $4, %xmm1
11847 ; SSE3-NEXT: paddb %xmm0, %xmm1
11848 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11849 ; SSE3-NEXT: pxor %xmm0, %xmm0
11850 ; SSE3-NEXT: movdqa %xmm1, %xmm2
11851 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
11852 ; SSE3-NEXT: psadbw %xmm0, %xmm2
11853 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
11854 ; SSE3-NEXT: psadbw %xmm0, %xmm1
11855 ; SSE3-NEXT: packuswb %xmm2, %xmm1
11856 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [18,18,18,18]
11857 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
11860 ; SSSE3-LABEL: ult_18_v4i32:
11862 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11863 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
11864 ; SSSE3-NEXT: pand %xmm1, %xmm2
11865 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11866 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
11867 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
11868 ; SSSE3-NEXT: psrlw $4, %xmm0
11869 ; SSSE3-NEXT: pand %xmm1, %xmm0
11870 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
11871 ; SSSE3-NEXT: paddb %xmm4, %xmm3
11872 ; SSSE3-NEXT: pxor %xmm0, %xmm0
11873 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
11874 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
11875 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
11876 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
11877 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
11878 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
11879 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [18,18,18,18]
11880 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
11883 ; SSE41-LABEL: ult_18_v4i32:
11885 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11886 ; SSE41-NEXT: movdqa %xmm0, %xmm2
11887 ; SSE41-NEXT: pand %xmm1, %xmm2
11888 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11889 ; SSE41-NEXT: movdqa %xmm3, %xmm4
11890 ; SSE41-NEXT: pshufb %xmm2, %xmm4
11891 ; SSE41-NEXT: psrlw $4, %xmm0
11892 ; SSE41-NEXT: pand %xmm1, %xmm0
11893 ; SSE41-NEXT: pshufb %xmm0, %xmm3
11894 ; SSE41-NEXT: paddb %xmm4, %xmm3
11895 ; SSE41-NEXT: pxor %xmm0, %xmm0
11896 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
11897 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
11898 ; SSE41-NEXT: psadbw %xmm0, %xmm3
11899 ; SSE41-NEXT: psadbw %xmm0, %xmm1
11900 ; SSE41-NEXT: packuswb %xmm3, %xmm1
11901 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [18,18,18,18]
11902 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
11905 ; AVX1-LABEL: ult_18_v4i32:
11907 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11908 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
11909 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11910 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
11911 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
11912 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
11913 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
11914 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
11915 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
11916 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11917 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11918 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11919 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11920 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11921 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18,18,18]
11922 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11925 ; AVX2-LABEL: ult_18_v4i32:
11927 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11928 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
11929 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11930 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
11931 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
11932 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
11933 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
11934 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
11935 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
11936 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11937 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11938 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11939 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11940 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11941 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
11942 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11945 ; AVX512VPOPCNTDQ-LABEL: ult_18_v4i32:
11946 ; AVX512VPOPCNTDQ: # %bb.0:
11947 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11948 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
11949 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
11950 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11951 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
11952 ; AVX512VPOPCNTDQ-NEXT: retq
11954 ; AVX512VPOPCNTDQVL-LABEL: ult_18_v4i32:
11955 ; AVX512VPOPCNTDQVL: # %bb.0:
11956 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
11957 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
11958 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
11959 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
11960 ; AVX512VPOPCNTDQVL-NEXT: retq
11962 ; BITALG_NOVLX-LABEL: ult_18_v4i32:
11963 ; BITALG_NOVLX: # %bb.0:
11964 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11965 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
11966 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
11967 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11968 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11969 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11970 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11971 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11972 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
11973 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11974 ; BITALG_NOVLX-NEXT: vzeroupper
11975 ; BITALG_NOVLX-NEXT: retq
11977 ; BITALG-LABEL: ult_18_v4i32:
11979 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
11980 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
11981 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11982 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11983 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11984 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11985 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11986 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
11987 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
11988 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
11989 ; BITALG-NEXT: retq
11990 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
11991 %3 = icmp ult <4 x i32> %2, <i32 18, i32 18, i32 18, i32 18>
11992 %4 = sext <4 x i1> %3 to <4 x i32>
11996 define <4 x i32> @ugt_18_v4i32(<4 x i32> %0) {
11997 ; SSE2-LABEL: ugt_18_v4i32:
11999 ; SSE2-NEXT: movdqa %xmm0, %xmm1
12000 ; SSE2-NEXT: psrlw $1, %xmm1
12001 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12002 ; SSE2-NEXT: psubb %xmm1, %xmm0
12003 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12004 ; SSE2-NEXT: movdqa %xmm0, %xmm2
12005 ; SSE2-NEXT: pand %xmm1, %xmm2
12006 ; SSE2-NEXT: psrlw $2, %xmm0
12007 ; SSE2-NEXT: pand %xmm1, %xmm0
12008 ; SSE2-NEXT: paddb %xmm2, %xmm0
12009 ; SSE2-NEXT: movdqa %xmm0, %xmm1
12010 ; SSE2-NEXT: psrlw $4, %xmm1
12011 ; SSE2-NEXT: paddb %xmm1, %xmm0
12012 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12013 ; SSE2-NEXT: pxor %xmm1, %xmm1
12014 ; SSE2-NEXT: movdqa %xmm0, %xmm2
12015 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
12016 ; SSE2-NEXT: psadbw %xmm1, %xmm2
12017 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
12018 ; SSE2-NEXT: psadbw %xmm1, %xmm0
12019 ; SSE2-NEXT: packuswb %xmm2, %xmm0
12020 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12023 ; SSE3-LABEL: ugt_18_v4i32:
12025 ; SSE3-NEXT: movdqa %xmm0, %xmm1
12026 ; SSE3-NEXT: psrlw $1, %xmm1
12027 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12028 ; SSE3-NEXT: psubb %xmm1, %xmm0
12029 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12030 ; SSE3-NEXT: movdqa %xmm0, %xmm2
12031 ; SSE3-NEXT: pand %xmm1, %xmm2
12032 ; SSE3-NEXT: psrlw $2, %xmm0
12033 ; SSE3-NEXT: pand %xmm1, %xmm0
12034 ; SSE3-NEXT: paddb %xmm2, %xmm0
12035 ; SSE3-NEXT: movdqa %xmm0, %xmm1
12036 ; SSE3-NEXT: psrlw $4, %xmm1
12037 ; SSE3-NEXT: paddb %xmm1, %xmm0
12038 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12039 ; SSE3-NEXT: pxor %xmm1, %xmm1
12040 ; SSE3-NEXT: movdqa %xmm0, %xmm2
12041 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
12042 ; SSE3-NEXT: psadbw %xmm1, %xmm2
12043 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
12044 ; SSE3-NEXT: psadbw %xmm1, %xmm0
12045 ; SSE3-NEXT: packuswb %xmm2, %xmm0
12046 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12049 ; SSSE3-LABEL: ugt_18_v4i32:
12051 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12052 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
12053 ; SSSE3-NEXT: pand %xmm2, %xmm3
12054 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12055 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
12056 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
12057 ; SSSE3-NEXT: psrlw $4, %xmm0
12058 ; SSSE3-NEXT: pand %xmm2, %xmm0
12059 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
12060 ; SSSE3-NEXT: paddb %xmm4, %xmm1
12061 ; SSSE3-NEXT: pxor %xmm0, %xmm0
12062 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
12063 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
12064 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
12065 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
12066 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
12067 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
12068 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12069 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
12072 ; SSE41-LABEL: ugt_18_v4i32:
12074 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12075 ; SSE41-NEXT: movdqa %xmm0, %xmm2
12076 ; SSE41-NEXT: pand %xmm1, %xmm2
12077 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12078 ; SSE41-NEXT: movdqa %xmm3, %xmm4
12079 ; SSE41-NEXT: pshufb %xmm2, %xmm4
12080 ; SSE41-NEXT: psrlw $4, %xmm0
12081 ; SSE41-NEXT: pand %xmm1, %xmm0
12082 ; SSE41-NEXT: pshufb %xmm0, %xmm3
12083 ; SSE41-NEXT: paddb %xmm4, %xmm3
12084 ; SSE41-NEXT: pxor %xmm1, %xmm1
12085 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
12086 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
12087 ; SSE41-NEXT: psadbw %xmm1, %xmm3
12088 ; SSE41-NEXT: psadbw %xmm1, %xmm0
12089 ; SSE41-NEXT: packuswb %xmm3, %xmm0
12090 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12093 ; AVX1-LABEL: ugt_18_v4i32:
12095 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12096 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
12097 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12098 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
12099 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
12100 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
12101 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
12102 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
12103 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
12104 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12105 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12106 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12107 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12108 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12109 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
12112 ; AVX2-LABEL: ugt_18_v4i32:
12114 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12115 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
12116 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12117 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
12118 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
12119 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
12120 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
12121 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
12122 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
12123 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12124 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12125 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12126 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12127 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12128 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
12129 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
12132 ; AVX512VPOPCNTDQ-LABEL: ugt_18_v4i32:
12133 ; AVX512VPOPCNTDQ: # %bb.0:
12134 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12135 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
12136 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
12137 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
12138 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
12139 ; AVX512VPOPCNTDQ-NEXT: retq
12141 ; AVX512VPOPCNTDQVL-LABEL: ugt_18_v4i32:
12142 ; AVX512VPOPCNTDQVL: # %bb.0:
12143 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
12144 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
12145 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
12146 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
12147 ; AVX512VPOPCNTDQVL-NEXT: retq
12149 ; BITALG_NOVLX-LABEL: ugt_18_v4i32:
12150 ; BITALG_NOVLX: # %bb.0:
12151 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12152 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
12153 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
12154 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12155 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12156 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12157 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12158 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12159 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
12160 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
12161 ; BITALG_NOVLX-NEXT: vzeroupper
12162 ; BITALG_NOVLX-NEXT: retq
12164 ; BITALG-LABEL: ugt_18_v4i32:
12166 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
12167 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
12168 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12169 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12170 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12171 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12172 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12173 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
12174 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
12175 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
12176 ; BITALG-NEXT: retq
12177 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
12178 %3 = icmp ugt <4 x i32> %2, <i32 18, i32 18, i32 18, i32 18>
12179 %4 = sext <4 x i1> %3 to <4 x i32>
12183 define <4 x i32> @ult_19_v4i32(<4 x i32> %0) {
12184 ; SSE2-LABEL: ult_19_v4i32:
12186 ; SSE2-NEXT: movdqa %xmm0, %xmm1
12187 ; SSE2-NEXT: psrlw $1, %xmm1
12188 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12189 ; SSE2-NEXT: psubb %xmm1, %xmm0
12190 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12191 ; SSE2-NEXT: movdqa %xmm0, %xmm2
12192 ; SSE2-NEXT: pand %xmm1, %xmm2
12193 ; SSE2-NEXT: psrlw $2, %xmm0
12194 ; SSE2-NEXT: pand %xmm1, %xmm0
12195 ; SSE2-NEXT: paddb %xmm2, %xmm0
12196 ; SSE2-NEXT: movdqa %xmm0, %xmm1
12197 ; SSE2-NEXT: psrlw $4, %xmm1
12198 ; SSE2-NEXT: paddb %xmm0, %xmm1
12199 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12200 ; SSE2-NEXT: pxor %xmm0, %xmm0
12201 ; SSE2-NEXT: movdqa %xmm1, %xmm2
12202 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
12203 ; SSE2-NEXT: psadbw %xmm0, %xmm2
12204 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
12205 ; SSE2-NEXT: psadbw %xmm0, %xmm1
12206 ; SSE2-NEXT: packuswb %xmm2, %xmm1
12207 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [19,19,19,19]
12208 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
12211 ; SSE3-LABEL: ult_19_v4i32:
12213 ; SSE3-NEXT: movdqa %xmm0, %xmm1
12214 ; SSE3-NEXT: psrlw $1, %xmm1
12215 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12216 ; SSE3-NEXT: psubb %xmm1, %xmm0
12217 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12218 ; SSE3-NEXT: movdqa %xmm0, %xmm2
12219 ; SSE3-NEXT: pand %xmm1, %xmm2
12220 ; SSE3-NEXT: psrlw $2, %xmm0
12221 ; SSE3-NEXT: pand %xmm1, %xmm0
12222 ; SSE3-NEXT: paddb %xmm2, %xmm0
12223 ; SSE3-NEXT: movdqa %xmm0, %xmm1
12224 ; SSE3-NEXT: psrlw $4, %xmm1
12225 ; SSE3-NEXT: paddb %xmm0, %xmm1
12226 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12227 ; SSE3-NEXT: pxor %xmm0, %xmm0
12228 ; SSE3-NEXT: movdqa %xmm1, %xmm2
12229 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
12230 ; SSE3-NEXT: psadbw %xmm0, %xmm2
12231 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
12232 ; SSE3-NEXT: psadbw %xmm0, %xmm1
12233 ; SSE3-NEXT: packuswb %xmm2, %xmm1
12234 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [19,19,19,19]
12235 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
12238 ; SSSE3-LABEL: ult_19_v4i32:
12240 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12241 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
12242 ; SSSE3-NEXT: pand %xmm1, %xmm2
12243 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12244 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
12245 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
12246 ; SSSE3-NEXT: psrlw $4, %xmm0
12247 ; SSSE3-NEXT: pand %xmm1, %xmm0
12248 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
12249 ; SSSE3-NEXT: paddb %xmm4, %xmm3
12250 ; SSSE3-NEXT: pxor %xmm0, %xmm0
12251 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
12252 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
12253 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
12254 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
12255 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
12256 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
12257 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [19,19,19,19]
12258 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
12261 ; SSE41-LABEL: ult_19_v4i32:
12263 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12264 ; SSE41-NEXT: movdqa %xmm0, %xmm2
12265 ; SSE41-NEXT: pand %xmm1, %xmm2
12266 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12267 ; SSE41-NEXT: movdqa %xmm3, %xmm4
12268 ; SSE41-NEXT: pshufb %xmm2, %xmm4
12269 ; SSE41-NEXT: psrlw $4, %xmm0
12270 ; SSE41-NEXT: pand %xmm1, %xmm0
12271 ; SSE41-NEXT: pshufb %xmm0, %xmm3
12272 ; SSE41-NEXT: paddb %xmm4, %xmm3
12273 ; SSE41-NEXT: pxor %xmm0, %xmm0
12274 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
12275 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
12276 ; SSE41-NEXT: psadbw %xmm0, %xmm3
12277 ; SSE41-NEXT: psadbw %xmm0, %xmm1
12278 ; SSE41-NEXT: packuswb %xmm3, %xmm1
12279 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [19,19,19,19]
12280 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
12283 ; AVX1-LABEL: ult_19_v4i32:
12285 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12286 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
12287 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12288 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
12289 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
12290 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
12291 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
12292 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
12293 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
12294 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12295 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12296 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12297 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12298 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12299 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19,19,19]
12300 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12303 ; AVX2-LABEL: ult_19_v4i32:
12305 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12306 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
12307 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12308 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
12309 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
12310 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
12311 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
12312 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
12313 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
12314 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12315 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12316 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12317 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12318 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12319 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
12320 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12323 ; AVX512VPOPCNTDQ-LABEL: ult_19_v4i32:
12324 ; AVX512VPOPCNTDQ: # %bb.0:
12325 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12326 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
12327 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
12328 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12329 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
12330 ; AVX512VPOPCNTDQ-NEXT: retq
12332 ; AVX512VPOPCNTDQVL-LABEL: ult_19_v4i32:
12333 ; AVX512VPOPCNTDQVL: # %bb.0:
12334 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
12335 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
12336 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
12337 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
12338 ; AVX512VPOPCNTDQVL-NEXT: retq
12340 ; BITALG_NOVLX-LABEL: ult_19_v4i32:
12341 ; BITALG_NOVLX: # %bb.0:
12342 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12343 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
12344 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
12345 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12346 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12347 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12348 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12349 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12350 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
12351 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12352 ; BITALG_NOVLX-NEXT: vzeroupper
12353 ; BITALG_NOVLX-NEXT: retq
12355 ; BITALG-LABEL: ult_19_v4i32:
12357 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
12358 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
12359 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12360 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12361 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12362 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12363 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12364 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
12365 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
12366 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
12367 ; BITALG-NEXT: retq
12368 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
12369 %3 = icmp ult <4 x i32> %2, <i32 19, i32 19, i32 19, i32 19>
12370 %4 = sext <4 x i1> %3 to <4 x i32>
12374 define <4 x i32> @ugt_19_v4i32(<4 x i32> %0) {
; Per-lane popcount of a <4 x i32>, compared unsigned-greater-than 19, with
; the i1 mask sign-extended back to <4 x i32>. Mirrors ult_19_v4i32 but with
; the operands of the final signed compare swapped (ugt vs ult). The CHECK
; lines are autogenerated by utils/update_llc_test_checks.py -- regenerate
; them with that script rather than editing by hand.
12375 ; SSE2-LABEL: ugt_19_v4i32:
12377 ; SSE2-NEXT: movdqa %xmm0, %xmm1
12378 ; SSE2-NEXT: psrlw $1, %xmm1
12379 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12380 ; SSE2-NEXT: psubb %xmm1, %xmm0
12381 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12382 ; SSE2-NEXT: movdqa %xmm0, %xmm2
12383 ; SSE2-NEXT: pand %xmm1, %xmm2
12384 ; SSE2-NEXT: psrlw $2, %xmm0
12385 ; SSE2-NEXT: pand %xmm1, %xmm0
12386 ; SSE2-NEXT: paddb %xmm2, %xmm0
12387 ; SSE2-NEXT: movdqa %xmm0, %xmm1
12388 ; SSE2-NEXT: psrlw $4, %xmm1
12389 ; SSE2-NEXT: paddb %xmm1, %xmm0
12390 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12391 ; SSE2-NEXT: pxor %xmm1, %xmm1
12392 ; SSE2-NEXT: movdqa %xmm0, %xmm2
12393 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
12394 ; SSE2-NEXT: psadbw %xmm1, %xmm2
12395 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
12396 ; SSE2-NEXT: psadbw %xmm1, %xmm0
12397 ; SSE2-NEXT: packuswb %xmm2, %xmm0
12398 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12401 ; SSE3-LABEL: ugt_19_v4i32:
12403 ; SSE3-NEXT: movdqa %xmm0, %xmm1
12404 ; SSE3-NEXT: psrlw $1, %xmm1
12405 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12406 ; SSE3-NEXT: psubb %xmm1, %xmm0
12407 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12408 ; SSE3-NEXT: movdqa %xmm0, %xmm2
12409 ; SSE3-NEXT: pand %xmm1, %xmm2
12410 ; SSE3-NEXT: psrlw $2, %xmm0
12411 ; SSE3-NEXT: pand %xmm1, %xmm0
12412 ; SSE3-NEXT: paddb %xmm2, %xmm0
12413 ; SSE3-NEXT: movdqa %xmm0, %xmm1
12414 ; SSE3-NEXT: psrlw $4, %xmm1
12415 ; SSE3-NEXT: paddb %xmm1, %xmm0
12416 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12417 ; SSE3-NEXT: pxor %xmm1, %xmm1
12418 ; SSE3-NEXT: movdqa %xmm0, %xmm2
12419 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
12420 ; SSE3-NEXT: psadbw %xmm1, %xmm2
12421 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
12422 ; SSE3-NEXT: psadbw %xmm1, %xmm0
12423 ; SSE3-NEXT: packuswb %xmm2, %xmm0
12424 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12427 ; SSSE3-LABEL: ugt_19_v4i32:
12429 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12430 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
12431 ; SSSE3-NEXT: pand %xmm2, %xmm3
12432 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12433 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
12434 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
12435 ; SSSE3-NEXT: psrlw $4, %xmm0
12436 ; SSSE3-NEXT: pand %xmm2, %xmm0
12437 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
12438 ; SSSE3-NEXT: paddb %xmm4, %xmm1
12439 ; SSSE3-NEXT: pxor %xmm0, %xmm0
12440 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
12441 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
12442 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
12443 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
12444 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
12445 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
12446 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12447 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
12450 ; SSE41-LABEL: ugt_19_v4i32:
12452 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12453 ; SSE41-NEXT: movdqa %xmm0, %xmm2
12454 ; SSE41-NEXT: pand %xmm1, %xmm2
12455 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12456 ; SSE41-NEXT: movdqa %xmm3, %xmm4
12457 ; SSE41-NEXT: pshufb %xmm2, %xmm4
12458 ; SSE41-NEXT: psrlw $4, %xmm0
12459 ; SSE41-NEXT: pand %xmm1, %xmm0
12460 ; SSE41-NEXT: pshufb %xmm0, %xmm3
12461 ; SSE41-NEXT: paddb %xmm4, %xmm3
12462 ; SSE41-NEXT: pxor %xmm1, %xmm1
12463 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
12464 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
12465 ; SSE41-NEXT: psadbw %xmm1, %xmm3
12466 ; SSE41-NEXT: psadbw %xmm1, %xmm0
12467 ; SSE41-NEXT: packuswb %xmm3, %xmm0
12468 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12471 ; AVX1-LABEL: ugt_19_v4i32:
12473 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12474 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
12475 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12476 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
12477 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
12478 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
12479 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
12480 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
12481 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
12482 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12483 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12484 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12485 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12486 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12487 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
12490 ; AVX2-LABEL: ugt_19_v4i32:
12492 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12493 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
12494 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12495 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
12496 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
12497 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
12498 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
12499 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
12500 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
12501 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12502 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12503 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12504 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12505 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12506 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
12507 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
12510 ; AVX512VPOPCNTDQ-LABEL: ugt_19_v4i32:
12511 ; AVX512VPOPCNTDQ: # %bb.0:
12512 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12513 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
12514 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
12515 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
12516 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
12517 ; AVX512VPOPCNTDQ-NEXT: retq
12519 ; AVX512VPOPCNTDQVL-LABEL: ugt_19_v4i32:
12520 ; AVX512VPOPCNTDQVL: # %bb.0:
12521 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
12522 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
12523 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
12524 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
12525 ; AVX512VPOPCNTDQVL-NEXT: retq
12527 ; BITALG_NOVLX-LABEL: ugt_19_v4i32:
12528 ; BITALG_NOVLX: # %bb.0:
12529 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12530 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
12531 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
12532 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12533 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12534 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12535 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12536 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12537 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
12538 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
12539 ; BITALG_NOVLX-NEXT: vzeroupper
12540 ; BITALG_NOVLX-NEXT: retq
12542 ; BITALG-LABEL: ugt_19_v4i32:
12544 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
12545 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
12546 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12547 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12548 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12549 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12550 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12551 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
12552 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
12553 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
12554 ; BITALG-NEXT: retq
; Reference IR under test: ctpop, then ugt-19 compare, then sext of the mask.
12555 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
12556 %3 = icmp ugt <4 x i32> %2, <i32 19, i32 19, i32 19, i32 19>
12557 %4 = sext <4 x i1> %3 to <4 x i32>
12561 define <4 x i32> @ult_20_v4i32(<4 x i32> %0) {
; Per-lane popcount of a <4 x i32>, compared unsigned-less-than 20, with the
; i1 mask sign-extended back to <4 x i32>. Identical codegen shape to
; ult_19_v4i32 with the compare constant bumped to 20. The CHECK lines are
; autogenerated by utils/update_llc_test_checks.py -- regenerate them with
; that script rather than editing by hand.
12562 ; SSE2-LABEL: ult_20_v4i32:
12564 ; SSE2-NEXT: movdqa %xmm0, %xmm1
12565 ; SSE2-NEXT: psrlw $1, %xmm1
12566 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12567 ; SSE2-NEXT: psubb %xmm1, %xmm0
12568 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12569 ; SSE2-NEXT: movdqa %xmm0, %xmm2
12570 ; SSE2-NEXT: pand %xmm1, %xmm2
12571 ; SSE2-NEXT: psrlw $2, %xmm0
12572 ; SSE2-NEXT: pand %xmm1, %xmm0
12573 ; SSE2-NEXT: paddb %xmm2, %xmm0
12574 ; SSE2-NEXT: movdqa %xmm0, %xmm1
12575 ; SSE2-NEXT: psrlw $4, %xmm1
12576 ; SSE2-NEXT: paddb %xmm0, %xmm1
12577 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12578 ; SSE2-NEXT: pxor %xmm0, %xmm0
12579 ; SSE2-NEXT: movdqa %xmm1, %xmm2
12580 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
12581 ; SSE2-NEXT: psadbw %xmm0, %xmm2
12582 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
12583 ; SSE2-NEXT: psadbw %xmm0, %xmm1
12584 ; SSE2-NEXT: packuswb %xmm2, %xmm1
12585 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [20,20,20,20]
12586 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
12589 ; SSE3-LABEL: ult_20_v4i32:
12591 ; SSE3-NEXT: movdqa %xmm0, %xmm1
12592 ; SSE3-NEXT: psrlw $1, %xmm1
12593 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12594 ; SSE3-NEXT: psubb %xmm1, %xmm0
12595 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12596 ; SSE3-NEXT: movdqa %xmm0, %xmm2
12597 ; SSE3-NEXT: pand %xmm1, %xmm2
12598 ; SSE3-NEXT: psrlw $2, %xmm0
12599 ; SSE3-NEXT: pand %xmm1, %xmm0
12600 ; SSE3-NEXT: paddb %xmm2, %xmm0
12601 ; SSE3-NEXT: movdqa %xmm0, %xmm1
12602 ; SSE3-NEXT: psrlw $4, %xmm1
12603 ; SSE3-NEXT: paddb %xmm0, %xmm1
12604 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12605 ; SSE3-NEXT: pxor %xmm0, %xmm0
12606 ; SSE3-NEXT: movdqa %xmm1, %xmm2
12607 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
12608 ; SSE3-NEXT: psadbw %xmm0, %xmm2
12609 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
12610 ; SSE3-NEXT: psadbw %xmm0, %xmm1
12611 ; SSE3-NEXT: packuswb %xmm2, %xmm1
12612 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [20,20,20,20]
12613 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
12616 ; SSSE3-LABEL: ult_20_v4i32:
12618 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12619 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
12620 ; SSSE3-NEXT: pand %xmm1, %xmm2
12621 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12622 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
12623 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
12624 ; SSSE3-NEXT: psrlw $4, %xmm0
12625 ; SSSE3-NEXT: pand %xmm1, %xmm0
12626 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
12627 ; SSSE3-NEXT: paddb %xmm4, %xmm3
12628 ; SSSE3-NEXT: pxor %xmm0, %xmm0
12629 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
12630 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
12631 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
12632 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
12633 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
12634 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
12635 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [20,20,20,20]
12636 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
12639 ; SSE41-LABEL: ult_20_v4i32:
12641 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12642 ; SSE41-NEXT: movdqa %xmm0, %xmm2
12643 ; SSE41-NEXT: pand %xmm1, %xmm2
12644 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12645 ; SSE41-NEXT: movdqa %xmm3, %xmm4
12646 ; SSE41-NEXT: pshufb %xmm2, %xmm4
12647 ; SSE41-NEXT: psrlw $4, %xmm0
12648 ; SSE41-NEXT: pand %xmm1, %xmm0
12649 ; SSE41-NEXT: pshufb %xmm0, %xmm3
12650 ; SSE41-NEXT: paddb %xmm4, %xmm3
12651 ; SSE41-NEXT: pxor %xmm0, %xmm0
12652 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
12653 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
12654 ; SSE41-NEXT: psadbw %xmm0, %xmm3
12655 ; SSE41-NEXT: psadbw %xmm0, %xmm1
12656 ; SSE41-NEXT: packuswb %xmm3, %xmm1
12657 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [20,20,20,20]
12658 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
12661 ; AVX1-LABEL: ult_20_v4i32:
12663 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12664 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
12665 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12666 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
12667 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
12668 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
12669 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
12670 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
12671 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
12672 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12673 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12674 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12675 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12676 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12677 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20,20,20]
12678 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12681 ; AVX2-LABEL: ult_20_v4i32:
12683 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12684 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
12685 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12686 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
12687 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
12688 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
12689 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
12690 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
12691 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
12692 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12693 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12694 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12695 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12696 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12697 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
12698 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12701 ; AVX512VPOPCNTDQ-LABEL: ult_20_v4i32:
12702 ; AVX512VPOPCNTDQ: # %bb.0:
12703 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12704 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
12705 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
12706 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12707 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
12708 ; AVX512VPOPCNTDQ-NEXT: retq
12710 ; AVX512VPOPCNTDQVL-LABEL: ult_20_v4i32:
12711 ; AVX512VPOPCNTDQVL: # %bb.0:
12712 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
12713 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
12714 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
12715 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
12716 ; AVX512VPOPCNTDQVL-NEXT: retq
12718 ; BITALG_NOVLX-LABEL: ult_20_v4i32:
12719 ; BITALG_NOVLX: # %bb.0:
12720 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12721 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
12722 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
12723 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12724 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12725 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12726 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12727 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12728 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
12729 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12730 ; BITALG_NOVLX-NEXT: vzeroupper
12731 ; BITALG_NOVLX-NEXT: retq
12733 ; BITALG-LABEL: ult_20_v4i32:
12735 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
12736 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
12737 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12738 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12739 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12740 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12741 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12742 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
12743 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
12744 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
12745 ; BITALG-NEXT: retq
; Reference IR under test: ctpop, then ult-20 compare, then sext of the mask.
12746 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
12747 %3 = icmp ult <4 x i32> %2, <i32 20, i32 20, i32 20, i32 20>
12748 %4 = sext <4 x i1> %3 to <4 x i32>
12752 define <4 x i32> @ugt_20_v4i32(<4 x i32> %0) {
12753 ; SSE2-LABEL: ugt_20_v4i32:
12755 ; SSE2-NEXT: movdqa %xmm0, %xmm1
12756 ; SSE2-NEXT: psrlw $1, %xmm1
12757 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12758 ; SSE2-NEXT: psubb %xmm1, %xmm0
12759 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12760 ; SSE2-NEXT: movdqa %xmm0, %xmm2
12761 ; SSE2-NEXT: pand %xmm1, %xmm2
12762 ; SSE2-NEXT: psrlw $2, %xmm0
12763 ; SSE2-NEXT: pand %xmm1, %xmm0
12764 ; SSE2-NEXT: paddb %xmm2, %xmm0
12765 ; SSE2-NEXT: movdqa %xmm0, %xmm1
12766 ; SSE2-NEXT: psrlw $4, %xmm1
12767 ; SSE2-NEXT: paddb %xmm1, %xmm0
12768 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12769 ; SSE2-NEXT: pxor %xmm1, %xmm1
12770 ; SSE2-NEXT: movdqa %xmm0, %xmm2
12771 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
12772 ; SSE2-NEXT: psadbw %xmm1, %xmm2
12773 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
12774 ; SSE2-NEXT: psadbw %xmm1, %xmm0
12775 ; SSE2-NEXT: packuswb %xmm2, %xmm0
12776 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12779 ; SSE3-LABEL: ugt_20_v4i32:
12781 ; SSE3-NEXT: movdqa %xmm0, %xmm1
12782 ; SSE3-NEXT: psrlw $1, %xmm1
12783 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12784 ; SSE3-NEXT: psubb %xmm1, %xmm0
12785 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12786 ; SSE3-NEXT: movdqa %xmm0, %xmm2
12787 ; SSE3-NEXT: pand %xmm1, %xmm2
12788 ; SSE3-NEXT: psrlw $2, %xmm0
12789 ; SSE3-NEXT: pand %xmm1, %xmm0
12790 ; SSE3-NEXT: paddb %xmm2, %xmm0
12791 ; SSE3-NEXT: movdqa %xmm0, %xmm1
12792 ; SSE3-NEXT: psrlw $4, %xmm1
12793 ; SSE3-NEXT: paddb %xmm1, %xmm0
12794 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12795 ; SSE3-NEXT: pxor %xmm1, %xmm1
12796 ; SSE3-NEXT: movdqa %xmm0, %xmm2
12797 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
12798 ; SSE3-NEXT: psadbw %xmm1, %xmm2
12799 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
12800 ; SSE3-NEXT: psadbw %xmm1, %xmm0
12801 ; SSE3-NEXT: packuswb %xmm2, %xmm0
12802 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12805 ; SSSE3-LABEL: ugt_20_v4i32:
12807 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12808 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
12809 ; SSSE3-NEXT: pand %xmm2, %xmm3
12810 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12811 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
12812 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
12813 ; SSSE3-NEXT: psrlw $4, %xmm0
12814 ; SSSE3-NEXT: pand %xmm2, %xmm0
12815 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
12816 ; SSSE3-NEXT: paddb %xmm4, %xmm1
12817 ; SSSE3-NEXT: pxor %xmm0, %xmm0
12818 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
12819 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
12820 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
12821 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
12822 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
12823 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
12824 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12825 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
12828 ; SSE41-LABEL: ugt_20_v4i32:
12830 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12831 ; SSE41-NEXT: movdqa %xmm0, %xmm2
12832 ; SSE41-NEXT: pand %xmm1, %xmm2
12833 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12834 ; SSE41-NEXT: movdqa %xmm3, %xmm4
12835 ; SSE41-NEXT: pshufb %xmm2, %xmm4
12836 ; SSE41-NEXT: psrlw $4, %xmm0
12837 ; SSE41-NEXT: pand %xmm1, %xmm0
12838 ; SSE41-NEXT: pshufb %xmm0, %xmm3
12839 ; SSE41-NEXT: paddb %xmm4, %xmm3
12840 ; SSE41-NEXT: pxor %xmm1, %xmm1
12841 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
12842 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
12843 ; SSE41-NEXT: psadbw %xmm1, %xmm3
12844 ; SSE41-NEXT: psadbw %xmm1, %xmm0
12845 ; SSE41-NEXT: packuswb %xmm3, %xmm0
12846 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12849 ; AVX1-LABEL: ugt_20_v4i32:
12851 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12852 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
12853 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12854 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
12855 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
12856 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
12857 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
12858 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
12859 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
12860 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12861 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12862 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12863 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12864 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12865 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
12868 ; AVX2-LABEL: ugt_20_v4i32:
12870 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12871 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
12872 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12873 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
12874 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
12875 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
12876 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
12877 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
12878 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
12879 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12880 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12881 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12882 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12883 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12884 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
12885 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
12888 ; AVX512VPOPCNTDQ-LABEL: ugt_20_v4i32:
12889 ; AVX512VPOPCNTDQ: # %bb.0:
12890 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12891 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
12892 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
12893 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
12894 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
12895 ; AVX512VPOPCNTDQ-NEXT: retq
12897 ; AVX512VPOPCNTDQVL-LABEL: ugt_20_v4i32:
12898 ; AVX512VPOPCNTDQVL: # %bb.0:
12899 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
12900 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
12901 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
12902 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
12903 ; AVX512VPOPCNTDQVL-NEXT: retq
12905 ; BITALG_NOVLX-LABEL: ugt_20_v4i32:
12906 ; BITALG_NOVLX: # %bb.0:
12907 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12908 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
12909 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
12910 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12911 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12912 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12913 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12914 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12915 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
12916 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
12917 ; BITALG_NOVLX-NEXT: vzeroupper
12918 ; BITALG_NOVLX-NEXT: retq
12920 ; BITALG-LABEL: ugt_20_v4i32:
12922 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
12923 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
12924 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12925 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12926 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12927 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12928 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12929 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
12930 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
12931 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
12932 ; BITALG-NEXT: retq
12933 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
12934 %3 = icmp ugt <4 x i32> %2, <i32 20, i32 20, i32 20, i32 20>
12935 %4 = sext <4 x i1> %3 to <4 x i32>
12939 define <4 x i32> @ult_21_v4i32(<4 x i32> %0) {
12940 ; SSE2-LABEL: ult_21_v4i32:
12942 ; SSE2-NEXT: movdqa %xmm0, %xmm1
12943 ; SSE2-NEXT: psrlw $1, %xmm1
12944 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12945 ; SSE2-NEXT: psubb %xmm1, %xmm0
12946 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12947 ; SSE2-NEXT: movdqa %xmm0, %xmm2
12948 ; SSE2-NEXT: pand %xmm1, %xmm2
12949 ; SSE2-NEXT: psrlw $2, %xmm0
12950 ; SSE2-NEXT: pand %xmm1, %xmm0
12951 ; SSE2-NEXT: paddb %xmm2, %xmm0
12952 ; SSE2-NEXT: movdqa %xmm0, %xmm1
12953 ; SSE2-NEXT: psrlw $4, %xmm1
12954 ; SSE2-NEXT: paddb %xmm0, %xmm1
12955 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12956 ; SSE2-NEXT: pxor %xmm0, %xmm0
12957 ; SSE2-NEXT: movdqa %xmm1, %xmm2
12958 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
12959 ; SSE2-NEXT: psadbw %xmm0, %xmm2
12960 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
12961 ; SSE2-NEXT: psadbw %xmm0, %xmm1
12962 ; SSE2-NEXT: packuswb %xmm2, %xmm1
12963 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [21,21,21,21]
12964 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
12967 ; SSE3-LABEL: ult_21_v4i32:
12969 ; SSE3-NEXT: movdqa %xmm0, %xmm1
12970 ; SSE3-NEXT: psrlw $1, %xmm1
12971 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12972 ; SSE3-NEXT: psubb %xmm1, %xmm0
12973 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12974 ; SSE3-NEXT: movdqa %xmm0, %xmm2
12975 ; SSE3-NEXT: pand %xmm1, %xmm2
12976 ; SSE3-NEXT: psrlw $2, %xmm0
12977 ; SSE3-NEXT: pand %xmm1, %xmm0
12978 ; SSE3-NEXT: paddb %xmm2, %xmm0
12979 ; SSE3-NEXT: movdqa %xmm0, %xmm1
12980 ; SSE3-NEXT: psrlw $4, %xmm1
12981 ; SSE3-NEXT: paddb %xmm0, %xmm1
12982 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12983 ; SSE3-NEXT: pxor %xmm0, %xmm0
12984 ; SSE3-NEXT: movdqa %xmm1, %xmm2
12985 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
12986 ; SSE3-NEXT: psadbw %xmm0, %xmm2
12987 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
12988 ; SSE3-NEXT: psadbw %xmm0, %xmm1
12989 ; SSE3-NEXT: packuswb %xmm2, %xmm1
12990 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [21,21,21,21]
12991 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
12994 ; SSSE3-LABEL: ult_21_v4i32:
12996 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12997 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
12998 ; SSSE3-NEXT: pand %xmm1, %xmm2
12999 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13000 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
13001 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
13002 ; SSSE3-NEXT: psrlw $4, %xmm0
13003 ; SSSE3-NEXT: pand %xmm1, %xmm0
13004 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
13005 ; SSSE3-NEXT: paddb %xmm4, %xmm3
13006 ; SSSE3-NEXT: pxor %xmm0, %xmm0
13007 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
13008 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
13009 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
13010 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
13011 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
13012 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
13013 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [21,21,21,21]
13014 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
13017 ; SSE41-LABEL: ult_21_v4i32:
13019 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13020 ; SSE41-NEXT: movdqa %xmm0, %xmm2
13021 ; SSE41-NEXT: pand %xmm1, %xmm2
13022 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13023 ; SSE41-NEXT: movdqa %xmm3, %xmm4
13024 ; SSE41-NEXT: pshufb %xmm2, %xmm4
13025 ; SSE41-NEXT: psrlw $4, %xmm0
13026 ; SSE41-NEXT: pand %xmm1, %xmm0
13027 ; SSE41-NEXT: pshufb %xmm0, %xmm3
13028 ; SSE41-NEXT: paddb %xmm4, %xmm3
13029 ; SSE41-NEXT: pxor %xmm0, %xmm0
13030 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
13031 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
13032 ; SSE41-NEXT: psadbw %xmm0, %xmm3
13033 ; SSE41-NEXT: psadbw %xmm0, %xmm1
13034 ; SSE41-NEXT: packuswb %xmm3, %xmm1
13035 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [21,21,21,21]
13036 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
13039 ; AVX1-LABEL: ult_21_v4i32:
13041 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13042 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
13043 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13044 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
13045 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
13046 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
13047 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
13048 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
13049 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
13050 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13051 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13052 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13053 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13054 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13055 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21,21,21]
13056 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
13059 ; AVX2-LABEL: ult_21_v4i32:
13061 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13062 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
13063 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13064 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
13065 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
13066 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
13067 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
13068 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
13069 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
13070 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13071 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13072 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13073 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13074 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13075 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
13076 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
13079 ; AVX512VPOPCNTDQ-LABEL: ult_21_v4i32:
13080 ; AVX512VPOPCNTDQ: # %bb.0:
13081 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13082 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
13083 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
13084 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
13085 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
13086 ; AVX512VPOPCNTDQ-NEXT: retq
13088 ; AVX512VPOPCNTDQVL-LABEL: ult_21_v4i32:
13089 ; AVX512VPOPCNTDQVL: # %bb.0:
13090 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
13091 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
13092 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
13093 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
13094 ; AVX512VPOPCNTDQVL-NEXT: retq
13096 ; BITALG_NOVLX-LABEL: ult_21_v4i32:
13097 ; BITALG_NOVLX: # %bb.0:
13098 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13099 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
13100 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
13101 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13102 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13103 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13104 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13105 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13106 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
13107 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
13108 ; BITALG_NOVLX-NEXT: vzeroupper
13109 ; BITALG_NOVLX-NEXT: retq
13111 ; BITALG-LABEL: ult_21_v4i32:
13113 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
13114 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
13115 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13116 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13117 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13118 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13119 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13120 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
13121 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
13122 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
13123 ; BITALG-NEXT: retq
13124 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
13125 %3 = icmp ult <4 x i32> %2, <i32 21, i32 21, i32 21, i32 21>
13126 %4 = sext <4 x i1> %3 to <4 x i32>
13130 define <4 x i32> @ugt_21_v4i32(<4 x i32> %0) {
13131 ; SSE2-LABEL: ugt_21_v4i32:
13133 ; SSE2-NEXT: movdqa %xmm0, %xmm1
13134 ; SSE2-NEXT: psrlw $1, %xmm1
13135 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13136 ; SSE2-NEXT: psubb %xmm1, %xmm0
13137 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13138 ; SSE2-NEXT: movdqa %xmm0, %xmm2
13139 ; SSE2-NEXT: pand %xmm1, %xmm2
13140 ; SSE2-NEXT: psrlw $2, %xmm0
13141 ; SSE2-NEXT: pand %xmm1, %xmm0
13142 ; SSE2-NEXT: paddb %xmm2, %xmm0
13143 ; SSE2-NEXT: movdqa %xmm0, %xmm1
13144 ; SSE2-NEXT: psrlw $4, %xmm1
13145 ; SSE2-NEXT: paddb %xmm1, %xmm0
13146 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13147 ; SSE2-NEXT: pxor %xmm1, %xmm1
13148 ; SSE2-NEXT: movdqa %xmm0, %xmm2
13149 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
13150 ; SSE2-NEXT: psadbw %xmm1, %xmm2
13151 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
13152 ; SSE2-NEXT: psadbw %xmm1, %xmm0
13153 ; SSE2-NEXT: packuswb %xmm2, %xmm0
13154 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13157 ; SSE3-LABEL: ugt_21_v4i32:
13159 ; SSE3-NEXT: movdqa %xmm0, %xmm1
13160 ; SSE3-NEXT: psrlw $1, %xmm1
13161 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13162 ; SSE3-NEXT: psubb %xmm1, %xmm0
13163 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13164 ; SSE3-NEXT: movdqa %xmm0, %xmm2
13165 ; SSE3-NEXT: pand %xmm1, %xmm2
13166 ; SSE3-NEXT: psrlw $2, %xmm0
13167 ; SSE3-NEXT: pand %xmm1, %xmm0
13168 ; SSE3-NEXT: paddb %xmm2, %xmm0
13169 ; SSE3-NEXT: movdqa %xmm0, %xmm1
13170 ; SSE3-NEXT: psrlw $4, %xmm1
13171 ; SSE3-NEXT: paddb %xmm1, %xmm0
13172 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13173 ; SSE3-NEXT: pxor %xmm1, %xmm1
13174 ; SSE3-NEXT: movdqa %xmm0, %xmm2
13175 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
13176 ; SSE3-NEXT: psadbw %xmm1, %xmm2
13177 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
13178 ; SSE3-NEXT: psadbw %xmm1, %xmm0
13179 ; SSE3-NEXT: packuswb %xmm2, %xmm0
13180 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13183 ; SSSE3-LABEL: ugt_21_v4i32:
13185 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13186 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
13187 ; SSSE3-NEXT: pand %xmm2, %xmm3
13188 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13189 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
13190 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
13191 ; SSSE3-NEXT: psrlw $4, %xmm0
13192 ; SSSE3-NEXT: pand %xmm2, %xmm0
13193 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
13194 ; SSSE3-NEXT: paddb %xmm4, %xmm1
13195 ; SSSE3-NEXT: pxor %xmm0, %xmm0
13196 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
13197 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
13198 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
13199 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
13200 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
13201 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
13202 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13203 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
13206 ; SSE41-LABEL: ugt_21_v4i32:
13208 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13209 ; SSE41-NEXT: movdqa %xmm0, %xmm2
13210 ; SSE41-NEXT: pand %xmm1, %xmm2
13211 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13212 ; SSE41-NEXT: movdqa %xmm3, %xmm4
13213 ; SSE41-NEXT: pshufb %xmm2, %xmm4
13214 ; SSE41-NEXT: psrlw $4, %xmm0
13215 ; SSE41-NEXT: pand %xmm1, %xmm0
13216 ; SSE41-NEXT: pshufb %xmm0, %xmm3
13217 ; SSE41-NEXT: paddb %xmm4, %xmm3
13218 ; SSE41-NEXT: pxor %xmm1, %xmm1
13219 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
13220 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
13221 ; SSE41-NEXT: psadbw %xmm1, %xmm3
13222 ; SSE41-NEXT: psadbw %xmm1, %xmm0
13223 ; SSE41-NEXT: packuswb %xmm3, %xmm0
13224 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13227 ; AVX1-LABEL: ugt_21_v4i32:
13229 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13230 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
13231 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13232 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
13233 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
13234 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
13235 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
13236 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
13237 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
13238 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13239 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13240 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13241 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13242 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13243 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
13246 ; AVX2-LABEL: ugt_21_v4i32:
13248 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13249 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
13250 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13251 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
13252 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
13253 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
13254 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
13255 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
13256 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
13257 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13258 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13259 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13260 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13261 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13262 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
13263 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
13266 ; AVX512VPOPCNTDQ-LABEL: ugt_21_v4i32:
13267 ; AVX512VPOPCNTDQ: # %bb.0:
13268 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13269 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
13270 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
13271 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
13272 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
13273 ; AVX512VPOPCNTDQ-NEXT: retq
13275 ; AVX512VPOPCNTDQVL-LABEL: ugt_21_v4i32:
13276 ; AVX512VPOPCNTDQVL: # %bb.0:
13277 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
13278 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
13279 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
13280 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
13281 ; AVX512VPOPCNTDQVL-NEXT: retq
13283 ; BITALG_NOVLX-LABEL: ugt_21_v4i32:
13284 ; BITALG_NOVLX: # %bb.0:
13285 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13286 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
13287 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
13288 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13289 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13290 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13291 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13292 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13293 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
13294 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
13295 ; BITALG_NOVLX-NEXT: vzeroupper
13296 ; BITALG_NOVLX-NEXT: retq
13298 ; BITALG-LABEL: ugt_21_v4i32:
13300 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
13301 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
13302 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13303 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13304 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13305 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13306 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13307 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
13308 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
13309 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
13310 ; BITALG-NEXT: retq
13311 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
13312 %3 = icmp ugt <4 x i32> %2, <i32 21, i32 21, i32 21, i32 21>
13313 %4 = sext <4 x i1> %3 to <4 x i32>
13317 define <4 x i32> @ult_22_v4i32(<4 x i32> %0) {
13318 ; SSE2-LABEL: ult_22_v4i32:
13320 ; SSE2-NEXT: movdqa %xmm0, %xmm1
13321 ; SSE2-NEXT: psrlw $1, %xmm1
13322 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13323 ; SSE2-NEXT: psubb %xmm1, %xmm0
13324 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13325 ; SSE2-NEXT: movdqa %xmm0, %xmm2
13326 ; SSE2-NEXT: pand %xmm1, %xmm2
13327 ; SSE2-NEXT: psrlw $2, %xmm0
13328 ; SSE2-NEXT: pand %xmm1, %xmm0
13329 ; SSE2-NEXT: paddb %xmm2, %xmm0
13330 ; SSE2-NEXT: movdqa %xmm0, %xmm1
13331 ; SSE2-NEXT: psrlw $4, %xmm1
13332 ; SSE2-NEXT: paddb %xmm0, %xmm1
13333 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13334 ; SSE2-NEXT: pxor %xmm0, %xmm0
13335 ; SSE2-NEXT: movdqa %xmm1, %xmm2
13336 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
13337 ; SSE2-NEXT: psadbw %xmm0, %xmm2
13338 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
13339 ; SSE2-NEXT: psadbw %xmm0, %xmm1
13340 ; SSE2-NEXT: packuswb %xmm2, %xmm1
13341 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [22,22,22,22]
13342 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
13345 ; SSE3-LABEL: ult_22_v4i32:
13347 ; SSE3-NEXT: movdqa %xmm0, %xmm1
13348 ; SSE3-NEXT: psrlw $1, %xmm1
13349 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13350 ; SSE3-NEXT: psubb %xmm1, %xmm0
13351 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13352 ; SSE3-NEXT: movdqa %xmm0, %xmm2
13353 ; SSE3-NEXT: pand %xmm1, %xmm2
13354 ; SSE3-NEXT: psrlw $2, %xmm0
13355 ; SSE3-NEXT: pand %xmm1, %xmm0
13356 ; SSE3-NEXT: paddb %xmm2, %xmm0
13357 ; SSE3-NEXT: movdqa %xmm0, %xmm1
13358 ; SSE3-NEXT: psrlw $4, %xmm1
13359 ; SSE3-NEXT: paddb %xmm0, %xmm1
13360 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13361 ; SSE3-NEXT: pxor %xmm0, %xmm0
13362 ; SSE3-NEXT: movdqa %xmm1, %xmm2
13363 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
13364 ; SSE3-NEXT: psadbw %xmm0, %xmm2
13365 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
13366 ; SSE3-NEXT: psadbw %xmm0, %xmm1
13367 ; SSE3-NEXT: packuswb %xmm2, %xmm1
13368 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [22,22,22,22]
13369 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
13372 ; SSSE3-LABEL: ult_22_v4i32:
13374 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13375 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
13376 ; SSSE3-NEXT: pand %xmm1, %xmm2
13377 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13378 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
13379 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
13380 ; SSSE3-NEXT: psrlw $4, %xmm0
13381 ; SSSE3-NEXT: pand %xmm1, %xmm0
13382 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
13383 ; SSSE3-NEXT: paddb %xmm4, %xmm3
13384 ; SSSE3-NEXT: pxor %xmm0, %xmm0
13385 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
13386 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
13387 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
13388 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
13389 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
13390 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
13391 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [22,22,22,22]
13392 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
13395 ; SSE41-LABEL: ult_22_v4i32:
13397 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13398 ; SSE41-NEXT: movdqa %xmm0, %xmm2
13399 ; SSE41-NEXT: pand %xmm1, %xmm2
13400 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13401 ; SSE41-NEXT: movdqa %xmm3, %xmm4
13402 ; SSE41-NEXT: pshufb %xmm2, %xmm4
13403 ; SSE41-NEXT: psrlw $4, %xmm0
13404 ; SSE41-NEXT: pand %xmm1, %xmm0
13405 ; SSE41-NEXT: pshufb %xmm0, %xmm3
13406 ; SSE41-NEXT: paddb %xmm4, %xmm3
13407 ; SSE41-NEXT: pxor %xmm0, %xmm0
13408 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
13409 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
13410 ; SSE41-NEXT: psadbw %xmm0, %xmm3
13411 ; SSE41-NEXT: psadbw %xmm0, %xmm1
13412 ; SSE41-NEXT: packuswb %xmm3, %xmm1
13413 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [22,22,22,22]
13414 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
13417 ; AVX1-LABEL: ult_22_v4i32:
13419 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13420 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
13421 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13422 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
13423 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
13424 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
13425 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
13426 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
13427 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
13428 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13429 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13430 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13431 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13432 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13433 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22,22,22]
13434 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
13437 ; AVX2-LABEL: ult_22_v4i32:
13439 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13440 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
13441 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13442 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
13443 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
13444 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
13445 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
13446 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
13447 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
13448 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13449 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13450 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13451 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13452 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13453 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
13454 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
13457 ; AVX512VPOPCNTDQ-LABEL: ult_22_v4i32:
13458 ; AVX512VPOPCNTDQ: # %bb.0:
13459 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13460 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
13461 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
13462 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
13463 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
13464 ; AVX512VPOPCNTDQ-NEXT: retq
13466 ; AVX512VPOPCNTDQVL-LABEL: ult_22_v4i32:
13467 ; AVX512VPOPCNTDQVL: # %bb.0:
13468 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
13469 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
13470 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
13471 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
13472 ; AVX512VPOPCNTDQVL-NEXT: retq
13474 ; BITALG_NOVLX-LABEL: ult_22_v4i32:
13475 ; BITALG_NOVLX: # %bb.0:
13476 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13477 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
13478 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
13479 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13480 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13481 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13482 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13483 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13484 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
13485 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
13486 ; BITALG_NOVLX-NEXT: vzeroupper
13487 ; BITALG_NOVLX-NEXT: retq
13489 ; BITALG-LABEL: ult_22_v4i32:
13491 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
13492 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
13493 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13494 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13495 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13496 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13497 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13498 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
13499 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
13500 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
13501 ; BITALG-NEXT: retq
13502 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
13503 %3 = icmp ult <4 x i32> %2, <i32 22, i32 22, i32 22, i32 22>
13504 %4 = sext <4 x i1> %3 to <4 x i32>
13508 define <4 x i32> @ugt_22_v4i32(<4 x i32> %0) {
13509 ; SSE2-LABEL: ugt_22_v4i32:
13511 ; SSE2-NEXT: movdqa %xmm0, %xmm1
13512 ; SSE2-NEXT: psrlw $1, %xmm1
13513 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13514 ; SSE2-NEXT: psubb %xmm1, %xmm0
13515 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13516 ; SSE2-NEXT: movdqa %xmm0, %xmm2
13517 ; SSE2-NEXT: pand %xmm1, %xmm2
13518 ; SSE2-NEXT: psrlw $2, %xmm0
13519 ; SSE2-NEXT: pand %xmm1, %xmm0
13520 ; SSE2-NEXT: paddb %xmm2, %xmm0
13521 ; SSE2-NEXT: movdqa %xmm0, %xmm1
13522 ; SSE2-NEXT: psrlw $4, %xmm1
13523 ; SSE2-NEXT: paddb %xmm1, %xmm0
13524 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13525 ; SSE2-NEXT: pxor %xmm1, %xmm1
13526 ; SSE2-NEXT: movdqa %xmm0, %xmm2
13527 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
13528 ; SSE2-NEXT: psadbw %xmm1, %xmm2
13529 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
13530 ; SSE2-NEXT: psadbw %xmm1, %xmm0
13531 ; SSE2-NEXT: packuswb %xmm2, %xmm0
13532 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13535 ; SSE3-LABEL: ugt_22_v4i32:
13537 ; SSE3-NEXT: movdqa %xmm0, %xmm1
13538 ; SSE3-NEXT: psrlw $1, %xmm1
13539 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13540 ; SSE3-NEXT: psubb %xmm1, %xmm0
13541 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13542 ; SSE3-NEXT: movdqa %xmm0, %xmm2
13543 ; SSE3-NEXT: pand %xmm1, %xmm2
13544 ; SSE3-NEXT: psrlw $2, %xmm0
13545 ; SSE3-NEXT: pand %xmm1, %xmm0
13546 ; SSE3-NEXT: paddb %xmm2, %xmm0
13547 ; SSE3-NEXT: movdqa %xmm0, %xmm1
13548 ; SSE3-NEXT: psrlw $4, %xmm1
13549 ; SSE3-NEXT: paddb %xmm1, %xmm0
13550 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13551 ; SSE3-NEXT: pxor %xmm1, %xmm1
13552 ; SSE3-NEXT: movdqa %xmm0, %xmm2
13553 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
13554 ; SSE3-NEXT: psadbw %xmm1, %xmm2
13555 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
13556 ; SSE3-NEXT: psadbw %xmm1, %xmm0
13557 ; SSE3-NEXT: packuswb %xmm2, %xmm0
13558 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13561 ; SSSE3-LABEL: ugt_22_v4i32:
13563 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13564 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
13565 ; SSSE3-NEXT: pand %xmm2, %xmm3
13566 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13567 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
13568 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
13569 ; SSSE3-NEXT: psrlw $4, %xmm0
13570 ; SSSE3-NEXT: pand %xmm2, %xmm0
13571 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
13572 ; SSSE3-NEXT: paddb %xmm4, %xmm1
13573 ; SSSE3-NEXT: pxor %xmm0, %xmm0
13574 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
13575 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
13576 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
13577 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
13578 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
13579 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
13580 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13581 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
13584 ; SSE41-LABEL: ugt_22_v4i32:
13586 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13587 ; SSE41-NEXT: movdqa %xmm0, %xmm2
13588 ; SSE41-NEXT: pand %xmm1, %xmm2
13589 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13590 ; SSE41-NEXT: movdqa %xmm3, %xmm4
13591 ; SSE41-NEXT: pshufb %xmm2, %xmm4
13592 ; SSE41-NEXT: psrlw $4, %xmm0
13593 ; SSE41-NEXT: pand %xmm1, %xmm0
13594 ; SSE41-NEXT: pshufb %xmm0, %xmm3
13595 ; SSE41-NEXT: paddb %xmm4, %xmm3
13596 ; SSE41-NEXT: pxor %xmm1, %xmm1
13597 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
13598 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
13599 ; SSE41-NEXT: psadbw %xmm1, %xmm3
13600 ; SSE41-NEXT: psadbw %xmm1, %xmm0
13601 ; SSE41-NEXT: packuswb %xmm3, %xmm0
13602 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13605 ; AVX1-LABEL: ugt_22_v4i32:
13607 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13608 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
13609 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13610 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
13611 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
13612 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
13613 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
13614 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
13615 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
13616 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13617 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13618 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13619 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13620 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13621 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
13624 ; AVX2-LABEL: ugt_22_v4i32:
13626 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13627 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
13628 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13629 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
13630 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
13631 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
13632 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
13633 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
13634 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
13635 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13636 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13637 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13638 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13639 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13640 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
13641 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
13644 ; AVX512VPOPCNTDQ-LABEL: ugt_22_v4i32:
13645 ; AVX512VPOPCNTDQ: # %bb.0:
13646 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13647 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
13648 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
13649 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
13650 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
13651 ; AVX512VPOPCNTDQ-NEXT: retq
13653 ; AVX512VPOPCNTDQVL-LABEL: ugt_22_v4i32:
13654 ; AVX512VPOPCNTDQVL: # %bb.0:
13655 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
13656 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
13657 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
13658 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
13659 ; AVX512VPOPCNTDQVL-NEXT: retq
13661 ; BITALG_NOVLX-LABEL: ugt_22_v4i32:
13662 ; BITALG_NOVLX: # %bb.0:
13663 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13664 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
13665 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
13666 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13667 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13668 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13669 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13670 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13671 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
13672 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
13673 ; BITALG_NOVLX-NEXT: vzeroupper
13674 ; BITALG_NOVLX-NEXT: retq
13676 ; BITALG-LABEL: ugt_22_v4i32:
13678 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
13679 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
13680 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13681 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13682 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13683 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13684 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13685 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
13686 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
13687 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
13688 ; BITALG-NEXT: retq
13689 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
13690 %3 = icmp ugt <4 x i32> %2, <i32 22, i32 22, i32 22, i32 22>
13691 %4 = sext <4 x i1> %3 to <4 x i32>
; ult_23_v4i32 checks codegen for (ctpop <4 x i32> %0) u< 23, with the i1
; result sign-extended to a <4 x i32> mask (see the IR at the end of the
; function).  Pre-AVX512 targets synthesize the popcount per byte — a
; shift/mask/add sequence on SSE2, a pshufb nibble-table lookup on
; SSSE3/SSE4.1 and AVX — then widen each 32-bit lane's byte counts with
; psadbw against zero before comparing to the splatted constant 23.
; AVX512 VPOPCNTDQ and BITALG targets use native population-count
; instructions, with the VL variants folding the compare into a mask op.
; NOTE(review) - assertions are autogenerated (see the file header); if the
; expected output changes, regenerate with update_llc_test_checks.py rather
; than hand-editing the check lines.
13695 define <4 x i32> @ult_23_v4i32(<4 x i32> %0) {
13696 ; SSE2-LABEL: ult_23_v4i32:
13698 ; SSE2-NEXT: movdqa %xmm0, %xmm1
13699 ; SSE2-NEXT: psrlw $1, %xmm1
13700 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13701 ; SSE2-NEXT: psubb %xmm1, %xmm0
13702 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13703 ; SSE2-NEXT: movdqa %xmm0, %xmm2
13704 ; SSE2-NEXT: pand %xmm1, %xmm2
13705 ; SSE2-NEXT: psrlw $2, %xmm0
13706 ; SSE2-NEXT: pand %xmm1, %xmm0
13707 ; SSE2-NEXT: paddb %xmm2, %xmm0
13708 ; SSE2-NEXT: movdqa %xmm0, %xmm1
13709 ; SSE2-NEXT: psrlw $4, %xmm1
13710 ; SSE2-NEXT: paddb %xmm0, %xmm1
13711 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13712 ; SSE2-NEXT: pxor %xmm0, %xmm0
13713 ; SSE2-NEXT: movdqa %xmm1, %xmm2
13714 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
13715 ; SSE2-NEXT: psadbw %xmm0, %xmm2
13716 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
13717 ; SSE2-NEXT: psadbw %xmm0, %xmm1
13718 ; SSE2-NEXT: packuswb %xmm2, %xmm1
13719 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [23,23,23,23]
13720 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
13723 ; SSE3-LABEL: ult_23_v4i32:
13725 ; SSE3-NEXT: movdqa %xmm0, %xmm1
13726 ; SSE3-NEXT: psrlw $1, %xmm1
13727 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13728 ; SSE3-NEXT: psubb %xmm1, %xmm0
13729 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13730 ; SSE3-NEXT: movdqa %xmm0, %xmm2
13731 ; SSE3-NEXT: pand %xmm1, %xmm2
13732 ; SSE3-NEXT: psrlw $2, %xmm0
13733 ; SSE3-NEXT: pand %xmm1, %xmm0
13734 ; SSE3-NEXT: paddb %xmm2, %xmm0
13735 ; SSE3-NEXT: movdqa %xmm0, %xmm1
13736 ; SSE3-NEXT: psrlw $4, %xmm1
13737 ; SSE3-NEXT: paddb %xmm0, %xmm1
13738 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13739 ; SSE3-NEXT: pxor %xmm0, %xmm0
13740 ; SSE3-NEXT: movdqa %xmm1, %xmm2
13741 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
13742 ; SSE3-NEXT: psadbw %xmm0, %xmm2
13743 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
13744 ; SSE3-NEXT: psadbw %xmm0, %xmm1
13745 ; SSE3-NEXT: packuswb %xmm2, %xmm1
13746 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [23,23,23,23]
13747 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
13750 ; SSSE3-LABEL: ult_23_v4i32:
13752 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13753 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
13754 ; SSSE3-NEXT: pand %xmm1, %xmm2
13755 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13756 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
13757 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
13758 ; SSSE3-NEXT: psrlw $4, %xmm0
13759 ; SSSE3-NEXT: pand %xmm1, %xmm0
13760 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
13761 ; SSSE3-NEXT: paddb %xmm4, %xmm3
13762 ; SSSE3-NEXT: pxor %xmm0, %xmm0
13763 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
13764 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
13765 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
13766 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
13767 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
13768 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
13769 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [23,23,23,23]
13770 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
13773 ; SSE41-LABEL: ult_23_v4i32:
13775 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13776 ; SSE41-NEXT: movdqa %xmm0, %xmm2
13777 ; SSE41-NEXT: pand %xmm1, %xmm2
13778 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13779 ; SSE41-NEXT: movdqa %xmm3, %xmm4
13780 ; SSE41-NEXT: pshufb %xmm2, %xmm4
13781 ; SSE41-NEXT: psrlw $4, %xmm0
13782 ; SSE41-NEXT: pand %xmm1, %xmm0
13783 ; SSE41-NEXT: pshufb %xmm0, %xmm3
13784 ; SSE41-NEXT: paddb %xmm4, %xmm3
13785 ; SSE41-NEXT: pxor %xmm0, %xmm0
13786 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
13787 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
13788 ; SSE41-NEXT: psadbw %xmm0, %xmm3
13789 ; SSE41-NEXT: psadbw %xmm0, %xmm1
13790 ; SSE41-NEXT: packuswb %xmm3, %xmm1
13791 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [23,23,23,23]
13792 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
13795 ; AVX1-LABEL: ult_23_v4i32:
13797 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13798 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
13799 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13800 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
13801 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
13802 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
13803 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
13804 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
13805 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
13806 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13807 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13808 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13809 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13810 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13811 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23,23,23]
13812 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
13815 ; AVX2-LABEL: ult_23_v4i32:
13817 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13818 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
13819 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13820 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
13821 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
13822 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
13823 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
13824 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
13825 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
13826 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13827 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13828 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13829 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13830 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13831 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
13832 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
13835 ; AVX512VPOPCNTDQ-LABEL: ult_23_v4i32:
13836 ; AVX512VPOPCNTDQ: # %bb.0:
13837 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13838 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
13839 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
13840 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
13841 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
13842 ; AVX512VPOPCNTDQ-NEXT: retq
13844 ; AVX512VPOPCNTDQVL-LABEL: ult_23_v4i32:
13845 ; AVX512VPOPCNTDQVL: # %bb.0:
13846 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
13847 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
13848 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
13849 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
13850 ; AVX512VPOPCNTDQVL-NEXT: retq
13852 ; BITALG_NOVLX-LABEL: ult_23_v4i32:
13853 ; BITALG_NOVLX: # %bb.0:
13854 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13855 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
13856 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
13857 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13858 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13859 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13860 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13861 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13862 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
13863 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
13864 ; BITALG_NOVLX-NEXT: vzeroupper
13865 ; BITALG_NOVLX-NEXT: retq
13867 ; BITALG-LABEL: ult_23_v4i32:
13869 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
13870 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
13871 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13872 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13873 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13874 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13875 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13876 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
13877 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
13878 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
13879 ; BITALG-NEXT: retq
13880 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
13881 %3 = icmp ult <4 x i32> %2, <i32 23, i32 23, i32 23, i32 23>
13882 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_23_v4i32 checks codegen for (ctpop <4 x i32> %0) u> 23, sign-extended
; to a <4 x i32> mask.  Same popcount lowerings as the ult variant above,
; but for the greater-than direction the scalar-constant-pool targets fold
; the splat into a memory-operand pcmpgtd/vpcmpgtd, and the AVX512VL/BITALG
; targets emit vpcmpnleud into a mask register instead of vpcmpltud.
; NOTE(review) - assertions are autogenerated (see the file header); if the
; expected output changes, regenerate with update_llc_test_checks.py rather
; than hand-editing the check lines.
13886 define <4 x i32> @ugt_23_v4i32(<4 x i32> %0) {
13887 ; SSE2-LABEL: ugt_23_v4i32:
13889 ; SSE2-NEXT: movdqa %xmm0, %xmm1
13890 ; SSE2-NEXT: psrlw $1, %xmm1
13891 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13892 ; SSE2-NEXT: psubb %xmm1, %xmm0
13893 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13894 ; SSE2-NEXT: movdqa %xmm0, %xmm2
13895 ; SSE2-NEXT: pand %xmm1, %xmm2
13896 ; SSE2-NEXT: psrlw $2, %xmm0
13897 ; SSE2-NEXT: pand %xmm1, %xmm0
13898 ; SSE2-NEXT: paddb %xmm2, %xmm0
13899 ; SSE2-NEXT: movdqa %xmm0, %xmm1
13900 ; SSE2-NEXT: psrlw $4, %xmm1
13901 ; SSE2-NEXT: paddb %xmm1, %xmm0
13902 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13903 ; SSE2-NEXT: pxor %xmm1, %xmm1
13904 ; SSE2-NEXT: movdqa %xmm0, %xmm2
13905 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
13906 ; SSE2-NEXT: psadbw %xmm1, %xmm2
13907 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
13908 ; SSE2-NEXT: psadbw %xmm1, %xmm0
13909 ; SSE2-NEXT: packuswb %xmm2, %xmm0
13910 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13913 ; SSE3-LABEL: ugt_23_v4i32:
13915 ; SSE3-NEXT: movdqa %xmm0, %xmm1
13916 ; SSE3-NEXT: psrlw $1, %xmm1
13917 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13918 ; SSE3-NEXT: psubb %xmm1, %xmm0
13919 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13920 ; SSE3-NEXT: movdqa %xmm0, %xmm2
13921 ; SSE3-NEXT: pand %xmm1, %xmm2
13922 ; SSE3-NEXT: psrlw $2, %xmm0
13923 ; SSE3-NEXT: pand %xmm1, %xmm0
13924 ; SSE3-NEXT: paddb %xmm2, %xmm0
13925 ; SSE3-NEXT: movdqa %xmm0, %xmm1
13926 ; SSE3-NEXT: psrlw $4, %xmm1
13927 ; SSE3-NEXT: paddb %xmm1, %xmm0
13928 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13929 ; SSE3-NEXT: pxor %xmm1, %xmm1
13930 ; SSE3-NEXT: movdqa %xmm0, %xmm2
13931 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
13932 ; SSE3-NEXT: psadbw %xmm1, %xmm2
13933 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
13934 ; SSE3-NEXT: psadbw %xmm1, %xmm0
13935 ; SSE3-NEXT: packuswb %xmm2, %xmm0
13936 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13939 ; SSSE3-LABEL: ugt_23_v4i32:
13941 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13942 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
13943 ; SSSE3-NEXT: pand %xmm2, %xmm3
13944 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13945 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
13946 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
13947 ; SSSE3-NEXT: psrlw $4, %xmm0
13948 ; SSSE3-NEXT: pand %xmm2, %xmm0
13949 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
13950 ; SSSE3-NEXT: paddb %xmm4, %xmm1
13951 ; SSSE3-NEXT: pxor %xmm0, %xmm0
13952 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
13953 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
13954 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
13955 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
13956 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
13957 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
13958 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13959 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
13962 ; SSE41-LABEL: ugt_23_v4i32:
13964 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13965 ; SSE41-NEXT: movdqa %xmm0, %xmm2
13966 ; SSE41-NEXT: pand %xmm1, %xmm2
13967 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13968 ; SSE41-NEXT: movdqa %xmm3, %xmm4
13969 ; SSE41-NEXT: pshufb %xmm2, %xmm4
13970 ; SSE41-NEXT: psrlw $4, %xmm0
13971 ; SSE41-NEXT: pand %xmm1, %xmm0
13972 ; SSE41-NEXT: pshufb %xmm0, %xmm3
13973 ; SSE41-NEXT: paddb %xmm4, %xmm3
13974 ; SSE41-NEXT: pxor %xmm1, %xmm1
13975 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
13976 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
13977 ; SSE41-NEXT: psadbw %xmm1, %xmm3
13978 ; SSE41-NEXT: psadbw %xmm1, %xmm0
13979 ; SSE41-NEXT: packuswb %xmm3, %xmm0
13980 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13983 ; AVX1-LABEL: ugt_23_v4i32:
13985 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13986 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
13987 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13988 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
13989 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
13990 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
13991 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
13992 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
13993 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
13994 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13995 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13996 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13997 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13998 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13999 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
14002 ; AVX2-LABEL: ugt_23_v4i32:
14004 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14005 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
14006 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14007 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
14008 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
14009 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
14010 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
14011 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
14012 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
14013 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14014 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14015 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14016 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14017 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14018 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
14019 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
14022 ; AVX512VPOPCNTDQ-LABEL: ugt_23_v4i32:
14023 ; AVX512VPOPCNTDQ: # %bb.0:
14024 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
14025 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
14026 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
14027 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
14028 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
14029 ; AVX512VPOPCNTDQ-NEXT: retq
14031 ; AVX512VPOPCNTDQVL-LABEL: ugt_23_v4i32:
14032 ; AVX512VPOPCNTDQVL: # %bb.0:
14033 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
14034 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
14035 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
14036 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
14037 ; AVX512VPOPCNTDQVL-NEXT: retq
14039 ; BITALG_NOVLX-LABEL: ugt_23_v4i32:
14040 ; BITALG_NOVLX: # %bb.0:
14041 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
14042 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
14043 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
14044 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14045 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14046 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14047 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14048 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14049 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
14050 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
14051 ; BITALG_NOVLX-NEXT: vzeroupper
14052 ; BITALG_NOVLX-NEXT: retq
14054 ; BITALG-LABEL: ugt_23_v4i32:
14056 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
14057 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
14058 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14059 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14060 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14061 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14062 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14063 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
14064 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
14065 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
14066 ; BITALG-NEXT: retq
14067 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
14068 %3 = icmp ugt <4 x i32> %2, <i32 23, i32 23, i32 23, i32 23>
14069 %4 = sext <4 x i1> %3 to <4 x i32>
; ult_24_v4i32 checks codegen for (ctpop <4 x i32> %0) u< 24, sign-extended
; to a <4 x i32> mask.  Structurally identical to ult_23_v4i32 above, with
; the splat compare constant 24 instead of 23; the ult direction keeps the
; constant in a register and swaps the pcmpgtd/vpcmpgtd operand order
; relative to the ugt variants.
; NOTE(review) - assertions are autogenerated (see the file header); if the
; expected output changes, regenerate with update_llc_test_checks.py rather
; than hand-editing the check lines.
14073 define <4 x i32> @ult_24_v4i32(<4 x i32> %0) {
14074 ; SSE2-LABEL: ult_24_v4i32:
14076 ; SSE2-NEXT: movdqa %xmm0, %xmm1
14077 ; SSE2-NEXT: psrlw $1, %xmm1
14078 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14079 ; SSE2-NEXT: psubb %xmm1, %xmm0
14080 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
14081 ; SSE2-NEXT: movdqa %xmm0, %xmm2
14082 ; SSE2-NEXT: pand %xmm1, %xmm2
14083 ; SSE2-NEXT: psrlw $2, %xmm0
14084 ; SSE2-NEXT: pand %xmm1, %xmm0
14085 ; SSE2-NEXT: paddb %xmm2, %xmm0
14086 ; SSE2-NEXT: movdqa %xmm0, %xmm1
14087 ; SSE2-NEXT: psrlw $4, %xmm1
14088 ; SSE2-NEXT: paddb %xmm0, %xmm1
14089 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14090 ; SSE2-NEXT: pxor %xmm0, %xmm0
14091 ; SSE2-NEXT: movdqa %xmm1, %xmm2
14092 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
14093 ; SSE2-NEXT: psadbw %xmm0, %xmm2
14094 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
14095 ; SSE2-NEXT: psadbw %xmm0, %xmm1
14096 ; SSE2-NEXT: packuswb %xmm2, %xmm1
14097 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [24,24,24,24]
14098 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
14101 ; SSE3-LABEL: ult_24_v4i32:
14103 ; SSE3-NEXT: movdqa %xmm0, %xmm1
14104 ; SSE3-NEXT: psrlw $1, %xmm1
14105 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14106 ; SSE3-NEXT: psubb %xmm1, %xmm0
14107 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
14108 ; SSE3-NEXT: movdqa %xmm0, %xmm2
14109 ; SSE3-NEXT: pand %xmm1, %xmm2
14110 ; SSE3-NEXT: psrlw $2, %xmm0
14111 ; SSE3-NEXT: pand %xmm1, %xmm0
14112 ; SSE3-NEXT: paddb %xmm2, %xmm0
14113 ; SSE3-NEXT: movdqa %xmm0, %xmm1
14114 ; SSE3-NEXT: psrlw $4, %xmm1
14115 ; SSE3-NEXT: paddb %xmm0, %xmm1
14116 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14117 ; SSE3-NEXT: pxor %xmm0, %xmm0
14118 ; SSE3-NEXT: movdqa %xmm1, %xmm2
14119 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
14120 ; SSE3-NEXT: psadbw %xmm0, %xmm2
14121 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
14122 ; SSE3-NEXT: psadbw %xmm0, %xmm1
14123 ; SSE3-NEXT: packuswb %xmm2, %xmm1
14124 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [24,24,24,24]
14125 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
14128 ; SSSE3-LABEL: ult_24_v4i32:
14130 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14131 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
14132 ; SSSE3-NEXT: pand %xmm1, %xmm2
14133 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14134 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
14135 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
14136 ; SSSE3-NEXT: psrlw $4, %xmm0
14137 ; SSSE3-NEXT: pand %xmm1, %xmm0
14138 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
14139 ; SSSE3-NEXT: paddb %xmm4, %xmm3
14140 ; SSSE3-NEXT: pxor %xmm0, %xmm0
14141 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
14142 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
14143 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
14144 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
14145 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
14146 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
14147 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [24,24,24,24]
14148 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
14151 ; SSE41-LABEL: ult_24_v4i32:
14153 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14154 ; SSE41-NEXT: movdqa %xmm0, %xmm2
14155 ; SSE41-NEXT: pand %xmm1, %xmm2
14156 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14157 ; SSE41-NEXT: movdqa %xmm3, %xmm4
14158 ; SSE41-NEXT: pshufb %xmm2, %xmm4
14159 ; SSE41-NEXT: psrlw $4, %xmm0
14160 ; SSE41-NEXT: pand %xmm1, %xmm0
14161 ; SSE41-NEXT: pshufb %xmm0, %xmm3
14162 ; SSE41-NEXT: paddb %xmm4, %xmm3
14163 ; SSE41-NEXT: pxor %xmm0, %xmm0
14164 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
14165 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
14166 ; SSE41-NEXT: psadbw %xmm0, %xmm3
14167 ; SSE41-NEXT: psadbw %xmm0, %xmm1
14168 ; SSE41-NEXT: packuswb %xmm3, %xmm1
14169 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [24,24,24,24]
14170 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
14173 ; AVX1-LABEL: ult_24_v4i32:
14175 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14176 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
14177 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14178 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
14179 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
14180 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
14181 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
14182 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
14183 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
14184 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14185 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14186 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14187 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14188 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14189 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24,24,24]
14190 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14193 ; AVX2-LABEL: ult_24_v4i32:
14195 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14196 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
14197 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14198 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
14199 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
14200 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
14201 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
14202 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
14203 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
14204 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14205 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14206 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14207 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14208 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14209 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
14210 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14213 ; AVX512VPOPCNTDQ-LABEL: ult_24_v4i32:
14214 ; AVX512VPOPCNTDQ: # %bb.0:
14215 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
14216 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
14217 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
14218 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14219 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
14220 ; AVX512VPOPCNTDQ-NEXT: retq
14222 ; AVX512VPOPCNTDQVL-LABEL: ult_24_v4i32:
14223 ; AVX512VPOPCNTDQVL: # %bb.0:
14224 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
14225 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
14226 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
14227 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
14228 ; AVX512VPOPCNTDQVL-NEXT: retq
14230 ; BITALG_NOVLX-LABEL: ult_24_v4i32:
14231 ; BITALG_NOVLX: # %bb.0:
14232 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
14233 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
14234 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
14235 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14236 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14237 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14238 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14239 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14240 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
14241 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14242 ; BITALG_NOVLX-NEXT: vzeroupper
14243 ; BITALG_NOVLX-NEXT: retq
14245 ; BITALG-LABEL: ult_24_v4i32:
14247 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
14248 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
14249 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14250 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14251 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14252 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14253 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14254 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
14255 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
14256 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
14257 ; BITALG-NEXT: retq
14258 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
14259 %3 = icmp ult <4 x i32> %2, <i32 24, i32 24, i32 24, i32 24>
14260 %4 = sext <4 x i1> %3 to <4 x i32>
14264 define <4 x i32> @ugt_24_v4i32(<4 x i32> %0) {
14265 ; SSE2-LABEL: ugt_24_v4i32:
14267 ; SSE2-NEXT: movdqa %xmm0, %xmm1
14268 ; SSE2-NEXT: psrlw $1, %xmm1
14269 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14270 ; SSE2-NEXT: psubb %xmm1, %xmm0
14271 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
14272 ; SSE2-NEXT: movdqa %xmm0, %xmm2
14273 ; SSE2-NEXT: pand %xmm1, %xmm2
14274 ; SSE2-NEXT: psrlw $2, %xmm0
14275 ; SSE2-NEXT: pand %xmm1, %xmm0
14276 ; SSE2-NEXT: paddb %xmm2, %xmm0
14277 ; SSE2-NEXT: movdqa %xmm0, %xmm1
14278 ; SSE2-NEXT: psrlw $4, %xmm1
14279 ; SSE2-NEXT: paddb %xmm1, %xmm0
14280 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14281 ; SSE2-NEXT: pxor %xmm1, %xmm1
14282 ; SSE2-NEXT: movdqa %xmm0, %xmm2
14283 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
14284 ; SSE2-NEXT: psadbw %xmm1, %xmm2
14285 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
14286 ; SSE2-NEXT: psadbw %xmm1, %xmm0
14287 ; SSE2-NEXT: packuswb %xmm2, %xmm0
14288 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14291 ; SSE3-LABEL: ugt_24_v4i32:
14293 ; SSE3-NEXT: movdqa %xmm0, %xmm1
14294 ; SSE3-NEXT: psrlw $1, %xmm1
14295 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14296 ; SSE3-NEXT: psubb %xmm1, %xmm0
14297 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
14298 ; SSE3-NEXT: movdqa %xmm0, %xmm2
14299 ; SSE3-NEXT: pand %xmm1, %xmm2
14300 ; SSE3-NEXT: psrlw $2, %xmm0
14301 ; SSE3-NEXT: pand %xmm1, %xmm0
14302 ; SSE3-NEXT: paddb %xmm2, %xmm0
14303 ; SSE3-NEXT: movdqa %xmm0, %xmm1
14304 ; SSE3-NEXT: psrlw $4, %xmm1
14305 ; SSE3-NEXT: paddb %xmm1, %xmm0
14306 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14307 ; SSE3-NEXT: pxor %xmm1, %xmm1
14308 ; SSE3-NEXT: movdqa %xmm0, %xmm2
14309 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
14310 ; SSE3-NEXT: psadbw %xmm1, %xmm2
14311 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
14312 ; SSE3-NEXT: psadbw %xmm1, %xmm0
14313 ; SSE3-NEXT: packuswb %xmm2, %xmm0
14314 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14317 ; SSSE3-LABEL: ugt_24_v4i32:
14319 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14320 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
14321 ; SSSE3-NEXT: pand %xmm2, %xmm3
14322 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14323 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
14324 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
14325 ; SSSE3-NEXT: psrlw $4, %xmm0
14326 ; SSSE3-NEXT: pand %xmm2, %xmm0
14327 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
14328 ; SSSE3-NEXT: paddb %xmm4, %xmm1
14329 ; SSSE3-NEXT: pxor %xmm0, %xmm0
14330 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
14331 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
14332 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
14333 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
14334 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
14335 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
14336 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14337 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
14340 ; SSE41-LABEL: ugt_24_v4i32:
14342 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14343 ; SSE41-NEXT: movdqa %xmm0, %xmm2
14344 ; SSE41-NEXT: pand %xmm1, %xmm2
14345 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14346 ; SSE41-NEXT: movdqa %xmm3, %xmm4
14347 ; SSE41-NEXT: pshufb %xmm2, %xmm4
14348 ; SSE41-NEXT: psrlw $4, %xmm0
14349 ; SSE41-NEXT: pand %xmm1, %xmm0
14350 ; SSE41-NEXT: pshufb %xmm0, %xmm3
14351 ; SSE41-NEXT: paddb %xmm4, %xmm3
14352 ; SSE41-NEXT: pxor %xmm1, %xmm1
14353 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
14354 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
14355 ; SSE41-NEXT: psadbw %xmm1, %xmm3
14356 ; SSE41-NEXT: psadbw %xmm1, %xmm0
14357 ; SSE41-NEXT: packuswb %xmm3, %xmm0
14358 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14361 ; AVX1-LABEL: ugt_24_v4i32:
14363 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14364 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
14365 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14366 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
14367 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
14368 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
14369 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
14370 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
14371 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
14372 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14373 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14374 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14375 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14376 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14377 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
14380 ; AVX2-LABEL: ugt_24_v4i32:
14382 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14383 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
14384 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14385 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
14386 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
14387 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
14388 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
14389 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
14390 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
14391 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14392 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14393 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14394 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14395 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14396 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
14397 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
14400 ; AVX512VPOPCNTDQ-LABEL: ugt_24_v4i32:
14401 ; AVX512VPOPCNTDQ: # %bb.0:
14402 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
14403 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
14404 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
14405 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
14406 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
14407 ; AVX512VPOPCNTDQ-NEXT: retq
14409 ; AVX512VPOPCNTDQVL-LABEL: ugt_24_v4i32:
14410 ; AVX512VPOPCNTDQVL: # %bb.0:
14411 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
14412 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
14413 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
14414 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
14415 ; AVX512VPOPCNTDQVL-NEXT: retq
14417 ; BITALG_NOVLX-LABEL: ugt_24_v4i32:
14418 ; BITALG_NOVLX: # %bb.0:
14419 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
14420 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
14421 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
14422 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14423 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14424 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14425 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14426 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14427 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
14428 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
14429 ; BITALG_NOVLX-NEXT: vzeroupper
14430 ; BITALG_NOVLX-NEXT: retq
14432 ; BITALG-LABEL: ugt_24_v4i32:
14434 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
14435 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
14436 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14437 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14438 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14439 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14440 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14441 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
14442 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
14443 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
14444 ; BITALG-NEXT: retq
14445 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
14446 %3 = icmp ugt <4 x i32> %2, <i32 24, i32 24, i32 24, i32 24>
14447 %4 = sext <4 x i1> %3 to <4 x i32>
14451 define <4 x i32> @ult_25_v4i32(<4 x i32> %0) {
14452 ; SSE2-LABEL: ult_25_v4i32:
14454 ; SSE2-NEXT: movdqa %xmm0, %xmm1
14455 ; SSE2-NEXT: psrlw $1, %xmm1
14456 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14457 ; SSE2-NEXT: psubb %xmm1, %xmm0
14458 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
14459 ; SSE2-NEXT: movdqa %xmm0, %xmm2
14460 ; SSE2-NEXT: pand %xmm1, %xmm2
14461 ; SSE2-NEXT: psrlw $2, %xmm0
14462 ; SSE2-NEXT: pand %xmm1, %xmm0
14463 ; SSE2-NEXT: paddb %xmm2, %xmm0
14464 ; SSE2-NEXT: movdqa %xmm0, %xmm1
14465 ; SSE2-NEXT: psrlw $4, %xmm1
14466 ; SSE2-NEXT: paddb %xmm0, %xmm1
14467 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14468 ; SSE2-NEXT: pxor %xmm0, %xmm0
14469 ; SSE2-NEXT: movdqa %xmm1, %xmm2
14470 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
14471 ; SSE2-NEXT: psadbw %xmm0, %xmm2
14472 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
14473 ; SSE2-NEXT: psadbw %xmm0, %xmm1
14474 ; SSE2-NEXT: packuswb %xmm2, %xmm1
14475 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25]
14476 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
14479 ; SSE3-LABEL: ult_25_v4i32:
14481 ; SSE3-NEXT: movdqa %xmm0, %xmm1
14482 ; SSE3-NEXT: psrlw $1, %xmm1
14483 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14484 ; SSE3-NEXT: psubb %xmm1, %xmm0
14485 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
14486 ; SSE3-NEXT: movdqa %xmm0, %xmm2
14487 ; SSE3-NEXT: pand %xmm1, %xmm2
14488 ; SSE3-NEXT: psrlw $2, %xmm0
14489 ; SSE3-NEXT: pand %xmm1, %xmm0
14490 ; SSE3-NEXT: paddb %xmm2, %xmm0
14491 ; SSE3-NEXT: movdqa %xmm0, %xmm1
14492 ; SSE3-NEXT: psrlw $4, %xmm1
14493 ; SSE3-NEXT: paddb %xmm0, %xmm1
14494 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14495 ; SSE3-NEXT: pxor %xmm0, %xmm0
14496 ; SSE3-NEXT: movdqa %xmm1, %xmm2
14497 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
14498 ; SSE3-NEXT: psadbw %xmm0, %xmm2
14499 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
14500 ; SSE3-NEXT: psadbw %xmm0, %xmm1
14501 ; SSE3-NEXT: packuswb %xmm2, %xmm1
14502 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25]
14503 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
14506 ; SSSE3-LABEL: ult_25_v4i32:
14508 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14509 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
14510 ; SSSE3-NEXT: pand %xmm1, %xmm2
14511 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14512 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
14513 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
14514 ; SSSE3-NEXT: psrlw $4, %xmm0
14515 ; SSSE3-NEXT: pand %xmm1, %xmm0
14516 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
14517 ; SSSE3-NEXT: paddb %xmm4, %xmm3
14518 ; SSSE3-NEXT: pxor %xmm0, %xmm0
14519 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
14520 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
14521 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
14522 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
14523 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
14524 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
14525 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25]
14526 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
14529 ; SSE41-LABEL: ult_25_v4i32:
14531 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14532 ; SSE41-NEXT: movdqa %xmm0, %xmm2
14533 ; SSE41-NEXT: pand %xmm1, %xmm2
14534 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14535 ; SSE41-NEXT: movdqa %xmm3, %xmm4
14536 ; SSE41-NEXT: pshufb %xmm2, %xmm4
14537 ; SSE41-NEXT: psrlw $4, %xmm0
14538 ; SSE41-NEXT: pand %xmm1, %xmm0
14539 ; SSE41-NEXT: pshufb %xmm0, %xmm3
14540 ; SSE41-NEXT: paddb %xmm4, %xmm3
14541 ; SSE41-NEXT: pxor %xmm0, %xmm0
14542 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
14543 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
14544 ; SSE41-NEXT: psadbw %xmm0, %xmm3
14545 ; SSE41-NEXT: psadbw %xmm0, %xmm1
14546 ; SSE41-NEXT: packuswb %xmm3, %xmm1
14547 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25]
14548 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
14551 ; AVX1-LABEL: ult_25_v4i32:
14553 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14554 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
14555 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14556 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
14557 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
14558 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
14559 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
14560 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
14561 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
14562 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14563 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14564 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14565 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14566 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14567 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25,25,25]
14568 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14571 ; AVX2-LABEL: ult_25_v4i32:
14573 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14574 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
14575 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14576 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
14577 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
14578 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
14579 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
14580 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
14581 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
14582 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14583 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14584 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14585 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14586 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14587 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
14588 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14591 ; AVX512VPOPCNTDQ-LABEL: ult_25_v4i32:
14592 ; AVX512VPOPCNTDQ: # %bb.0:
14593 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
14594 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
14595 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
14596 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14597 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
14598 ; AVX512VPOPCNTDQ-NEXT: retq
14600 ; AVX512VPOPCNTDQVL-LABEL: ult_25_v4i32:
14601 ; AVX512VPOPCNTDQVL: # %bb.0:
14602 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
14603 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
14604 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
14605 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
14606 ; AVX512VPOPCNTDQVL-NEXT: retq
14608 ; BITALG_NOVLX-LABEL: ult_25_v4i32:
14609 ; BITALG_NOVLX: # %bb.0:
14610 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
14611 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
14612 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
14613 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14614 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14615 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14616 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14617 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14618 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
14619 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14620 ; BITALG_NOVLX-NEXT: vzeroupper
14621 ; BITALG_NOVLX-NEXT: retq
14623 ; BITALG-LABEL: ult_25_v4i32:
14625 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
14626 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
14627 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14628 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14629 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14630 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14631 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14632 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
14633 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
14634 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
14635 ; BITALG-NEXT: retq
14636 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
14637 %3 = icmp ult <4 x i32> %2, <i32 25, i32 25, i32 25, i32 25>
14638 %4 = sext <4 x i1> %3 to <4 x i32>
14642 define <4 x i32> @ugt_25_v4i32(<4 x i32> %0) {
14643 ; SSE2-LABEL: ugt_25_v4i32:
14645 ; SSE2-NEXT: movdqa %xmm0, %xmm1
14646 ; SSE2-NEXT: psrlw $1, %xmm1
14647 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14648 ; SSE2-NEXT: psubb %xmm1, %xmm0
14649 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
14650 ; SSE2-NEXT: movdqa %xmm0, %xmm2
14651 ; SSE2-NEXT: pand %xmm1, %xmm2
14652 ; SSE2-NEXT: psrlw $2, %xmm0
14653 ; SSE2-NEXT: pand %xmm1, %xmm0
14654 ; SSE2-NEXT: paddb %xmm2, %xmm0
14655 ; SSE2-NEXT: movdqa %xmm0, %xmm1
14656 ; SSE2-NEXT: psrlw $4, %xmm1
14657 ; SSE2-NEXT: paddb %xmm1, %xmm0
14658 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14659 ; SSE2-NEXT: pxor %xmm1, %xmm1
14660 ; SSE2-NEXT: movdqa %xmm0, %xmm2
14661 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
14662 ; SSE2-NEXT: psadbw %xmm1, %xmm2
14663 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
14664 ; SSE2-NEXT: psadbw %xmm1, %xmm0
14665 ; SSE2-NEXT: packuswb %xmm2, %xmm0
14666 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14669 ; SSE3-LABEL: ugt_25_v4i32:
14671 ; SSE3-NEXT: movdqa %xmm0, %xmm1
14672 ; SSE3-NEXT: psrlw $1, %xmm1
14673 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14674 ; SSE3-NEXT: psubb %xmm1, %xmm0
14675 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
14676 ; SSE3-NEXT: movdqa %xmm0, %xmm2
14677 ; SSE3-NEXT: pand %xmm1, %xmm2
14678 ; SSE3-NEXT: psrlw $2, %xmm0
14679 ; SSE3-NEXT: pand %xmm1, %xmm0
14680 ; SSE3-NEXT: paddb %xmm2, %xmm0
14681 ; SSE3-NEXT: movdqa %xmm0, %xmm1
14682 ; SSE3-NEXT: psrlw $4, %xmm1
14683 ; SSE3-NEXT: paddb %xmm1, %xmm0
14684 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14685 ; SSE3-NEXT: pxor %xmm1, %xmm1
14686 ; SSE3-NEXT: movdqa %xmm0, %xmm2
14687 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
14688 ; SSE3-NEXT: psadbw %xmm1, %xmm2
14689 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
14690 ; SSE3-NEXT: psadbw %xmm1, %xmm0
14691 ; SSE3-NEXT: packuswb %xmm2, %xmm0
14692 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14695 ; SSSE3-LABEL: ugt_25_v4i32:
14697 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14698 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
14699 ; SSSE3-NEXT: pand %xmm2, %xmm3
14700 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14701 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
14702 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
14703 ; SSSE3-NEXT: psrlw $4, %xmm0
14704 ; SSSE3-NEXT: pand %xmm2, %xmm0
14705 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
14706 ; SSSE3-NEXT: paddb %xmm4, %xmm1
14707 ; SSSE3-NEXT: pxor %xmm0, %xmm0
14708 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
14709 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
14710 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
14711 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
14712 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
14713 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
14714 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14715 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
14718 ; SSE41-LABEL: ugt_25_v4i32:
14720 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14721 ; SSE41-NEXT: movdqa %xmm0, %xmm2
14722 ; SSE41-NEXT: pand %xmm1, %xmm2
14723 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14724 ; SSE41-NEXT: movdqa %xmm3, %xmm4
14725 ; SSE41-NEXT: pshufb %xmm2, %xmm4
14726 ; SSE41-NEXT: psrlw $4, %xmm0
14727 ; SSE41-NEXT: pand %xmm1, %xmm0
14728 ; SSE41-NEXT: pshufb %xmm0, %xmm3
14729 ; SSE41-NEXT: paddb %xmm4, %xmm3
14730 ; SSE41-NEXT: pxor %xmm1, %xmm1
14731 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
14732 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
14733 ; SSE41-NEXT: psadbw %xmm1, %xmm3
14734 ; SSE41-NEXT: psadbw %xmm1, %xmm0
14735 ; SSE41-NEXT: packuswb %xmm3, %xmm0
14736 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14739 ; AVX1-LABEL: ugt_25_v4i32:
14741 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14742 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
14743 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14744 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
14745 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
14746 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
14747 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
14748 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
14749 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
14750 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14751 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14752 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14753 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14754 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14755 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
14758 ; AVX2-LABEL: ugt_25_v4i32:
14760 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14761 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
14762 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14763 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
14764 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
14765 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
14766 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
14767 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
14768 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
14769 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14770 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14771 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14772 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14773 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14774 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
14775 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
14778 ; AVX512VPOPCNTDQ-LABEL: ugt_25_v4i32:
14779 ; AVX512VPOPCNTDQ: # %bb.0:
14780 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
14781 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
14782 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
14783 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
14784 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
14785 ; AVX512VPOPCNTDQ-NEXT: retq
14787 ; AVX512VPOPCNTDQVL-LABEL: ugt_25_v4i32:
14788 ; AVX512VPOPCNTDQVL: # %bb.0:
14789 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
14790 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
14791 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
14792 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
14793 ; AVX512VPOPCNTDQVL-NEXT: retq
14795 ; BITALG_NOVLX-LABEL: ugt_25_v4i32:
14796 ; BITALG_NOVLX: # %bb.0:
14797 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
14798 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
14799 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
14800 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14801 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14802 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14803 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14804 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14805 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
14806 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
14807 ; BITALG_NOVLX-NEXT: vzeroupper
14808 ; BITALG_NOVLX-NEXT: retq
14810 ; BITALG-LABEL: ugt_25_v4i32:
14812 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
14813 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
14814 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14815 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14816 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14817 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14818 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14819 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
14820 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
14821 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
14822 ; BITALG-NEXT: retq
14823 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
14824 %3 = icmp ugt <4 x i32> %2, <i32 25, i32 25, i32 25, i32 25>
14825 %4 = sext <4 x i1> %3 to <4 x i32>
14829 define <4 x i32> @ult_26_v4i32(<4 x i32> %0) {
14830 ; SSE2-LABEL: ult_26_v4i32:
14832 ; SSE2-NEXT: movdqa %xmm0, %xmm1
14833 ; SSE2-NEXT: psrlw $1, %xmm1
14834 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14835 ; SSE2-NEXT: psubb %xmm1, %xmm0
14836 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
14837 ; SSE2-NEXT: movdqa %xmm0, %xmm2
14838 ; SSE2-NEXT: pand %xmm1, %xmm2
14839 ; SSE2-NEXT: psrlw $2, %xmm0
14840 ; SSE2-NEXT: pand %xmm1, %xmm0
14841 ; SSE2-NEXT: paddb %xmm2, %xmm0
14842 ; SSE2-NEXT: movdqa %xmm0, %xmm1
14843 ; SSE2-NEXT: psrlw $4, %xmm1
14844 ; SSE2-NEXT: paddb %xmm0, %xmm1
14845 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14846 ; SSE2-NEXT: pxor %xmm0, %xmm0
14847 ; SSE2-NEXT: movdqa %xmm1, %xmm2
14848 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
14849 ; SSE2-NEXT: psadbw %xmm0, %xmm2
14850 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
14851 ; SSE2-NEXT: psadbw %xmm0, %xmm1
14852 ; SSE2-NEXT: packuswb %xmm2, %xmm1
14853 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [26,26,26,26]
14854 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
14857 ; SSE3-LABEL: ult_26_v4i32:
14859 ; SSE3-NEXT: movdqa %xmm0, %xmm1
14860 ; SSE3-NEXT: psrlw $1, %xmm1
14861 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14862 ; SSE3-NEXT: psubb %xmm1, %xmm0
14863 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
14864 ; SSE3-NEXT: movdqa %xmm0, %xmm2
14865 ; SSE3-NEXT: pand %xmm1, %xmm2
14866 ; SSE3-NEXT: psrlw $2, %xmm0
14867 ; SSE3-NEXT: pand %xmm1, %xmm0
14868 ; SSE3-NEXT: paddb %xmm2, %xmm0
14869 ; SSE3-NEXT: movdqa %xmm0, %xmm1
14870 ; SSE3-NEXT: psrlw $4, %xmm1
14871 ; SSE3-NEXT: paddb %xmm0, %xmm1
14872 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14873 ; SSE3-NEXT: pxor %xmm0, %xmm0
14874 ; SSE3-NEXT: movdqa %xmm1, %xmm2
14875 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
14876 ; SSE3-NEXT: psadbw %xmm0, %xmm2
14877 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
14878 ; SSE3-NEXT: psadbw %xmm0, %xmm1
14879 ; SSE3-NEXT: packuswb %xmm2, %xmm1
14880 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [26,26,26,26]
14881 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
14884 ; SSSE3-LABEL: ult_26_v4i32:
14886 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14887 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
14888 ; SSSE3-NEXT: pand %xmm1, %xmm2
14889 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14890 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
14891 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
14892 ; SSSE3-NEXT: psrlw $4, %xmm0
14893 ; SSSE3-NEXT: pand %xmm1, %xmm0
14894 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
14895 ; SSSE3-NEXT: paddb %xmm4, %xmm3
14896 ; SSSE3-NEXT: pxor %xmm0, %xmm0
14897 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
14898 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
14899 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
14900 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
14901 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
14902 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
14903 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [26,26,26,26]
14904 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
14907 ; SSE41-LABEL: ult_26_v4i32:
14909 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14910 ; SSE41-NEXT: movdqa %xmm0, %xmm2
14911 ; SSE41-NEXT: pand %xmm1, %xmm2
14912 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14913 ; SSE41-NEXT: movdqa %xmm3, %xmm4
14914 ; SSE41-NEXT: pshufb %xmm2, %xmm4
14915 ; SSE41-NEXT: psrlw $4, %xmm0
14916 ; SSE41-NEXT: pand %xmm1, %xmm0
14917 ; SSE41-NEXT: pshufb %xmm0, %xmm3
14918 ; SSE41-NEXT: paddb %xmm4, %xmm3
14919 ; SSE41-NEXT: pxor %xmm0, %xmm0
14920 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
14921 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
14922 ; SSE41-NEXT: psadbw %xmm0, %xmm3
14923 ; SSE41-NEXT: psadbw %xmm0, %xmm1
14924 ; SSE41-NEXT: packuswb %xmm3, %xmm1
14925 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [26,26,26,26]
14926 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
14929 ; AVX1-LABEL: ult_26_v4i32:
14931 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14932 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
14933 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14934 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
14935 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
14936 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
14937 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
14938 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
14939 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
14940 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14941 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14942 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14943 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14944 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14945 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26,26,26]
14946 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14949 ; AVX2-LABEL: ult_26_v4i32:
14951 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14952 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
14953 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14954 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
14955 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
14956 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
14957 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
14958 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
14959 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
14960 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14961 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14962 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14963 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14964 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14965 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
14966 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14969 ; AVX512VPOPCNTDQ-LABEL: ult_26_v4i32:
14970 ; AVX512VPOPCNTDQ: # %bb.0:
14971 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
14972 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
14973 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
14974 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14975 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
14976 ; AVX512VPOPCNTDQ-NEXT: retq
14978 ; AVX512VPOPCNTDQVL-LABEL: ult_26_v4i32:
14979 ; AVX512VPOPCNTDQVL: # %bb.0:
14980 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
14981 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
14982 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
14983 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
14984 ; AVX512VPOPCNTDQVL-NEXT: retq
14986 ; BITALG_NOVLX-LABEL: ult_26_v4i32:
14987 ; BITALG_NOVLX: # %bb.0:
14988 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
14989 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
14990 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
14991 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14992 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14993 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14994 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14995 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14996 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
14997 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14998 ; BITALG_NOVLX-NEXT: vzeroupper
14999 ; BITALG_NOVLX-NEXT: retq
15001 ; BITALG-LABEL: ult_26_v4i32:
15003 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
15004 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
15005 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15006 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15007 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15008 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15009 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15010 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
15011 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
15012 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
15013 ; BITALG-NEXT: retq
15014 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
15015 %3 = icmp ult <4 x i32> %2, <i32 26, i32 26, i32 26, i32 26>
15016 %4 = sext <4 x i1> %3 to <4 x i32>
; Popcount each i32 lane, compare 'icmp ugt …, 26', sign-extend the i1 mask.
; The CHECK lines below are autogenerated (update_llc_test_checks.py, see file
; header) and pin the expected lowering for each RUN configuration.
15020 define <4 x i32> @ugt_26_v4i32(<4 x i32> %0) {
; SSE2/SSE3: shift-and-mask byte popcount, widened to i32 lanes via psadbw.
15021 ; SSE2-LABEL: ugt_26_v4i32:
15023 ; SSE2-NEXT: movdqa %xmm0, %xmm1
15024 ; SSE2-NEXT: psrlw $1, %xmm1
15025 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15026 ; SSE2-NEXT: psubb %xmm1, %xmm0
15027 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15028 ; SSE2-NEXT: movdqa %xmm0, %xmm2
15029 ; SSE2-NEXT: pand %xmm1, %xmm2
15030 ; SSE2-NEXT: psrlw $2, %xmm0
15031 ; SSE2-NEXT: pand %xmm1, %xmm0
15032 ; SSE2-NEXT: paddb %xmm2, %xmm0
15033 ; SSE2-NEXT: movdqa %xmm0, %xmm1
15034 ; SSE2-NEXT: psrlw $4, %xmm1
15035 ; SSE2-NEXT: paddb %xmm1, %xmm0
15036 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
15037 ; SSE2-NEXT: pxor %xmm1, %xmm1
15038 ; SSE2-NEXT: movdqa %xmm0, %xmm2
15039 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
15040 ; SSE2-NEXT: psadbw %xmm1, %xmm2
15041 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
15042 ; SSE2-NEXT: psadbw %xmm1, %xmm0
15043 ; SSE2-NEXT: packuswb %xmm2, %xmm0
15044 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
15047 ; SSE3-LABEL: ugt_26_v4i32:
15049 ; SSE3-NEXT: movdqa %xmm0, %xmm1
15050 ; SSE3-NEXT: psrlw $1, %xmm1
15051 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15052 ; SSE3-NEXT: psubb %xmm1, %xmm0
15053 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15054 ; SSE3-NEXT: movdqa %xmm0, %xmm2
15055 ; SSE3-NEXT: pand %xmm1, %xmm2
15056 ; SSE3-NEXT: psrlw $2, %xmm0
15057 ; SSE3-NEXT: pand %xmm1, %xmm0
15058 ; SSE3-NEXT: paddb %xmm2, %xmm0
15059 ; SSE3-NEXT: movdqa %xmm0, %xmm1
15060 ; SSE3-NEXT: psrlw $4, %xmm1
15061 ; SSE3-NEXT: paddb %xmm1, %xmm0
15062 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
15063 ; SSE3-NEXT: pxor %xmm1, %xmm1
15064 ; SSE3-NEXT: movdqa %xmm0, %xmm2
15065 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
15066 ; SSE3-NEXT: psadbw %xmm1, %xmm2
15067 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
15068 ; SSE3-NEXT: psadbw %xmm1, %xmm0
15069 ; SSE3-NEXT: packuswb %xmm2, %xmm0
15070 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3/SSE4.1: pshufb nibble lookup-table popcount instead of shift/mask.
15073 ; SSSE3-LABEL: ugt_26_v4i32:
15075 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15076 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
15077 ; SSSE3-NEXT: pand %xmm2, %xmm3
15078 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15079 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
15080 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
15081 ; SSSE3-NEXT: psrlw $4, %xmm0
15082 ; SSSE3-NEXT: pand %xmm2, %xmm0
15083 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
15084 ; SSSE3-NEXT: paddb %xmm4, %xmm1
15085 ; SSSE3-NEXT: pxor %xmm0, %xmm0
15086 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
15087 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15088 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
15089 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15090 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
15091 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
15092 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15093 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
15096 ; SSE41-LABEL: ugt_26_v4i32:
15098 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15099 ; SSE41-NEXT: movdqa %xmm0, %xmm2
15100 ; SSE41-NEXT: pand %xmm1, %xmm2
15101 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15102 ; SSE41-NEXT: movdqa %xmm3, %xmm4
15103 ; SSE41-NEXT: pshufb %xmm2, %xmm4
15104 ; SSE41-NEXT: psrlw $4, %xmm0
15105 ; SSE41-NEXT: pand %xmm1, %xmm0
15106 ; SSE41-NEXT: pshufb %xmm0, %xmm3
15107 ; SSE41-NEXT: paddb %xmm4, %xmm3
15108 ; SSE41-NEXT: pxor %xmm1, %xmm1
15109 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
15110 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
15111 ; SSE41-NEXT: psadbw %xmm1, %xmm3
15112 ; SSE41-NEXT: psadbw %xmm1, %xmm0
15113 ; SSE41-NEXT: packuswb %xmm3, %xmm0
15114 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX1/AVX2: same pshufb LUT scheme in three-operand VEX form.
15117 ; AVX1-LABEL: ugt_26_v4i32:
15119 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15120 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
15121 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15122 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
15123 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
15124 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
15125 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
15126 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
15127 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
15128 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15129 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15130 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15131 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15132 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15133 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
15136 ; AVX2-LABEL: ugt_26_v4i32:
15138 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15139 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
15140 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15141 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
15142 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
15143 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
15144 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
15145 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
15146 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
15147 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15148 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15149 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15150 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15151 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15152 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
15153 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ: native vpopcntd; without VL the op is widened to zmm.
15156 ; AVX512VPOPCNTDQ-LABEL: ugt_26_v4i32:
15157 ; AVX512VPOPCNTDQ: # %bb.0:
15158 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15159 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
15160 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
15161 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
15162 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
15163 ; AVX512VPOPCNTDQ-NEXT: retq
15165 ; AVX512VPOPCNTDQVL-LABEL: ugt_26_v4i32:
15166 ; AVX512VPOPCNTDQVL: # %bb.0:
15167 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
15168 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
15169 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
15170 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
15171 ; AVX512VPOPCNTDQVL-NEXT: retq
; BITALG: vpopcntb per byte, then psadbw reduction back to i32 lanes.
15173 ; BITALG_NOVLX-LABEL: ugt_26_v4i32:
15174 ; BITALG_NOVLX: # %bb.0:
15175 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15176 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
15177 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
15178 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15179 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15180 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15181 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15182 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15183 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
15184 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
15185 ; BITALG_NOVLX-NEXT: vzeroupper
15186 ; BITALG_NOVLX-NEXT: retq
15188 ; BITALG-LABEL: ugt_26_v4i32:
15190 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
15191 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
15192 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15193 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15194 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15195 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15196 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15197 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
15198 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
15199 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
15200 ; BITALG-NEXT: retq
15201 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
15202 %3 = icmp ugt <4 x i32> %2, <i32 26, i32 26, i32 26, i32 26>
15203 %4 = sext <4 x i1> %3 to <4 x i32>
; Popcount each i32 lane, compare 'icmp ult …, 27', sign-extend the i1 mask.
; Note the SSE/AVX lowerings emit the compare as a reversed signed pcmpgtd
; (splat 27 > popcount) instead of a direct unsigned compare.
; CHECK lines are autogenerated (update_llc_test_checks.py, see file header).
15207 define <4 x i32> @ult_27_v4i32(<4 x i32> %0) {
; SSE2/SSE3: shift-and-mask byte popcount, widened to i32 lanes via psadbw.
15208 ; SSE2-LABEL: ult_27_v4i32:
15210 ; SSE2-NEXT: movdqa %xmm0, %xmm1
15211 ; SSE2-NEXT: psrlw $1, %xmm1
15212 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15213 ; SSE2-NEXT: psubb %xmm1, %xmm0
15214 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15215 ; SSE2-NEXT: movdqa %xmm0, %xmm2
15216 ; SSE2-NEXT: pand %xmm1, %xmm2
15217 ; SSE2-NEXT: psrlw $2, %xmm0
15218 ; SSE2-NEXT: pand %xmm1, %xmm0
15219 ; SSE2-NEXT: paddb %xmm2, %xmm0
15220 ; SSE2-NEXT: movdqa %xmm0, %xmm1
15221 ; SSE2-NEXT: psrlw $4, %xmm1
15222 ; SSE2-NEXT: paddb %xmm0, %xmm1
15223 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15224 ; SSE2-NEXT: pxor %xmm0, %xmm0
15225 ; SSE2-NEXT: movdqa %xmm1, %xmm2
15226 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15227 ; SSE2-NEXT: psadbw %xmm0, %xmm2
15228 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15229 ; SSE2-NEXT: psadbw %xmm0, %xmm1
15230 ; SSE2-NEXT: packuswb %xmm2, %xmm1
15231 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [27,27,27,27]
15232 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
15235 ; SSE3-LABEL: ult_27_v4i32:
15237 ; SSE3-NEXT: movdqa %xmm0, %xmm1
15238 ; SSE3-NEXT: psrlw $1, %xmm1
15239 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15240 ; SSE3-NEXT: psubb %xmm1, %xmm0
15241 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15242 ; SSE3-NEXT: movdqa %xmm0, %xmm2
15243 ; SSE3-NEXT: pand %xmm1, %xmm2
15244 ; SSE3-NEXT: psrlw $2, %xmm0
15245 ; SSE3-NEXT: pand %xmm1, %xmm0
15246 ; SSE3-NEXT: paddb %xmm2, %xmm0
15247 ; SSE3-NEXT: movdqa %xmm0, %xmm1
15248 ; SSE3-NEXT: psrlw $4, %xmm1
15249 ; SSE3-NEXT: paddb %xmm0, %xmm1
15250 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15251 ; SSE3-NEXT: pxor %xmm0, %xmm0
15252 ; SSE3-NEXT: movdqa %xmm1, %xmm2
15253 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15254 ; SSE3-NEXT: psadbw %xmm0, %xmm2
15255 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15256 ; SSE3-NEXT: psadbw %xmm0, %xmm1
15257 ; SSE3-NEXT: packuswb %xmm2, %xmm1
15258 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [27,27,27,27]
15259 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
; SSSE3/SSE4.1: pshufb nibble lookup-table popcount instead of shift/mask.
15262 ; SSSE3-LABEL: ult_27_v4i32:
15264 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15265 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
15266 ; SSSE3-NEXT: pand %xmm1, %xmm2
15267 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15268 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
15269 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
15270 ; SSSE3-NEXT: psrlw $4, %xmm0
15271 ; SSSE3-NEXT: pand %xmm1, %xmm0
15272 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
15273 ; SSSE3-NEXT: paddb %xmm4, %xmm3
15274 ; SSSE3-NEXT: pxor %xmm0, %xmm0
15275 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
15276 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
15277 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
15278 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
15279 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
15280 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
15281 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [27,27,27,27]
15282 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
15285 ; SSE41-LABEL: ult_27_v4i32:
15287 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15288 ; SSE41-NEXT: movdqa %xmm0, %xmm2
15289 ; SSE41-NEXT: pand %xmm1, %xmm2
15290 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15291 ; SSE41-NEXT: movdqa %xmm3, %xmm4
15292 ; SSE41-NEXT: pshufb %xmm2, %xmm4
15293 ; SSE41-NEXT: psrlw $4, %xmm0
15294 ; SSE41-NEXT: pand %xmm1, %xmm0
15295 ; SSE41-NEXT: pshufb %xmm0, %xmm3
15296 ; SSE41-NEXT: paddb %xmm4, %xmm3
15297 ; SSE41-NEXT: pxor %xmm0, %xmm0
15298 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
15299 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
15300 ; SSE41-NEXT: psadbw %xmm0, %xmm3
15301 ; SSE41-NEXT: psadbw %xmm0, %xmm1
15302 ; SSE41-NEXT: packuswb %xmm3, %xmm1
15303 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [27,27,27,27]
15304 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
; AVX1/AVX2: same pshufb LUT scheme in three-operand VEX form.
15307 ; AVX1-LABEL: ult_27_v4i32:
15309 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15310 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
15311 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15312 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
15313 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
15314 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
15315 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
15316 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
15317 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
15318 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15319 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15320 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15321 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15322 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15323 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27,27,27]
15324 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
15327 ; AVX2-LABEL: ult_27_v4i32:
15329 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15330 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
15331 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15332 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
15333 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
15334 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
15335 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
15336 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
15337 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
15338 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15339 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15340 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15341 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15342 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15343 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
15344 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ: native vpopcntd; without VL the op is widened to zmm.
15347 ; AVX512VPOPCNTDQ-LABEL: ult_27_v4i32:
15348 ; AVX512VPOPCNTDQ: # %bb.0:
15349 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15350 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
15351 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
15352 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
15353 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
15354 ; AVX512VPOPCNTDQ-NEXT: retq
15356 ; AVX512VPOPCNTDQVL-LABEL: ult_27_v4i32:
15357 ; AVX512VPOPCNTDQVL: # %bb.0:
15358 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
15359 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
15360 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
15361 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
15362 ; AVX512VPOPCNTDQVL-NEXT: retq
; BITALG: vpopcntb per byte, then psadbw reduction back to i32 lanes.
15364 ; BITALG_NOVLX-LABEL: ult_27_v4i32:
15365 ; BITALG_NOVLX: # %bb.0:
15366 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15367 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
15368 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
15369 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15370 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15371 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15372 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15373 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15374 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
15375 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
15376 ; BITALG_NOVLX-NEXT: vzeroupper
15377 ; BITALG_NOVLX-NEXT: retq
15379 ; BITALG-LABEL: ult_27_v4i32:
15381 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
15382 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
15383 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15384 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15385 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15386 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15387 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15388 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
15389 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
15390 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
15391 ; BITALG-NEXT: retq
15392 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
15393 %3 = icmp ult <4 x i32> %2, <i32 27, i32 27, i32 27, i32 27>
15394 %4 = sext <4 x i1> %3 to <4 x i32>
; Popcount each i32 lane, compare 'icmp ugt …, 27', sign-extend the i1 mask.
; CHECK lines are autogenerated (update_llc_test_checks.py, see file header).
15398 define <4 x i32> @ugt_27_v4i32(<4 x i32> %0) {
; SSE2/SSE3: shift-and-mask byte popcount, widened to i32 lanes via psadbw.
15399 ; SSE2-LABEL: ugt_27_v4i32:
15401 ; SSE2-NEXT: movdqa %xmm0, %xmm1
15402 ; SSE2-NEXT: psrlw $1, %xmm1
15403 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15404 ; SSE2-NEXT: psubb %xmm1, %xmm0
15405 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15406 ; SSE2-NEXT: movdqa %xmm0, %xmm2
15407 ; SSE2-NEXT: pand %xmm1, %xmm2
15408 ; SSE2-NEXT: psrlw $2, %xmm0
15409 ; SSE2-NEXT: pand %xmm1, %xmm0
15410 ; SSE2-NEXT: paddb %xmm2, %xmm0
15411 ; SSE2-NEXT: movdqa %xmm0, %xmm1
15412 ; SSE2-NEXT: psrlw $4, %xmm1
15413 ; SSE2-NEXT: paddb %xmm1, %xmm0
15414 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
15415 ; SSE2-NEXT: pxor %xmm1, %xmm1
15416 ; SSE2-NEXT: movdqa %xmm0, %xmm2
15417 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
15418 ; SSE2-NEXT: psadbw %xmm1, %xmm2
15419 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
15420 ; SSE2-NEXT: psadbw %xmm1, %xmm0
15421 ; SSE2-NEXT: packuswb %xmm2, %xmm0
15422 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
15425 ; SSE3-LABEL: ugt_27_v4i32:
15427 ; SSE3-NEXT: movdqa %xmm0, %xmm1
15428 ; SSE3-NEXT: psrlw $1, %xmm1
15429 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15430 ; SSE3-NEXT: psubb %xmm1, %xmm0
15431 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15432 ; SSE3-NEXT: movdqa %xmm0, %xmm2
15433 ; SSE3-NEXT: pand %xmm1, %xmm2
15434 ; SSE3-NEXT: psrlw $2, %xmm0
15435 ; SSE3-NEXT: pand %xmm1, %xmm0
15436 ; SSE3-NEXT: paddb %xmm2, %xmm0
15437 ; SSE3-NEXT: movdqa %xmm0, %xmm1
15438 ; SSE3-NEXT: psrlw $4, %xmm1
15439 ; SSE3-NEXT: paddb %xmm1, %xmm0
15440 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
15441 ; SSE3-NEXT: pxor %xmm1, %xmm1
15442 ; SSE3-NEXT: movdqa %xmm0, %xmm2
15443 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
15444 ; SSE3-NEXT: psadbw %xmm1, %xmm2
15445 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
15446 ; SSE3-NEXT: psadbw %xmm1, %xmm0
15447 ; SSE3-NEXT: packuswb %xmm2, %xmm0
15448 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3/SSE4.1: pshufb nibble lookup-table popcount instead of shift/mask.
15451 ; SSSE3-LABEL: ugt_27_v4i32:
15453 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15454 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
15455 ; SSSE3-NEXT: pand %xmm2, %xmm3
15456 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15457 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
15458 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
15459 ; SSSE3-NEXT: psrlw $4, %xmm0
15460 ; SSSE3-NEXT: pand %xmm2, %xmm0
15461 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
15462 ; SSSE3-NEXT: paddb %xmm4, %xmm1
15463 ; SSSE3-NEXT: pxor %xmm0, %xmm0
15464 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
15465 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15466 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
15467 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15468 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
15469 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
15470 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15471 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
15474 ; SSE41-LABEL: ugt_27_v4i32:
15476 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15477 ; SSE41-NEXT: movdqa %xmm0, %xmm2
15478 ; SSE41-NEXT: pand %xmm1, %xmm2
15479 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15480 ; SSE41-NEXT: movdqa %xmm3, %xmm4
15481 ; SSE41-NEXT: pshufb %xmm2, %xmm4
15482 ; SSE41-NEXT: psrlw $4, %xmm0
15483 ; SSE41-NEXT: pand %xmm1, %xmm0
15484 ; SSE41-NEXT: pshufb %xmm0, %xmm3
15485 ; SSE41-NEXT: paddb %xmm4, %xmm3
15486 ; SSE41-NEXT: pxor %xmm1, %xmm1
15487 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
15488 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
15489 ; SSE41-NEXT: psadbw %xmm1, %xmm3
15490 ; SSE41-NEXT: psadbw %xmm1, %xmm0
15491 ; SSE41-NEXT: packuswb %xmm3, %xmm0
15492 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX1/AVX2: same pshufb LUT scheme in three-operand VEX form.
15495 ; AVX1-LABEL: ugt_27_v4i32:
15497 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15498 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
15499 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15500 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
15501 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
15502 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
15503 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
15504 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
15505 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
15506 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15507 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15508 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15509 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15510 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15511 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
15514 ; AVX2-LABEL: ugt_27_v4i32:
15516 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15517 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
15518 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15519 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
15520 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
15521 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
15522 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
15523 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
15524 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
15525 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15526 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15527 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15528 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15529 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15530 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
15531 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ: native vpopcntd; without VL the op is widened to zmm.
15534 ; AVX512VPOPCNTDQ-LABEL: ugt_27_v4i32:
15535 ; AVX512VPOPCNTDQ: # %bb.0:
15536 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15537 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
15538 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
15539 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
15540 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
15541 ; AVX512VPOPCNTDQ-NEXT: retq
15543 ; AVX512VPOPCNTDQVL-LABEL: ugt_27_v4i32:
15544 ; AVX512VPOPCNTDQVL: # %bb.0:
15545 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
15546 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
15547 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
15548 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
15549 ; AVX512VPOPCNTDQVL-NEXT: retq
; BITALG: vpopcntb per byte, then psadbw reduction back to i32 lanes.
15551 ; BITALG_NOVLX-LABEL: ugt_27_v4i32:
15552 ; BITALG_NOVLX: # %bb.0:
15553 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15554 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
15555 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
15556 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15557 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15558 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15559 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15560 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15561 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
15562 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
15563 ; BITALG_NOVLX-NEXT: vzeroupper
15564 ; BITALG_NOVLX-NEXT: retq
15566 ; BITALG-LABEL: ugt_27_v4i32:
15568 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
15569 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
15570 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15571 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15572 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15573 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15574 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15575 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
15576 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
15577 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
15578 ; BITALG-NEXT: retq
15579 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
15580 %3 = icmp ugt <4 x i32> %2, <i32 27, i32 27, i32 27, i32 27>
15581 %4 = sext <4 x i1> %3 to <4 x i32>
; Popcount each i32 lane, compare 'icmp ult …, 28', sign-extend the i1 mask.
; Note the SSE/AVX lowerings emit the compare as a reversed signed pcmpgtd
; (splat 28 > popcount) instead of a direct unsigned compare.
; CHECK lines are autogenerated (update_llc_test_checks.py, see file header).
15585 define <4 x i32> @ult_28_v4i32(<4 x i32> %0) {
; SSE2/SSE3: shift-and-mask byte popcount, widened to i32 lanes via psadbw.
15586 ; SSE2-LABEL: ult_28_v4i32:
15588 ; SSE2-NEXT: movdqa %xmm0, %xmm1
15589 ; SSE2-NEXT: psrlw $1, %xmm1
15590 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15591 ; SSE2-NEXT: psubb %xmm1, %xmm0
15592 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15593 ; SSE2-NEXT: movdqa %xmm0, %xmm2
15594 ; SSE2-NEXT: pand %xmm1, %xmm2
15595 ; SSE2-NEXT: psrlw $2, %xmm0
15596 ; SSE2-NEXT: pand %xmm1, %xmm0
15597 ; SSE2-NEXT: paddb %xmm2, %xmm0
15598 ; SSE2-NEXT: movdqa %xmm0, %xmm1
15599 ; SSE2-NEXT: psrlw $4, %xmm1
15600 ; SSE2-NEXT: paddb %xmm0, %xmm1
15601 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15602 ; SSE2-NEXT: pxor %xmm0, %xmm0
15603 ; SSE2-NEXT: movdqa %xmm1, %xmm2
15604 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15605 ; SSE2-NEXT: psadbw %xmm0, %xmm2
15606 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15607 ; SSE2-NEXT: psadbw %xmm0, %xmm1
15608 ; SSE2-NEXT: packuswb %xmm2, %xmm1
15609 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [28,28,28,28]
15610 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
15613 ; SSE3-LABEL: ult_28_v4i32:
15615 ; SSE3-NEXT: movdqa %xmm0, %xmm1
15616 ; SSE3-NEXT: psrlw $1, %xmm1
15617 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15618 ; SSE3-NEXT: psubb %xmm1, %xmm0
15619 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15620 ; SSE3-NEXT: movdqa %xmm0, %xmm2
15621 ; SSE3-NEXT: pand %xmm1, %xmm2
15622 ; SSE3-NEXT: psrlw $2, %xmm0
15623 ; SSE3-NEXT: pand %xmm1, %xmm0
15624 ; SSE3-NEXT: paddb %xmm2, %xmm0
15625 ; SSE3-NEXT: movdqa %xmm0, %xmm1
15626 ; SSE3-NEXT: psrlw $4, %xmm1
15627 ; SSE3-NEXT: paddb %xmm0, %xmm1
15628 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15629 ; SSE3-NEXT: pxor %xmm0, %xmm0
15630 ; SSE3-NEXT: movdqa %xmm1, %xmm2
15631 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15632 ; SSE3-NEXT: psadbw %xmm0, %xmm2
15633 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15634 ; SSE3-NEXT: psadbw %xmm0, %xmm1
15635 ; SSE3-NEXT: packuswb %xmm2, %xmm1
15636 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [28,28,28,28]
15637 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
; SSSE3/SSE4.1: pshufb nibble lookup-table popcount instead of shift/mask.
15640 ; SSSE3-LABEL: ult_28_v4i32:
15642 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15643 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
15644 ; SSSE3-NEXT: pand %xmm1, %xmm2
15645 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15646 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
15647 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
15648 ; SSSE3-NEXT: psrlw $4, %xmm0
15649 ; SSSE3-NEXT: pand %xmm1, %xmm0
15650 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
15651 ; SSSE3-NEXT: paddb %xmm4, %xmm3
15652 ; SSSE3-NEXT: pxor %xmm0, %xmm0
15653 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
15654 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
15655 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
15656 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
15657 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
15658 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
15659 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [28,28,28,28]
15660 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
15663 ; SSE41-LABEL: ult_28_v4i32:
15665 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15666 ; SSE41-NEXT: movdqa %xmm0, %xmm2
15667 ; SSE41-NEXT: pand %xmm1, %xmm2
15668 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15669 ; SSE41-NEXT: movdqa %xmm3, %xmm4
15670 ; SSE41-NEXT: pshufb %xmm2, %xmm4
15671 ; SSE41-NEXT: psrlw $4, %xmm0
15672 ; SSE41-NEXT: pand %xmm1, %xmm0
15673 ; SSE41-NEXT: pshufb %xmm0, %xmm3
15674 ; SSE41-NEXT: paddb %xmm4, %xmm3
15675 ; SSE41-NEXT: pxor %xmm0, %xmm0
15676 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
15677 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
15678 ; SSE41-NEXT: psadbw %xmm0, %xmm3
15679 ; SSE41-NEXT: psadbw %xmm0, %xmm1
15680 ; SSE41-NEXT: packuswb %xmm3, %xmm1
15681 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [28,28,28,28]
15682 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
; AVX1/AVX2: same pshufb LUT scheme in three-operand VEX form.
15685 ; AVX1-LABEL: ult_28_v4i32:
15687 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15688 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
15689 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15690 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
15691 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
15692 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
15693 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
15694 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
15695 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
15696 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15697 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15698 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15699 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15700 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15701 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28,28,28]
15702 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
15705 ; AVX2-LABEL: ult_28_v4i32:
15707 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15708 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
15709 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15710 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
15711 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
15712 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
15713 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
15714 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
15715 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
15716 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15717 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15718 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15719 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15720 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15721 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
15722 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ: native vpopcntd; without VL the op is widened to zmm.
15725 ; AVX512VPOPCNTDQ-LABEL: ult_28_v4i32:
15726 ; AVX512VPOPCNTDQ: # %bb.0:
15727 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15728 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
15729 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
15730 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
15731 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
15732 ; AVX512VPOPCNTDQ-NEXT: retq
15734 ; AVX512VPOPCNTDQVL-LABEL: ult_28_v4i32:
15735 ; AVX512VPOPCNTDQVL: # %bb.0:
15736 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
15737 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
15738 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
15739 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
15740 ; AVX512VPOPCNTDQVL-NEXT: retq
; BITALG: vpopcntb per byte, then psadbw reduction back to i32 lanes.
15742 ; BITALG_NOVLX-LABEL: ult_28_v4i32:
15743 ; BITALG_NOVLX: # %bb.0:
15744 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15745 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
15746 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
15747 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15748 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15749 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15750 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15751 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15752 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
15753 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
15754 ; BITALG_NOVLX-NEXT: vzeroupper
15755 ; BITALG_NOVLX-NEXT: retq
15757 ; BITALG-LABEL: ult_28_v4i32:
15759 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
15760 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
15761 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15762 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15763 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15764 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15765 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15766 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
15767 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
15768 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
15769 ; BITALG-NEXT: retq
15770 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
15771 %3 = icmp ult <4 x i32> %2, <i32 28, i32 28, i32 28, i32 28>
15772 %4 = sext <4 x i1> %3 to <4 x i32>
15776 define <4 x i32> @ugt_28_v4i32(<4 x i32> %0) {
15777 ; SSE2-LABEL: ugt_28_v4i32:
15779 ; SSE2-NEXT: movdqa %xmm0, %xmm1
15780 ; SSE2-NEXT: psrlw $1, %xmm1
15781 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15782 ; SSE2-NEXT: psubb %xmm1, %xmm0
15783 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15784 ; SSE2-NEXT: movdqa %xmm0, %xmm2
15785 ; SSE2-NEXT: pand %xmm1, %xmm2
15786 ; SSE2-NEXT: psrlw $2, %xmm0
15787 ; SSE2-NEXT: pand %xmm1, %xmm0
15788 ; SSE2-NEXT: paddb %xmm2, %xmm0
15789 ; SSE2-NEXT: movdqa %xmm0, %xmm1
15790 ; SSE2-NEXT: psrlw $4, %xmm1
15791 ; SSE2-NEXT: paddb %xmm1, %xmm0
15792 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
15793 ; SSE2-NEXT: pxor %xmm1, %xmm1
15794 ; SSE2-NEXT: movdqa %xmm0, %xmm2
15795 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
15796 ; SSE2-NEXT: psadbw %xmm1, %xmm2
15797 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
15798 ; SSE2-NEXT: psadbw %xmm1, %xmm0
15799 ; SSE2-NEXT: packuswb %xmm2, %xmm0
15800 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
15803 ; SSE3-LABEL: ugt_28_v4i32:
15805 ; SSE3-NEXT: movdqa %xmm0, %xmm1
15806 ; SSE3-NEXT: psrlw $1, %xmm1
15807 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15808 ; SSE3-NEXT: psubb %xmm1, %xmm0
15809 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15810 ; SSE3-NEXT: movdqa %xmm0, %xmm2
15811 ; SSE3-NEXT: pand %xmm1, %xmm2
15812 ; SSE3-NEXT: psrlw $2, %xmm0
15813 ; SSE3-NEXT: pand %xmm1, %xmm0
15814 ; SSE3-NEXT: paddb %xmm2, %xmm0
15815 ; SSE3-NEXT: movdqa %xmm0, %xmm1
15816 ; SSE3-NEXT: psrlw $4, %xmm1
15817 ; SSE3-NEXT: paddb %xmm1, %xmm0
15818 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
15819 ; SSE3-NEXT: pxor %xmm1, %xmm1
15820 ; SSE3-NEXT: movdqa %xmm0, %xmm2
15821 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
15822 ; SSE3-NEXT: psadbw %xmm1, %xmm2
15823 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
15824 ; SSE3-NEXT: psadbw %xmm1, %xmm0
15825 ; SSE3-NEXT: packuswb %xmm2, %xmm0
15826 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
15829 ; SSSE3-LABEL: ugt_28_v4i32:
15831 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15832 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
15833 ; SSSE3-NEXT: pand %xmm2, %xmm3
15834 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15835 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
15836 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
15837 ; SSSE3-NEXT: psrlw $4, %xmm0
15838 ; SSSE3-NEXT: pand %xmm2, %xmm0
15839 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
15840 ; SSSE3-NEXT: paddb %xmm4, %xmm1
15841 ; SSSE3-NEXT: pxor %xmm0, %xmm0
15842 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
15843 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15844 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
15845 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15846 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
15847 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
15848 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15849 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
15852 ; SSE41-LABEL: ugt_28_v4i32:
15854 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15855 ; SSE41-NEXT: movdqa %xmm0, %xmm2
15856 ; SSE41-NEXT: pand %xmm1, %xmm2
15857 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15858 ; SSE41-NEXT: movdqa %xmm3, %xmm4
15859 ; SSE41-NEXT: pshufb %xmm2, %xmm4
15860 ; SSE41-NEXT: psrlw $4, %xmm0
15861 ; SSE41-NEXT: pand %xmm1, %xmm0
15862 ; SSE41-NEXT: pshufb %xmm0, %xmm3
15863 ; SSE41-NEXT: paddb %xmm4, %xmm3
15864 ; SSE41-NEXT: pxor %xmm1, %xmm1
15865 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
15866 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
15867 ; SSE41-NEXT: psadbw %xmm1, %xmm3
15868 ; SSE41-NEXT: psadbw %xmm1, %xmm0
15869 ; SSE41-NEXT: packuswb %xmm3, %xmm0
15870 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
15873 ; AVX1-LABEL: ugt_28_v4i32:
15875 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15876 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
15877 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15878 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
15879 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
15880 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
15881 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
15882 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
15883 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
15884 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15885 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15886 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15887 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15888 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15889 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
15892 ; AVX2-LABEL: ugt_28_v4i32:
15894 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15895 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
15896 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15897 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
15898 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
15899 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
15900 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
15901 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
15902 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
15903 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15904 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15905 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15906 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15907 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15908 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
15909 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
15912 ; AVX512VPOPCNTDQ-LABEL: ugt_28_v4i32:
15913 ; AVX512VPOPCNTDQ: # %bb.0:
15914 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15915 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
15916 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
15917 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
15918 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
15919 ; AVX512VPOPCNTDQ-NEXT: retq
15921 ; AVX512VPOPCNTDQVL-LABEL: ugt_28_v4i32:
15922 ; AVX512VPOPCNTDQVL: # %bb.0:
15923 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
15924 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
15925 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
15926 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
15927 ; AVX512VPOPCNTDQVL-NEXT: retq
15929 ; BITALG_NOVLX-LABEL: ugt_28_v4i32:
15930 ; BITALG_NOVLX: # %bb.0:
15931 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15932 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
15933 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
15934 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15935 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15936 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15937 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15938 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15939 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
15940 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
15941 ; BITALG_NOVLX-NEXT: vzeroupper
15942 ; BITALG_NOVLX-NEXT: retq
15944 ; BITALG-LABEL: ugt_28_v4i32:
15946 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
15947 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
15948 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15949 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15950 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15951 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15952 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15953 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
15954 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
15955 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
15956 ; BITALG-NEXT: retq
15957 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
15958 %3 = icmp ugt <4 x i32> %2, <i32 28, i32 28, i32 28, i32 28>
15959 %4 = sext <4 x i1> %3 to <4 x i32>
15963 define <4 x i32> @ult_29_v4i32(<4 x i32> %0) {
15964 ; SSE2-LABEL: ult_29_v4i32:
15966 ; SSE2-NEXT: movdqa %xmm0, %xmm1
15967 ; SSE2-NEXT: psrlw $1, %xmm1
15968 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15969 ; SSE2-NEXT: psubb %xmm1, %xmm0
15970 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15971 ; SSE2-NEXT: movdqa %xmm0, %xmm2
15972 ; SSE2-NEXT: pand %xmm1, %xmm2
15973 ; SSE2-NEXT: psrlw $2, %xmm0
15974 ; SSE2-NEXT: pand %xmm1, %xmm0
15975 ; SSE2-NEXT: paddb %xmm2, %xmm0
15976 ; SSE2-NEXT: movdqa %xmm0, %xmm1
15977 ; SSE2-NEXT: psrlw $4, %xmm1
15978 ; SSE2-NEXT: paddb %xmm0, %xmm1
15979 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15980 ; SSE2-NEXT: pxor %xmm0, %xmm0
15981 ; SSE2-NEXT: movdqa %xmm1, %xmm2
15982 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15983 ; SSE2-NEXT: psadbw %xmm0, %xmm2
15984 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15985 ; SSE2-NEXT: psadbw %xmm0, %xmm1
15986 ; SSE2-NEXT: packuswb %xmm2, %xmm1
15987 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [29,29,29,29]
15988 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
15991 ; SSE3-LABEL: ult_29_v4i32:
15993 ; SSE3-NEXT: movdqa %xmm0, %xmm1
15994 ; SSE3-NEXT: psrlw $1, %xmm1
15995 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15996 ; SSE3-NEXT: psubb %xmm1, %xmm0
15997 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15998 ; SSE3-NEXT: movdqa %xmm0, %xmm2
15999 ; SSE3-NEXT: pand %xmm1, %xmm2
16000 ; SSE3-NEXT: psrlw $2, %xmm0
16001 ; SSE3-NEXT: pand %xmm1, %xmm0
16002 ; SSE3-NEXT: paddb %xmm2, %xmm0
16003 ; SSE3-NEXT: movdqa %xmm0, %xmm1
16004 ; SSE3-NEXT: psrlw $4, %xmm1
16005 ; SSE3-NEXT: paddb %xmm0, %xmm1
16006 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16007 ; SSE3-NEXT: pxor %xmm0, %xmm0
16008 ; SSE3-NEXT: movdqa %xmm1, %xmm2
16009 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
16010 ; SSE3-NEXT: psadbw %xmm0, %xmm2
16011 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
16012 ; SSE3-NEXT: psadbw %xmm0, %xmm1
16013 ; SSE3-NEXT: packuswb %xmm2, %xmm1
16014 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [29,29,29,29]
16015 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
16018 ; SSSE3-LABEL: ult_29_v4i32:
16020 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16021 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
16022 ; SSSE3-NEXT: pand %xmm1, %xmm2
16023 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16024 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
16025 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
16026 ; SSSE3-NEXT: psrlw $4, %xmm0
16027 ; SSSE3-NEXT: pand %xmm1, %xmm0
16028 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
16029 ; SSSE3-NEXT: paddb %xmm4, %xmm3
16030 ; SSSE3-NEXT: pxor %xmm0, %xmm0
16031 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
16032 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
16033 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
16034 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
16035 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
16036 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
16037 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [29,29,29,29]
16038 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
16041 ; SSE41-LABEL: ult_29_v4i32:
16043 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16044 ; SSE41-NEXT: movdqa %xmm0, %xmm2
16045 ; SSE41-NEXT: pand %xmm1, %xmm2
16046 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16047 ; SSE41-NEXT: movdqa %xmm3, %xmm4
16048 ; SSE41-NEXT: pshufb %xmm2, %xmm4
16049 ; SSE41-NEXT: psrlw $4, %xmm0
16050 ; SSE41-NEXT: pand %xmm1, %xmm0
16051 ; SSE41-NEXT: pshufb %xmm0, %xmm3
16052 ; SSE41-NEXT: paddb %xmm4, %xmm3
16053 ; SSE41-NEXT: pxor %xmm0, %xmm0
16054 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
16055 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
16056 ; SSE41-NEXT: psadbw %xmm0, %xmm3
16057 ; SSE41-NEXT: psadbw %xmm0, %xmm1
16058 ; SSE41-NEXT: packuswb %xmm3, %xmm1
16059 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [29,29,29,29]
16060 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
16063 ; AVX1-LABEL: ult_29_v4i32:
16065 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16066 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
16067 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16068 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
16069 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
16070 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
16071 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
16072 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
16073 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
16074 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16075 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16076 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16077 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16078 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16079 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29,29,29]
16080 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
16083 ; AVX2-LABEL: ult_29_v4i32:
16085 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16086 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
16087 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16088 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
16089 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
16090 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
16091 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
16092 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
16093 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
16094 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16095 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16096 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16097 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16098 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16099 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
16100 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
16103 ; AVX512VPOPCNTDQ-LABEL: ult_29_v4i32:
16104 ; AVX512VPOPCNTDQ: # %bb.0:
16105 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16106 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
16107 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
16108 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
16109 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
16110 ; AVX512VPOPCNTDQ-NEXT: retq
16112 ; AVX512VPOPCNTDQVL-LABEL: ult_29_v4i32:
16113 ; AVX512VPOPCNTDQVL: # %bb.0:
16114 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
16115 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
16116 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
16117 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
16118 ; AVX512VPOPCNTDQVL-NEXT: retq
16120 ; BITALG_NOVLX-LABEL: ult_29_v4i32:
16121 ; BITALG_NOVLX: # %bb.0:
16122 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16123 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
16124 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
16125 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16126 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16127 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16128 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16129 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16130 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
16131 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
16132 ; BITALG_NOVLX-NEXT: vzeroupper
16133 ; BITALG_NOVLX-NEXT: retq
16135 ; BITALG-LABEL: ult_29_v4i32:
16137 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
16138 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
16139 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16140 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16141 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16142 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16143 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16144 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
16145 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
16146 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
16147 ; BITALG-NEXT: retq
16148 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
16149 %3 = icmp ult <4 x i32> %2, <i32 29, i32 29, i32 29, i32 29>
16150 %4 = sext <4 x i1> %3 to <4 x i32>
16154 define <4 x i32> @ugt_29_v4i32(<4 x i32> %0) {
16155 ; SSE2-LABEL: ugt_29_v4i32:
16157 ; SSE2-NEXT: movdqa %xmm0, %xmm1
16158 ; SSE2-NEXT: psrlw $1, %xmm1
16159 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16160 ; SSE2-NEXT: psubb %xmm1, %xmm0
16161 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
16162 ; SSE2-NEXT: movdqa %xmm0, %xmm2
16163 ; SSE2-NEXT: pand %xmm1, %xmm2
16164 ; SSE2-NEXT: psrlw $2, %xmm0
16165 ; SSE2-NEXT: pand %xmm1, %xmm0
16166 ; SSE2-NEXT: paddb %xmm2, %xmm0
16167 ; SSE2-NEXT: movdqa %xmm0, %xmm1
16168 ; SSE2-NEXT: psrlw $4, %xmm1
16169 ; SSE2-NEXT: paddb %xmm1, %xmm0
16170 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
16171 ; SSE2-NEXT: pxor %xmm1, %xmm1
16172 ; SSE2-NEXT: movdqa %xmm0, %xmm2
16173 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
16174 ; SSE2-NEXT: psadbw %xmm1, %xmm2
16175 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
16176 ; SSE2-NEXT: psadbw %xmm1, %xmm0
16177 ; SSE2-NEXT: packuswb %xmm2, %xmm0
16178 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
16181 ; SSE3-LABEL: ugt_29_v4i32:
16183 ; SSE3-NEXT: movdqa %xmm0, %xmm1
16184 ; SSE3-NEXT: psrlw $1, %xmm1
16185 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16186 ; SSE3-NEXT: psubb %xmm1, %xmm0
16187 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
16188 ; SSE3-NEXT: movdqa %xmm0, %xmm2
16189 ; SSE3-NEXT: pand %xmm1, %xmm2
16190 ; SSE3-NEXT: psrlw $2, %xmm0
16191 ; SSE3-NEXT: pand %xmm1, %xmm0
16192 ; SSE3-NEXT: paddb %xmm2, %xmm0
16193 ; SSE3-NEXT: movdqa %xmm0, %xmm1
16194 ; SSE3-NEXT: psrlw $4, %xmm1
16195 ; SSE3-NEXT: paddb %xmm1, %xmm0
16196 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
16197 ; SSE3-NEXT: pxor %xmm1, %xmm1
16198 ; SSE3-NEXT: movdqa %xmm0, %xmm2
16199 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
16200 ; SSE3-NEXT: psadbw %xmm1, %xmm2
16201 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
16202 ; SSE3-NEXT: psadbw %xmm1, %xmm0
16203 ; SSE3-NEXT: packuswb %xmm2, %xmm0
16204 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
16207 ; SSSE3-LABEL: ugt_29_v4i32:
16209 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16210 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
16211 ; SSSE3-NEXT: pand %xmm2, %xmm3
16212 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16213 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
16214 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
16215 ; SSSE3-NEXT: psrlw $4, %xmm0
16216 ; SSSE3-NEXT: pand %xmm2, %xmm0
16217 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
16218 ; SSSE3-NEXT: paddb %xmm4, %xmm1
16219 ; SSSE3-NEXT: pxor %xmm0, %xmm0
16220 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
16221 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
16222 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
16223 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
16224 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
16225 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
16226 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16227 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
16230 ; SSE41-LABEL: ugt_29_v4i32:
16232 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16233 ; SSE41-NEXT: movdqa %xmm0, %xmm2
16234 ; SSE41-NEXT: pand %xmm1, %xmm2
16235 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16236 ; SSE41-NEXT: movdqa %xmm3, %xmm4
16237 ; SSE41-NEXT: pshufb %xmm2, %xmm4
16238 ; SSE41-NEXT: psrlw $4, %xmm0
16239 ; SSE41-NEXT: pand %xmm1, %xmm0
16240 ; SSE41-NEXT: pshufb %xmm0, %xmm3
16241 ; SSE41-NEXT: paddb %xmm4, %xmm3
16242 ; SSE41-NEXT: pxor %xmm1, %xmm1
16243 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
16244 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
16245 ; SSE41-NEXT: psadbw %xmm1, %xmm3
16246 ; SSE41-NEXT: psadbw %xmm1, %xmm0
16247 ; SSE41-NEXT: packuswb %xmm3, %xmm0
16248 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
16251 ; AVX1-LABEL: ugt_29_v4i32:
16253 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16254 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
16255 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16256 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
16257 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
16258 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
16259 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
16260 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
16261 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
16262 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16263 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16264 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16265 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16266 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16267 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
16270 ; AVX2-LABEL: ugt_29_v4i32:
16272 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16273 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
16274 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16275 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
16276 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
16277 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
16278 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
16279 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
16280 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
16281 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16282 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16283 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16284 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16285 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16286 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
16287 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
16290 ; AVX512VPOPCNTDQ-LABEL: ugt_29_v4i32:
16291 ; AVX512VPOPCNTDQ: # %bb.0:
16292 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16293 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
16294 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
16295 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
16296 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
16297 ; AVX512VPOPCNTDQ-NEXT: retq
16299 ; AVX512VPOPCNTDQVL-LABEL: ugt_29_v4i32:
16300 ; AVX512VPOPCNTDQVL: # %bb.0:
16301 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
16302 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
16303 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
16304 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
16305 ; AVX512VPOPCNTDQVL-NEXT: retq
16307 ; BITALG_NOVLX-LABEL: ugt_29_v4i32:
16308 ; BITALG_NOVLX: # %bb.0:
16309 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16310 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
16311 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
16312 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16313 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16314 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16315 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16316 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16317 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
16318 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
16319 ; BITALG_NOVLX-NEXT: vzeroupper
16320 ; BITALG_NOVLX-NEXT: retq
16322 ; BITALG-LABEL: ugt_29_v4i32:
16324 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
16325 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
16326 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16327 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16328 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16329 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16330 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16331 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
16332 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
16333 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
16334 ; BITALG-NEXT: retq
16335 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
16336 %3 = icmp ugt <4 x i32> %2, <i32 29, i32 29, i32 29, i32 29>
16337 %4 = sext <4 x i1> %3 to <4 x i32>
16341 define <4 x i32> @ult_30_v4i32(<4 x i32> %0) {
16342 ; SSE2-LABEL: ult_30_v4i32:
16344 ; SSE2-NEXT: movdqa %xmm0, %xmm1
16345 ; SSE2-NEXT: psrlw $1, %xmm1
16346 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16347 ; SSE2-NEXT: psubb %xmm1, %xmm0
16348 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
16349 ; SSE2-NEXT: movdqa %xmm0, %xmm2
16350 ; SSE2-NEXT: pand %xmm1, %xmm2
16351 ; SSE2-NEXT: psrlw $2, %xmm0
16352 ; SSE2-NEXT: pand %xmm1, %xmm0
16353 ; SSE2-NEXT: paddb %xmm2, %xmm0
16354 ; SSE2-NEXT: movdqa %xmm0, %xmm1
16355 ; SSE2-NEXT: psrlw $4, %xmm1
16356 ; SSE2-NEXT: paddb %xmm0, %xmm1
16357 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16358 ; SSE2-NEXT: pxor %xmm0, %xmm0
16359 ; SSE2-NEXT: movdqa %xmm1, %xmm2
16360 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
16361 ; SSE2-NEXT: psadbw %xmm0, %xmm2
16362 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
16363 ; SSE2-NEXT: psadbw %xmm0, %xmm1
16364 ; SSE2-NEXT: packuswb %xmm2, %xmm1
16365 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [30,30,30,30]
16366 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
16369 ; SSE3-LABEL: ult_30_v4i32:
16371 ; SSE3-NEXT: movdqa %xmm0, %xmm1
16372 ; SSE3-NEXT: psrlw $1, %xmm1
16373 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16374 ; SSE3-NEXT: psubb %xmm1, %xmm0
16375 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
16376 ; SSE3-NEXT: movdqa %xmm0, %xmm2
16377 ; SSE3-NEXT: pand %xmm1, %xmm2
16378 ; SSE3-NEXT: psrlw $2, %xmm0
16379 ; SSE3-NEXT: pand %xmm1, %xmm0
16380 ; SSE3-NEXT: paddb %xmm2, %xmm0
16381 ; SSE3-NEXT: movdqa %xmm0, %xmm1
16382 ; SSE3-NEXT: psrlw $4, %xmm1
16383 ; SSE3-NEXT: paddb %xmm0, %xmm1
16384 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16385 ; SSE3-NEXT: pxor %xmm0, %xmm0
16386 ; SSE3-NEXT: movdqa %xmm1, %xmm2
16387 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
16388 ; SSE3-NEXT: psadbw %xmm0, %xmm2
16389 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
16390 ; SSE3-NEXT: psadbw %xmm0, %xmm1
16391 ; SSE3-NEXT: packuswb %xmm2, %xmm1
16392 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [30,30,30,30]
16393 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
16396 ; SSSE3-LABEL: ult_30_v4i32:
16398 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16399 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
16400 ; SSSE3-NEXT: pand %xmm1, %xmm2
16401 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16402 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
16403 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
16404 ; SSSE3-NEXT: psrlw $4, %xmm0
16405 ; SSSE3-NEXT: pand %xmm1, %xmm0
16406 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
16407 ; SSSE3-NEXT: paddb %xmm4, %xmm3
16408 ; SSSE3-NEXT: pxor %xmm0, %xmm0
16409 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
16410 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
16411 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
16412 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
16413 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
16414 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
16415 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [30,30,30,30]
16416 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
16419 ; SSE41-LABEL: ult_30_v4i32:
16421 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16422 ; SSE41-NEXT: movdqa %xmm0, %xmm2
16423 ; SSE41-NEXT: pand %xmm1, %xmm2
16424 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16425 ; SSE41-NEXT: movdqa %xmm3, %xmm4
16426 ; SSE41-NEXT: pshufb %xmm2, %xmm4
16427 ; SSE41-NEXT: psrlw $4, %xmm0
16428 ; SSE41-NEXT: pand %xmm1, %xmm0
16429 ; SSE41-NEXT: pshufb %xmm0, %xmm3
16430 ; SSE41-NEXT: paddb %xmm4, %xmm3
16431 ; SSE41-NEXT: pxor %xmm0, %xmm0
16432 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
16433 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
16434 ; SSE41-NEXT: psadbw %xmm0, %xmm3
16435 ; SSE41-NEXT: psadbw %xmm0, %xmm1
16436 ; SSE41-NEXT: packuswb %xmm3, %xmm1
16437 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [30,30,30,30]
16438 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
16441 ; AVX1-LABEL: ult_30_v4i32:
16443 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16444 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
16445 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16446 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
16447 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
16448 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
16449 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
16450 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
16451 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
16452 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16453 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16454 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16455 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16456 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16457 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30,30,30]
16458 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
16461 ; AVX2-LABEL: ult_30_v4i32:
16463 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16464 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
16465 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16466 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
16467 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
16468 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
16469 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
16470 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
16471 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
16472 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16473 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16474 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16475 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16476 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16477 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
16478 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
16481 ; AVX512VPOPCNTDQ-LABEL: ult_30_v4i32:
16482 ; AVX512VPOPCNTDQ: # %bb.0:
16483 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16484 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
16485 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
16486 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
16487 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
16488 ; AVX512VPOPCNTDQ-NEXT: retq
16490 ; AVX512VPOPCNTDQVL-LABEL: ult_30_v4i32:
16491 ; AVX512VPOPCNTDQVL: # %bb.0:
16492 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
16493 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
16494 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
16495 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
16496 ; AVX512VPOPCNTDQVL-NEXT: retq
16498 ; BITALG_NOVLX-LABEL: ult_30_v4i32:
16499 ; BITALG_NOVLX: # %bb.0:
16500 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16501 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
16502 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
16503 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16504 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16505 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16506 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16507 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16508 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
16509 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
16510 ; BITALG_NOVLX-NEXT: vzeroupper
16511 ; BITALG_NOVLX-NEXT: retq
16513 ; BITALG-LABEL: ult_30_v4i32:
16515 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
16516 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
16517 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16518 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16519 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16520 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16521 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16522 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
16523 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
16524 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
16525 ; BITALG-NEXT: retq
16526 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
16527 %3 = icmp ult <4 x i32> %2, <i32 30, i32 30, i32 30, i32 30>
16528 %4 = sext <4 x i1> %3 to <4 x i32>
16532 define <4 x i32> @ugt_30_v4i32(<4 x i32> %0) {
16533 ; SSE2-LABEL: ugt_30_v4i32:
16535 ; SSE2-NEXT: movdqa %xmm0, %xmm1
16536 ; SSE2-NEXT: psrlw $1, %xmm1
16537 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16538 ; SSE2-NEXT: psubb %xmm1, %xmm0
16539 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
16540 ; SSE2-NEXT: movdqa %xmm0, %xmm2
16541 ; SSE2-NEXT: pand %xmm1, %xmm2
16542 ; SSE2-NEXT: psrlw $2, %xmm0
16543 ; SSE2-NEXT: pand %xmm1, %xmm0
16544 ; SSE2-NEXT: paddb %xmm2, %xmm0
16545 ; SSE2-NEXT: movdqa %xmm0, %xmm1
16546 ; SSE2-NEXT: psrlw $4, %xmm1
16547 ; SSE2-NEXT: paddb %xmm1, %xmm0
16548 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
16549 ; SSE2-NEXT: pxor %xmm1, %xmm1
16550 ; SSE2-NEXT: movdqa %xmm0, %xmm2
16551 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
16552 ; SSE2-NEXT: psadbw %xmm1, %xmm2
16553 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
16554 ; SSE2-NEXT: psadbw %xmm1, %xmm0
16555 ; SSE2-NEXT: packuswb %xmm2, %xmm0
16556 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
16559 ; SSE3-LABEL: ugt_30_v4i32:
16561 ; SSE3-NEXT: movdqa %xmm0, %xmm1
16562 ; SSE3-NEXT: psrlw $1, %xmm1
16563 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16564 ; SSE3-NEXT: psubb %xmm1, %xmm0
16565 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
16566 ; SSE3-NEXT: movdqa %xmm0, %xmm2
16567 ; SSE3-NEXT: pand %xmm1, %xmm2
16568 ; SSE3-NEXT: psrlw $2, %xmm0
16569 ; SSE3-NEXT: pand %xmm1, %xmm0
16570 ; SSE3-NEXT: paddb %xmm2, %xmm0
16571 ; SSE3-NEXT: movdqa %xmm0, %xmm1
16572 ; SSE3-NEXT: psrlw $4, %xmm1
16573 ; SSE3-NEXT: paddb %xmm1, %xmm0
16574 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
16575 ; SSE3-NEXT: pxor %xmm1, %xmm1
16576 ; SSE3-NEXT: movdqa %xmm0, %xmm2
16577 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
16578 ; SSE3-NEXT: psadbw %xmm1, %xmm2
16579 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
16580 ; SSE3-NEXT: psadbw %xmm1, %xmm0
16581 ; SSE3-NEXT: packuswb %xmm2, %xmm0
16582 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
16585 ; SSSE3-LABEL: ugt_30_v4i32:
16587 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16588 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
16589 ; SSSE3-NEXT: pand %xmm2, %xmm3
16590 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16591 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
16592 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
16593 ; SSSE3-NEXT: psrlw $4, %xmm0
16594 ; SSSE3-NEXT: pand %xmm2, %xmm0
16595 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
16596 ; SSSE3-NEXT: paddb %xmm4, %xmm1
16597 ; SSSE3-NEXT: pxor %xmm0, %xmm0
16598 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
16599 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
16600 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
16601 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
16602 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
16603 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
16604 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16605 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
16608 ; SSE41-LABEL: ugt_30_v4i32:
16610 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16611 ; SSE41-NEXT: movdqa %xmm0, %xmm2
16612 ; SSE41-NEXT: pand %xmm1, %xmm2
16613 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16614 ; SSE41-NEXT: movdqa %xmm3, %xmm4
16615 ; SSE41-NEXT: pshufb %xmm2, %xmm4
16616 ; SSE41-NEXT: psrlw $4, %xmm0
16617 ; SSE41-NEXT: pand %xmm1, %xmm0
16618 ; SSE41-NEXT: pshufb %xmm0, %xmm3
16619 ; SSE41-NEXT: paddb %xmm4, %xmm3
16620 ; SSE41-NEXT: pxor %xmm1, %xmm1
16621 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
16622 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
16623 ; SSE41-NEXT: psadbw %xmm1, %xmm3
16624 ; SSE41-NEXT: psadbw %xmm1, %xmm0
16625 ; SSE41-NEXT: packuswb %xmm3, %xmm0
16626 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
16629 ; AVX1-LABEL: ugt_30_v4i32:
16631 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16632 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
16633 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16634 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
16635 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
16636 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
16637 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
16638 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
16639 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
16640 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16641 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16642 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16643 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16644 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16645 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
16648 ; AVX2-LABEL: ugt_30_v4i32:
16650 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16651 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
16652 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16653 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
16654 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
16655 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
16656 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
16657 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
16658 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
16659 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16660 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16661 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16662 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16663 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16664 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
16665 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
16668 ; AVX512VPOPCNTDQ-LABEL: ugt_30_v4i32:
16669 ; AVX512VPOPCNTDQ: # %bb.0:
16670 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16671 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
16672 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
16673 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
16674 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
16675 ; AVX512VPOPCNTDQ-NEXT: retq
16677 ; AVX512VPOPCNTDQVL-LABEL: ugt_30_v4i32:
16678 ; AVX512VPOPCNTDQVL: # %bb.0:
16679 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
16680 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
16681 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
16682 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
16683 ; AVX512VPOPCNTDQVL-NEXT: retq
16685 ; BITALG_NOVLX-LABEL: ugt_30_v4i32:
16686 ; BITALG_NOVLX: # %bb.0:
16687 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16688 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
16689 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
16690 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16691 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16692 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16693 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16694 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16695 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
16696 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
16697 ; BITALG_NOVLX-NEXT: vzeroupper
16698 ; BITALG_NOVLX-NEXT: retq
16700 ; BITALG-LABEL: ugt_30_v4i32:
16702 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
16703 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
16704 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16705 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16706 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16707 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16708 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16709 ; BITALG-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
16710 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
16711 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
16712 ; BITALG-NEXT: retq
16713 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
16714 %3 = icmp ugt <4 x i32> %2, <i32 30, i32 30, i32 30, i32 30>
16715 %4 = sext <4 x i1> %3 to <4 x i32>
16719 define <4 x i32> @ult_31_v4i32(<4 x i32> %0) {
16720 ; SSE2-LABEL: ult_31_v4i32:
16722 ; SSE2-NEXT: movdqa %xmm0, %xmm1
16723 ; SSE2-NEXT: psrlw $1, %xmm1
16724 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16725 ; SSE2-NEXT: psubb %xmm1, %xmm0
16726 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
16727 ; SSE2-NEXT: movdqa %xmm0, %xmm2
16728 ; SSE2-NEXT: pand %xmm1, %xmm2
16729 ; SSE2-NEXT: psrlw $2, %xmm0
16730 ; SSE2-NEXT: pand %xmm1, %xmm0
16731 ; SSE2-NEXT: paddb %xmm2, %xmm0
16732 ; SSE2-NEXT: movdqa %xmm0, %xmm1
16733 ; SSE2-NEXT: psrlw $4, %xmm1
16734 ; SSE2-NEXT: paddb %xmm0, %xmm1
16735 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16736 ; SSE2-NEXT: pxor %xmm0, %xmm0
16737 ; SSE2-NEXT: movdqa %xmm1, %xmm2
16738 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
16739 ; SSE2-NEXT: psadbw %xmm0, %xmm2
16740 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
16741 ; SSE2-NEXT: psadbw %xmm0, %xmm1
16742 ; SSE2-NEXT: packuswb %xmm2, %xmm1
16743 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [31,31,31,31]
16744 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
16747 ; SSE3-LABEL: ult_31_v4i32:
16749 ; SSE3-NEXT: movdqa %xmm0, %xmm1
16750 ; SSE3-NEXT: psrlw $1, %xmm1
16751 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16752 ; SSE3-NEXT: psubb %xmm1, %xmm0
16753 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
16754 ; SSE3-NEXT: movdqa %xmm0, %xmm2
16755 ; SSE3-NEXT: pand %xmm1, %xmm2
16756 ; SSE3-NEXT: psrlw $2, %xmm0
16757 ; SSE3-NEXT: pand %xmm1, %xmm0
16758 ; SSE3-NEXT: paddb %xmm2, %xmm0
16759 ; SSE3-NEXT: movdqa %xmm0, %xmm1
16760 ; SSE3-NEXT: psrlw $4, %xmm1
16761 ; SSE3-NEXT: paddb %xmm0, %xmm1
16762 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16763 ; SSE3-NEXT: pxor %xmm0, %xmm0
16764 ; SSE3-NEXT: movdqa %xmm1, %xmm2
16765 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
16766 ; SSE3-NEXT: psadbw %xmm0, %xmm2
16767 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
16768 ; SSE3-NEXT: psadbw %xmm0, %xmm1
16769 ; SSE3-NEXT: packuswb %xmm2, %xmm1
16770 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [31,31,31,31]
16771 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
16774 ; SSSE3-LABEL: ult_31_v4i32:
16776 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16777 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
16778 ; SSSE3-NEXT: pand %xmm1, %xmm2
16779 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16780 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
16781 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
16782 ; SSSE3-NEXT: psrlw $4, %xmm0
16783 ; SSSE3-NEXT: pand %xmm1, %xmm0
16784 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
16785 ; SSSE3-NEXT: paddb %xmm4, %xmm3
16786 ; SSSE3-NEXT: pxor %xmm0, %xmm0
16787 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
16788 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
16789 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
16790 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
16791 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
16792 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
16793 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [31,31,31,31]
16794 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
16797 ; SSE41-LABEL: ult_31_v4i32:
16799 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16800 ; SSE41-NEXT: movdqa %xmm0, %xmm2
16801 ; SSE41-NEXT: pand %xmm1, %xmm2
16802 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16803 ; SSE41-NEXT: movdqa %xmm3, %xmm4
16804 ; SSE41-NEXT: pshufb %xmm2, %xmm4
16805 ; SSE41-NEXT: psrlw $4, %xmm0
16806 ; SSE41-NEXT: pand %xmm1, %xmm0
16807 ; SSE41-NEXT: pshufb %xmm0, %xmm3
16808 ; SSE41-NEXT: paddb %xmm4, %xmm3
16809 ; SSE41-NEXT: pxor %xmm0, %xmm0
16810 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
16811 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
16812 ; SSE41-NEXT: psadbw %xmm0, %xmm3
16813 ; SSE41-NEXT: psadbw %xmm0, %xmm1
16814 ; SSE41-NEXT: packuswb %xmm3, %xmm1
16815 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [31,31,31,31]
16816 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
16819 ; AVX1-LABEL: ult_31_v4i32:
16821 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16822 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
16823 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16824 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
16825 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
16826 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
16827 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
16828 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
16829 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
16830 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16831 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16832 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16833 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16834 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16835 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31,31,31]
16836 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
16839 ; AVX2-LABEL: ult_31_v4i32:
16841 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16842 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
16843 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16844 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
16845 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
16846 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
16847 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
16848 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
16849 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
16850 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16851 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16852 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16853 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16854 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16855 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [31,31,31,31]
16856 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
16859 ; AVX512VPOPCNTDQ-LABEL: ult_31_v4i32:
16860 ; AVX512VPOPCNTDQ: # %bb.0:
16861 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16862 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
16863 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [31,31,31,31]
16864 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
16865 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
16866 ; AVX512VPOPCNTDQ-NEXT: retq
16868 ; AVX512VPOPCNTDQVL-LABEL: ult_31_v4i32:
16869 ; AVX512VPOPCNTDQVL: # %bb.0:
16870 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
16871 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
16872 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
16873 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
16874 ; AVX512VPOPCNTDQVL-NEXT: retq
16876 ; BITALG_NOVLX-LABEL: ult_31_v4i32:
16877 ; BITALG_NOVLX: # %bb.0:
16878 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16879 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
16880 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
16881 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16882 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16883 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16884 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16885 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16886 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [31,31,31,31]
16887 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
16888 ; BITALG_NOVLX-NEXT: vzeroupper
16889 ; BITALG_NOVLX-NEXT: retq
16891 ; BITALG-LABEL: ult_31_v4i32:
16893 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
16894 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
16895 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16896 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16897 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16898 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16899 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16900 ; BITALG-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
16901 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
16902 ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
16903 ; BITALG-NEXT: retq
16904 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
16905 %3 = icmp ult <4 x i32> %2, <i32 31, i32 31, i32 31, i32 31>
16906 %4 = sext <4 x i1> %3 to <4 x i32>
16910 define <2 x i64> @ugt_1_v2i64(<2 x i64> %0) {
16911 ; SSE2-LABEL: ugt_1_v2i64:
16913 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
16914 ; SSE2-NEXT: movdqa %xmm0, %xmm2
16915 ; SSE2-NEXT: paddq %xmm1, %xmm2
16916 ; SSE2-NEXT: pand %xmm2, %xmm0
16917 ; SSE2-NEXT: pxor %xmm2, %xmm2
16918 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
16919 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2]
16920 ; SSE2-NEXT: pand %xmm2, %xmm0
16921 ; SSE2-NEXT: pxor %xmm1, %xmm0
16924 ; SSE3-LABEL: ugt_1_v2i64:
16926 ; SSE3-NEXT: pcmpeqd %xmm1, %xmm1
16927 ; SSE3-NEXT: movdqa %xmm0, %xmm2
16928 ; SSE3-NEXT: paddq %xmm1, %xmm2
16929 ; SSE3-NEXT: pand %xmm2, %xmm0
16930 ; SSE3-NEXT: pxor %xmm2, %xmm2
16931 ; SSE3-NEXT: pcmpeqd %xmm2, %xmm0
16932 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2]
16933 ; SSE3-NEXT: pand %xmm2, %xmm0
16934 ; SSE3-NEXT: pxor %xmm1, %xmm0
16937 ; SSSE3-LABEL: ugt_1_v2i64:
16939 ; SSSE3-NEXT: pcmpeqd %xmm1, %xmm1
16940 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
16941 ; SSSE3-NEXT: paddq %xmm1, %xmm2
16942 ; SSSE3-NEXT: pand %xmm2, %xmm0
16943 ; SSSE3-NEXT: pxor %xmm2, %xmm2
16944 ; SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
16945 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2]
16946 ; SSSE3-NEXT: pand %xmm2, %xmm0
16947 ; SSSE3-NEXT: pxor %xmm1, %xmm0
16950 ; SSE41-LABEL: ugt_1_v2i64:
16952 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
16953 ; SSE41-NEXT: movdqa %xmm0, %xmm2
16954 ; SSE41-NEXT: paddq %xmm1, %xmm2
16955 ; SSE41-NEXT: pand %xmm2, %xmm0
16956 ; SSE41-NEXT: pxor %xmm2, %xmm2
16957 ; SSE41-NEXT: pcmpeqq %xmm2, %xmm0
16958 ; SSE41-NEXT: pxor %xmm1, %xmm0
16961 ; AVX1-LABEL: ugt_1_v2i64:
16963 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
16964 ; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm2
16965 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
16966 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
16967 ; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
16968 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
16971 ; AVX2-LABEL: ugt_1_v2i64:
16973 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
16974 ; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm2
16975 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
16976 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
16977 ; AVX2-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
16978 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
16981 ; AVX512VPOPCNTDQ-LABEL: ugt_1_v2i64:
16982 ; AVX512VPOPCNTDQ: # %bb.0:
16983 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16984 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
16985 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
16986 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
16987 ; AVX512VPOPCNTDQ-NEXT: retq
16989 ; AVX512VPOPCNTDQVL-LABEL: ugt_1_v2i64:
16990 ; AVX512VPOPCNTDQVL: # %bb.0:
16991 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
16992 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
16993 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
16994 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
16995 ; AVX512VPOPCNTDQVL-NEXT: retq
16997 ; BITALG_NOVLX-LABEL: ugt_1_v2i64:
16998 ; BITALG_NOVLX: # %bb.0:
16999 ; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
17000 ; BITALG_NOVLX-NEXT: vpaddq %xmm1, %xmm0, %xmm1
17001 ; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
17002 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
17003 ; BITALG_NOVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
17004 ; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
17005 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
17006 ; BITALG_NOVLX-NEXT: vzeroupper
17007 ; BITALG_NOVLX-NEXT: retq
17009 ; BITALG-LABEL: ugt_1_v2i64:
17011 ; BITALG-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
17012 ; BITALG-NEXT: vpaddq %xmm1, %xmm0, %xmm1
17013 ; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0
17014 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
17015 ; BITALG-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
17016 ; BITALG-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
17017 ; BITALG-NEXT: retq
17018 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
17019 %3 = icmp ugt <2 x i64> %2, <i64 1, i64 1>
17020 %4 = sext <2 x i1> %3 to <2 x i64>
17024 define <2 x i64> @ult_2_v2i64(<2 x i64> %0) {
17025 ; SSE2-LABEL: ult_2_v2i64:
17027 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
17028 ; SSE2-NEXT: paddq %xmm0, %xmm1
17029 ; SSE2-NEXT: pand %xmm1, %xmm0
17030 ; SSE2-NEXT: pxor %xmm1, %xmm1
17031 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
17032 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
17033 ; SSE2-NEXT: pand %xmm1, %xmm0
17036 ; SSE3-LABEL: ult_2_v2i64:
17038 ; SSE3-NEXT: pcmpeqd %xmm1, %xmm1
17039 ; SSE3-NEXT: paddq %xmm0, %xmm1
17040 ; SSE3-NEXT: pand %xmm1, %xmm0
17041 ; SSE3-NEXT: pxor %xmm1, %xmm1
17042 ; SSE3-NEXT: pcmpeqd %xmm1, %xmm0
17043 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
17044 ; SSE3-NEXT: pand %xmm1, %xmm0
17047 ; SSSE3-LABEL: ult_2_v2i64:
17049 ; SSSE3-NEXT: pcmpeqd %xmm1, %xmm1
17050 ; SSSE3-NEXT: paddq %xmm0, %xmm1
17051 ; SSSE3-NEXT: pand %xmm1, %xmm0
17052 ; SSSE3-NEXT: pxor %xmm1, %xmm1
17053 ; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
17054 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
17055 ; SSSE3-NEXT: pand %xmm1, %xmm0
17058 ; SSE41-LABEL: ult_2_v2i64:
17060 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
17061 ; SSE41-NEXT: paddq %xmm0, %xmm1
17062 ; SSE41-NEXT: pand %xmm1, %xmm0
17063 ; SSE41-NEXT: pxor %xmm1, %xmm1
17064 ; SSE41-NEXT: pcmpeqq %xmm1, %xmm0
17067 ; AVX1-LABEL: ult_2_v2i64:
17069 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
17070 ; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm1
17071 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
17072 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
17073 ; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
17076 ; AVX2-LABEL: ult_2_v2i64:
17078 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
17079 ; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm1
17080 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
17081 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
17082 ; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
17085 ; AVX512VPOPCNTDQ-LABEL: ult_2_v2i64:
17086 ; AVX512VPOPCNTDQ: # %bb.0:
17087 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17088 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
17089 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [2,2]
17090 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17091 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
17092 ; AVX512VPOPCNTDQ-NEXT: retq
17094 ; AVX512VPOPCNTDQVL-LABEL: ult_2_v2i64:
17095 ; AVX512VPOPCNTDQVL: # %bb.0:
17096 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
17097 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
17098 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
17099 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
17100 ; AVX512VPOPCNTDQVL-NEXT: retq
17102 ; BITALG_NOVLX-LABEL: ult_2_v2i64:
17103 ; BITALG_NOVLX: # %bb.0:
17104 ; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
17105 ; BITALG_NOVLX-NEXT: vpaddq %xmm1, %xmm0, %xmm1
17106 ; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
17107 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
17108 ; BITALG_NOVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
17109 ; BITALG_NOVLX-NEXT: retq
17111 ; BITALG-LABEL: ult_2_v2i64:
17113 ; BITALG-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
17114 ; BITALG-NEXT: vpaddq %xmm1, %xmm0, %xmm1
17115 ; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0
17116 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
17117 ; BITALG-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
17118 ; BITALG-NEXT: retq
17119 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
17120 %3 = icmp ult <2 x i64> %2, <i64 2, i64 2>
17121 %4 = sext <2 x i1> %3 to <2 x i64>
17125 define <2 x i64> @ugt_2_v2i64(<2 x i64> %0) {
17126 ; SSE2-LABEL: ugt_2_v2i64:
17128 ; SSE2-NEXT: movdqa %xmm0, %xmm1
17129 ; SSE2-NEXT: psrlw $1, %xmm1
17130 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17131 ; SSE2-NEXT: psubb %xmm1, %xmm0
17132 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
17133 ; SSE2-NEXT: movdqa %xmm0, %xmm2
17134 ; SSE2-NEXT: pand %xmm1, %xmm2
17135 ; SSE2-NEXT: psrlw $2, %xmm0
17136 ; SSE2-NEXT: pand %xmm1, %xmm0
17137 ; SSE2-NEXT: paddb %xmm2, %xmm0
17138 ; SSE2-NEXT: movdqa %xmm0, %xmm1
17139 ; SSE2-NEXT: psrlw $4, %xmm1
17140 ; SSE2-NEXT: paddb %xmm0, %xmm1
17141 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17142 ; SSE2-NEXT: pxor %xmm0, %xmm0
17143 ; SSE2-NEXT: psadbw %xmm0, %xmm1
17144 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17145 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
17146 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
17147 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17148 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
17149 ; SSE2-NEXT: pand %xmm2, %xmm3
17150 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
17151 ; SSE2-NEXT: por %xmm3, %xmm0
17154 ; SSE3-LABEL: ugt_2_v2i64:
17156 ; SSE3-NEXT: movdqa %xmm0, %xmm1
17157 ; SSE3-NEXT: psrlw $1, %xmm1
17158 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17159 ; SSE3-NEXT: psubb %xmm1, %xmm0
17160 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
17161 ; SSE3-NEXT: movdqa %xmm0, %xmm2
17162 ; SSE3-NEXT: pand %xmm1, %xmm2
17163 ; SSE3-NEXT: psrlw $2, %xmm0
17164 ; SSE3-NEXT: pand %xmm1, %xmm0
17165 ; SSE3-NEXT: paddb %xmm2, %xmm0
17166 ; SSE3-NEXT: movdqa %xmm0, %xmm1
17167 ; SSE3-NEXT: psrlw $4, %xmm1
17168 ; SSE3-NEXT: paddb %xmm0, %xmm1
17169 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17170 ; SSE3-NEXT: pxor %xmm0, %xmm0
17171 ; SSE3-NEXT: psadbw %xmm0, %xmm1
17172 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17173 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
17174 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
17175 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17176 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
17177 ; SSE3-NEXT: pand %xmm2, %xmm3
17178 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
17179 ; SSE3-NEXT: por %xmm3, %xmm0
17182 ; SSSE3-LABEL: ugt_2_v2i64:
17184 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17185 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
17186 ; SSSE3-NEXT: pand %xmm1, %xmm2
17187 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17188 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
17189 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
17190 ; SSSE3-NEXT: psrlw $4, %xmm0
17191 ; SSSE3-NEXT: pand %xmm1, %xmm0
17192 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
17193 ; SSSE3-NEXT: paddb %xmm4, %xmm3
17194 ; SSSE3-NEXT: pxor %xmm0, %xmm0
17195 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
17196 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
17197 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
17198 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
17199 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
17200 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
17201 ; SSSE3-NEXT: pand %xmm1, %xmm2
17202 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
17203 ; SSSE3-NEXT: por %xmm2, %xmm0
17206 ; SSE41-LABEL: ugt_2_v2i64:
17208 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17209 ; SSE41-NEXT: movdqa %xmm0, %xmm2
17210 ; SSE41-NEXT: pand %xmm1, %xmm2
17211 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17212 ; SSE41-NEXT: movdqa %xmm3, %xmm4
17213 ; SSE41-NEXT: pshufb %xmm2, %xmm4
17214 ; SSE41-NEXT: psrlw $4, %xmm0
17215 ; SSE41-NEXT: pand %xmm1, %xmm0
17216 ; SSE41-NEXT: pshufb %xmm0, %xmm3
17217 ; SSE41-NEXT: paddb %xmm4, %xmm3
17218 ; SSE41-NEXT: pxor %xmm0, %xmm0
17219 ; SSE41-NEXT: psadbw %xmm0, %xmm3
17220 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
17221 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
17222 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
17223 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
17224 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
17225 ; SSE41-NEXT: pand %xmm1, %xmm2
17226 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
17227 ; SSE41-NEXT: por %xmm2, %xmm0
17230 ; AVX1-LABEL: ugt_2_v2i64:
17232 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17233 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
17234 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17235 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
17236 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
17237 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
17238 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
17239 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
17240 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
17241 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17242 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17245 ; AVX2-LABEL: ugt_2_v2i64:
17247 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17248 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
17249 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17250 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
17251 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
17252 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
17253 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
17254 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
17255 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
17256 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17257 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17260 ; AVX512VPOPCNTDQ-LABEL: ugt_2_v2i64:
17261 ; AVX512VPOPCNTDQ: # %bb.0:
17262 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17263 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
17264 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17265 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
17266 ; AVX512VPOPCNTDQ-NEXT: retq
17268 ; AVX512VPOPCNTDQVL-LABEL: ugt_2_v2i64:
17269 ; AVX512VPOPCNTDQVL: # %bb.0:
17270 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
17271 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
17272 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
17273 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
17274 ; AVX512VPOPCNTDQVL-NEXT: retq
17276 ; BITALG_NOVLX-LABEL: ugt_2_v2i64:
17277 ; BITALG_NOVLX: # %bb.0:
17278 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17279 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
17280 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
17281 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17282 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17283 ; BITALG_NOVLX-NEXT: vzeroupper
17284 ; BITALG_NOVLX-NEXT: retq
17286 ; BITALG-LABEL: ugt_2_v2i64:
17288 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
17289 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
17290 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17291 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
17292 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
17293 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
17294 ; BITALG-NEXT: retq
17295 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
17296 %3 = icmp ugt <2 x i64> %2, <i64 2, i64 2>
17297 %4 = sext <2 x i1> %3 to <2 x i64>
17301 define <2 x i64> @ult_3_v2i64(<2 x i64> %0) {
17302 ; SSE2-LABEL: ult_3_v2i64:
17304 ; SSE2-NEXT: movdqa %xmm0, %xmm1
17305 ; SSE2-NEXT: psrlw $1, %xmm1
17306 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17307 ; SSE2-NEXT: psubb %xmm1, %xmm0
17308 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
17309 ; SSE2-NEXT: movdqa %xmm0, %xmm2
17310 ; SSE2-NEXT: pand %xmm1, %xmm2
17311 ; SSE2-NEXT: psrlw $2, %xmm0
17312 ; SSE2-NEXT: pand %xmm1, %xmm0
17313 ; SSE2-NEXT: paddb %xmm2, %xmm0
17314 ; SSE2-NEXT: movdqa %xmm0, %xmm1
17315 ; SSE2-NEXT: psrlw $4, %xmm1
17316 ; SSE2-NEXT: paddb %xmm0, %xmm1
17317 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17318 ; SSE2-NEXT: pxor %xmm0, %xmm0
17319 ; SSE2-NEXT: psadbw %xmm0, %xmm1
17320 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17321 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
17322 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
17323 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483651,2147483651]
17324 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
17325 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
17326 ; SSE2-NEXT: pand %xmm2, %xmm1
17327 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
17328 ; SSE2-NEXT: por %xmm1, %xmm0
17331 ; SSE3-LABEL: ult_3_v2i64:
17333 ; SSE3-NEXT: movdqa %xmm0, %xmm1
17334 ; SSE3-NEXT: psrlw $1, %xmm1
17335 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17336 ; SSE3-NEXT: psubb %xmm1, %xmm0
17337 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
17338 ; SSE3-NEXT: movdqa %xmm0, %xmm2
17339 ; SSE3-NEXT: pand %xmm1, %xmm2
17340 ; SSE3-NEXT: psrlw $2, %xmm0
17341 ; SSE3-NEXT: pand %xmm1, %xmm0
17342 ; SSE3-NEXT: paddb %xmm2, %xmm0
17343 ; SSE3-NEXT: movdqa %xmm0, %xmm1
17344 ; SSE3-NEXT: psrlw $4, %xmm1
17345 ; SSE3-NEXT: paddb %xmm0, %xmm1
17346 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17347 ; SSE3-NEXT: pxor %xmm0, %xmm0
17348 ; SSE3-NEXT: psadbw %xmm0, %xmm1
17349 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17350 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
17351 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
17352 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483651,2147483651]
17353 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
17354 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
17355 ; SSE3-NEXT: pand %xmm2, %xmm1
17356 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
17357 ; SSE3-NEXT: por %xmm1, %xmm0
17360 ; SSSE3-LABEL: ult_3_v2i64:
17362 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17363 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
17364 ; SSSE3-NEXT: pand %xmm1, %xmm2
17365 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17366 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
17367 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
17368 ; SSSE3-NEXT: psrlw $4, %xmm0
17369 ; SSSE3-NEXT: pand %xmm1, %xmm0
17370 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
17371 ; SSSE3-NEXT: paddb %xmm4, %xmm3
17372 ; SSSE3-NEXT: pxor %xmm0, %xmm0
17373 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
17374 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
17375 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
17376 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
17377 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483651,2147483651]
17378 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
17379 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
17380 ; SSSE3-NEXT: pand %xmm1, %xmm2
17381 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
17382 ; SSSE3-NEXT: por %xmm2, %xmm0
17385 ; SSE41-LABEL: ult_3_v2i64:
17387 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17388 ; SSE41-NEXT: movdqa %xmm0, %xmm2
17389 ; SSE41-NEXT: pand %xmm1, %xmm2
17390 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17391 ; SSE41-NEXT: movdqa %xmm3, %xmm4
17392 ; SSE41-NEXT: pshufb %xmm2, %xmm4
17393 ; SSE41-NEXT: psrlw $4, %xmm0
17394 ; SSE41-NEXT: pand %xmm1, %xmm0
17395 ; SSE41-NEXT: pshufb %xmm0, %xmm3
17396 ; SSE41-NEXT: paddb %xmm4, %xmm3
17397 ; SSE41-NEXT: pxor %xmm0, %xmm0
17398 ; SSE41-NEXT: psadbw %xmm0, %xmm3
17399 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
17400 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
17401 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
17402 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483651,2147483651]
17403 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
17404 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
17405 ; SSE41-NEXT: pand %xmm1, %xmm2
17406 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
17407 ; SSE41-NEXT: por %xmm2, %xmm0
17410 ; AVX1-LABEL: ult_3_v2i64:
17412 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17413 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
17414 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17415 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
17416 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
17417 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
17418 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
17419 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
17420 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
17421 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17422 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3]
17423 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17426 ; AVX2-LABEL: ult_3_v2i64:
17428 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17429 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
17430 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17431 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
17432 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
17433 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
17434 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
17435 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
17436 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
17437 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17438 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3]
17439 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17442 ; AVX512VPOPCNTDQ-LABEL: ult_3_v2i64:
17443 ; AVX512VPOPCNTDQ: # %bb.0:
17444 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17445 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
17446 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3]
17447 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17448 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
17449 ; AVX512VPOPCNTDQ-NEXT: retq
17451 ; AVX512VPOPCNTDQVL-LABEL: ult_3_v2i64:
17452 ; AVX512VPOPCNTDQVL: # %bb.0:
17453 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
17454 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
17455 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
17456 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
17457 ; AVX512VPOPCNTDQVL-NEXT: retq
17459 ; BITALG_NOVLX-LABEL: ult_3_v2i64:
17460 ; BITALG_NOVLX: # %bb.0:
17461 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17462 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
17463 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
17464 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17465 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3]
17466 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17467 ; BITALG_NOVLX-NEXT: vzeroupper
17468 ; BITALG_NOVLX-NEXT: retq
17470 ; BITALG-LABEL: ult_3_v2i64:
17472 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
17473 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
17474 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17475 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
17476 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
17477 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
17478 ; BITALG-NEXT: retq
17479 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
17480 %3 = icmp ult <2 x i64> %2, <i64 3, i64 3>
17481 %4 = sext <2 x i1> %3 to <2 x i64>
17485 define <2 x i64> @ugt_3_v2i64(<2 x i64> %0) {
17486 ; SSE2-LABEL: ugt_3_v2i64:
17488 ; SSE2-NEXT: movdqa %xmm0, %xmm1
17489 ; SSE2-NEXT: psrlw $1, %xmm1
17490 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17491 ; SSE2-NEXT: psubb %xmm1, %xmm0
17492 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
17493 ; SSE2-NEXT: movdqa %xmm0, %xmm2
17494 ; SSE2-NEXT: pand %xmm1, %xmm2
17495 ; SSE2-NEXT: psrlw $2, %xmm0
17496 ; SSE2-NEXT: pand %xmm1, %xmm0
17497 ; SSE2-NEXT: paddb %xmm2, %xmm0
17498 ; SSE2-NEXT: movdqa %xmm0, %xmm1
17499 ; SSE2-NEXT: psrlw $4, %xmm1
17500 ; SSE2-NEXT: paddb %xmm0, %xmm1
17501 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17502 ; SSE2-NEXT: pxor %xmm0, %xmm0
17503 ; SSE2-NEXT: psadbw %xmm0, %xmm1
17504 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17505 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
17506 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
17507 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17508 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
17509 ; SSE2-NEXT: pand %xmm2, %xmm3
17510 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
17511 ; SSE2-NEXT: por %xmm3, %xmm0
17514 ; SSE3-LABEL: ugt_3_v2i64:
17516 ; SSE3-NEXT: movdqa %xmm0, %xmm1
17517 ; SSE3-NEXT: psrlw $1, %xmm1
17518 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17519 ; SSE3-NEXT: psubb %xmm1, %xmm0
17520 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
17521 ; SSE3-NEXT: movdqa %xmm0, %xmm2
17522 ; SSE3-NEXT: pand %xmm1, %xmm2
17523 ; SSE3-NEXT: psrlw $2, %xmm0
17524 ; SSE3-NEXT: pand %xmm1, %xmm0
17525 ; SSE3-NEXT: paddb %xmm2, %xmm0
17526 ; SSE3-NEXT: movdqa %xmm0, %xmm1
17527 ; SSE3-NEXT: psrlw $4, %xmm1
17528 ; SSE3-NEXT: paddb %xmm0, %xmm1
17529 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17530 ; SSE3-NEXT: pxor %xmm0, %xmm0
17531 ; SSE3-NEXT: psadbw %xmm0, %xmm1
17532 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17533 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
17534 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
17535 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17536 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
17537 ; SSE3-NEXT: pand %xmm2, %xmm3
17538 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
17539 ; SSE3-NEXT: por %xmm3, %xmm0
17542 ; SSSE3-LABEL: ugt_3_v2i64:
17544 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17545 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
17546 ; SSSE3-NEXT: pand %xmm1, %xmm2
17547 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17548 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
17549 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
17550 ; SSSE3-NEXT: psrlw $4, %xmm0
17551 ; SSSE3-NEXT: pand %xmm1, %xmm0
17552 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
17553 ; SSSE3-NEXT: paddb %xmm4, %xmm3
17554 ; SSSE3-NEXT: pxor %xmm0, %xmm0
17555 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
17556 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
17557 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
17558 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
17559 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
17560 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
17561 ; SSSE3-NEXT: pand %xmm1, %xmm2
17562 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
17563 ; SSSE3-NEXT: por %xmm2, %xmm0
17566 ; SSE41-LABEL: ugt_3_v2i64:
17568 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17569 ; SSE41-NEXT: movdqa %xmm0, %xmm2
17570 ; SSE41-NEXT: pand %xmm1, %xmm2
17571 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17572 ; SSE41-NEXT: movdqa %xmm3, %xmm4
17573 ; SSE41-NEXT: pshufb %xmm2, %xmm4
17574 ; SSE41-NEXT: psrlw $4, %xmm0
17575 ; SSE41-NEXT: pand %xmm1, %xmm0
17576 ; SSE41-NEXT: pshufb %xmm0, %xmm3
17577 ; SSE41-NEXT: paddb %xmm4, %xmm3
17578 ; SSE41-NEXT: pxor %xmm0, %xmm0
17579 ; SSE41-NEXT: psadbw %xmm0, %xmm3
17580 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
17581 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
17582 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
17583 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
17584 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
17585 ; SSE41-NEXT: pand %xmm1, %xmm2
17586 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
17587 ; SSE41-NEXT: por %xmm2, %xmm0
17590 ; AVX1-LABEL: ugt_3_v2i64:
17592 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17593 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
17594 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17595 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
17596 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
17597 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
17598 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
17599 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
17600 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
17601 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17602 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17605 ; AVX2-LABEL: ugt_3_v2i64:
17607 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17608 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
17609 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17610 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
17611 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
17612 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
17613 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
17614 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
17615 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
17616 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17617 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17620 ; AVX512VPOPCNTDQ-LABEL: ugt_3_v2i64:
17621 ; AVX512VPOPCNTDQ: # %bb.0:
17622 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17623 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
17624 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17625 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
17626 ; AVX512VPOPCNTDQ-NEXT: retq
17628 ; AVX512VPOPCNTDQVL-LABEL: ugt_3_v2i64:
17629 ; AVX512VPOPCNTDQVL: # %bb.0:
17630 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
17631 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
17632 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
17633 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
17634 ; AVX512VPOPCNTDQVL-NEXT: retq
17636 ; BITALG_NOVLX-LABEL: ugt_3_v2i64:
17637 ; BITALG_NOVLX: # %bb.0:
17638 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17639 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
17640 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
17641 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17642 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17643 ; BITALG_NOVLX-NEXT: vzeroupper
17644 ; BITALG_NOVLX-NEXT: retq
17646 ; BITALG-LABEL: ugt_3_v2i64:
17648 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
17649 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
17650 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17651 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
17652 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
17653 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
17654 ; BITALG-NEXT: retq
17655 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
17656 %3 = icmp ugt <2 x i64> %2, <i64 3, i64 3>
17657 %4 = sext <2 x i1> %3 to <2 x i64>
17661 define <2 x i64> @ult_4_v2i64(<2 x i64> %0) {
17662 ; SSE2-LABEL: ult_4_v2i64:
17664 ; SSE2-NEXT: movdqa %xmm0, %xmm1
17665 ; SSE2-NEXT: psrlw $1, %xmm1
17666 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17667 ; SSE2-NEXT: psubb %xmm1, %xmm0
17668 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
17669 ; SSE2-NEXT: movdqa %xmm0, %xmm2
17670 ; SSE2-NEXT: pand %xmm1, %xmm2
17671 ; SSE2-NEXT: psrlw $2, %xmm0
17672 ; SSE2-NEXT: pand %xmm1, %xmm0
17673 ; SSE2-NEXT: paddb %xmm2, %xmm0
17674 ; SSE2-NEXT: movdqa %xmm0, %xmm1
17675 ; SSE2-NEXT: psrlw $4, %xmm1
17676 ; SSE2-NEXT: paddb %xmm0, %xmm1
17677 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17678 ; SSE2-NEXT: pxor %xmm0, %xmm0
17679 ; SSE2-NEXT: psadbw %xmm0, %xmm1
17680 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17681 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
17682 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
17683 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483652,2147483652]
17684 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
17685 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
17686 ; SSE2-NEXT: pand %xmm2, %xmm1
17687 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
17688 ; SSE2-NEXT: por %xmm1, %xmm0
17691 ; SSE3-LABEL: ult_4_v2i64:
17693 ; SSE3-NEXT: movdqa %xmm0, %xmm1
17694 ; SSE3-NEXT: psrlw $1, %xmm1
17695 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17696 ; SSE3-NEXT: psubb %xmm1, %xmm0
17697 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
17698 ; SSE3-NEXT: movdqa %xmm0, %xmm2
17699 ; SSE3-NEXT: pand %xmm1, %xmm2
17700 ; SSE3-NEXT: psrlw $2, %xmm0
17701 ; SSE3-NEXT: pand %xmm1, %xmm0
17702 ; SSE3-NEXT: paddb %xmm2, %xmm0
17703 ; SSE3-NEXT: movdqa %xmm0, %xmm1
17704 ; SSE3-NEXT: psrlw $4, %xmm1
17705 ; SSE3-NEXT: paddb %xmm0, %xmm1
17706 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17707 ; SSE3-NEXT: pxor %xmm0, %xmm0
17708 ; SSE3-NEXT: psadbw %xmm0, %xmm1
17709 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17710 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
17711 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
17712 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483652,2147483652]
17713 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
17714 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
17715 ; SSE3-NEXT: pand %xmm2, %xmm1
17716 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
17717 ; SSE3-NEXT: por %xmm1, %xmm0
17720 ; SSSE3-LABEL: ult_4_v2i64:
17722 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17723 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
17724 ; SSSE3-NEXT: pand %xmm1, %xmm2
17725 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17726 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
17727 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
17728 ; SSSE3-NEXT: psrlw $4, %xmm0
17729 ; SSSE3-NEXT: pand %xmm1, %xmm0
17730 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
17731 ; SSSE3-NEXT: paddb %xmm4, %xmm3
17732 ; SSSE3-NEXT: pxor %xmm0, %xmm0
17733 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
17734 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
17735 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
17736 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
17737 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483652,2147483652]
17738 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
17739 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
17740 ; SSSE3-NEXT: pand %xmm1, %xmm2
17741 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
17742 ; SSSE3-NEXT: por %xmm2, %xmm0
17745 ; SSE41-LABEL: ult_4_v2i64:
17747 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17748 ; SSE41-NEXT: movdqa %xmm0, %xmm2
17749 ; SSE41-NEXT: pand %xmm1, %xmm2
17750 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17751 ; SSE41-NEXT: movdqa %xmm3, %xmm4
17752 ; SSE41-NEXT: pshufb %xmm2, %xmm4
17753 ; SSE41-NEXT: psrlw $4, %xmm0
17754 ; SSE41-NEXT: pand %xmm1, %xmm0
17755 ; SSE41-NEXT: pshufb %xmm0, %xmm3
17756 ; SSE41-NEXT: paddb %xmm4, %xmm3
17757 ; SSE41-NEXT: pxor %xmm0, %xmm0
17758 ; SSE41-NEXT: psadbw %xmm0, %xmm3
17759 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
17760 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
17761 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
17762 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483652,2147483652]
17763 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
17764 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
17765 ; SSE41-NEXT: pand %xmm1, %xmm2
17766 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
17767 ; SSE41-NEXT: por %xmm2, %xmm0
17770 ; AVX1-LABEL: ult_4_v2i64:
17772 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17773 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
17774 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17775 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
17776 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
17777 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
17778 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
17779 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
17780 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
17781 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17782 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4]
17783 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17786 ; AVX2-LABEL: ult_4_v2i64:
17788 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17789 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
17790 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17791 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
17792 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
17793 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
17794 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
17795 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
17796 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
17797 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17798 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4]
17799 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17802 ; AVX512VPOPCNTDQ-LABEL: ult_4_v2i64:
17803 ; AVX512VPOPCNTDQ: # %bb.0:
17804 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17805 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
17806 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4]
17807 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17808 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
17809 ; AVX512VPOPCNTDQ-NEXT: retq
17811 ; AVX512VPOPCNTDQVL-LABEL: ult_4_v2i64:
17812 ; AVX512VPOPCNTDQVL: # %bb.0:
17813 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
17814 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
17815 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
17816 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
17817 ; AVX512VPOPCNTDQVL-NEXT: retq
17819 ; BITALG_NOVLX-LABEL: ult_4_v2i64:
17820 ; BITALG_NOVLX: # %bb.0:
17821 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17822 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
17823 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
17824 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17825 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4]
17826 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17827 ; BITALG_NOVLX-NEXT: vzeroupper
17828 ; BITALG_NOVLX-NEXT: retq
17830 ; BITALG-LABEL: ult_4_v2i64:
17832 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
17833 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
17834 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17835 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
17836 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
17837 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
17838 ; BITALG-NEXT: retq
17839 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
17840 %3 = icmp ult <2 x i64> %2, <i64 4, i64 4>
17841 %4 = sext <2 x i1> %3 to <2 x i64>
17845 define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) {
17846 ; SSE2-LABEL: ugt_4_v2i64:
17848 ; SSE2-NEXT: movdqa %xmm0, %xmm1
17849 ; SSE2-NEXT: psrlw $1, %xmm1
17850 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17851 ; SSE2-NEXT: psubb %xmm1, %xmm0
17852 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
17853 ; SSE2-NEXT: movdqa %xmm0, %xmm2
17854 ; SSE2-NEXT: pand %xmm1, %xmm2
17855 ; SSE2-NEXT: psrlw $2, %xmm0
17856 ; SSE2-NEXT: pand %xmm1, %xmm0
17857 ; SSE2-NEXT: paddb %xmm2, %xmm0
17858 ; SSE2-NEXT: movdqa %xmm0, %xmm1
17859 ; SSE2-NEXT: psrlw $4, %xmm1
17860 ; SSE2-NEXT: paddb %xmm0, %xmm1
17861 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17862 ; SSE2-NEXT: pxor %xmm0, %xmm0
17863 ; SSE2-NEXT: psadbw %xmm0, %xmm1
17864 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17865 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
17866 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
17867 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17868 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
17869 ; SSE2-NEXT: pand %xmm2, %xmm3
17870 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
17871 ; SSE2-NEXT: por %xmm3, %xmm0
17874 ; SSE3-LABEL: ugt_4_v2i64:
17876 ; SSE3-NEXT: movdqa %xmm0, %xmm1
17877 ; SSE3-NEXT: psrlw $1, %xmm1
17878 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17879 ; SSE3-NEXT: psubb %xmm1, %xmm0
17880 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
17881 ; SSE3-NEXT: movdqa %xmm0, %xmm2
17882 ; SSE3-NEXT: pand %xmm1, %xmm2
17883 ; SSE3-NEXT: psrlw $2, %xmm0
17884 ; SSE3-NEXT: pand %xmm1, %xmm0
17885 ; SSE3-NEXT: paddb %xmm2, %xmm0
17886 ; SSE3-NEXT: movdqa %xmm0, %xmm1
17887 ; SSE3-NEXT: psrlw $4, %xmm1
17888 ; SSE3-NEXT: paddb %xmm0, %xmm1
17889 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17890 ; SSE3-NEXT: pxor %xmm0, %xmm0
17891 ; SSE3-NEXT: psadbw %xmm0, %xmm1
17892 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17893 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
17894 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
17895 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17896 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
17897 ; SSE3-NEXT: pand %xmm2, %xmm3
17898 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
17899 ; SSE3-NEXT: por %xmm3, %xmm0
17902 ; SSSE3-LABEL: ugt_4_v2i64:
17904 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17905 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
17906 ; SSSE3-NEXT: pand %xmm1, %xmm2
17907 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17908 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
17909 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
17910 ; SSSE3-NEXT: psrlw $4, %xmm0
17911 ; SSSE3-NEXT: pand %xmm1, %xmm0
17912 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
17913 ; SSSE3-NEXT: paddb %xmm4, %xmm3
17914 ; SSSE3-NEXT: pxor %xmm0, %xmm0
17915 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
17916 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
17917 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
17918 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
17919 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
17920 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
17921 ; SSSE3-NEXT: pand %xmm1, %xmm2
17922 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
17923 ; SSSE3-NEXT: por %xmm2, %xmm0
17926 ; SSE41-LABEL: ugt_4_v2i64:
17928 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17929 ; SSE41-NEXT: movdqa %xmm0, %xmm2
17930 ; SSE41-NEXT: pand %xmm1, %xmm2
17931 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17932 ; SSE41-NEXT: movdqa %xmm3, %xmm4
17933 ; SSE41-NEXT: pshufb %xmm2, %xmm4
17934 ; SSE41-NEXT: psrlw $4, %xmm0
17935 ; SSE41-NEXT: pand %xmm1, %xmm0
17936 ; SSE41-NEXT: pshufb %xmm0, %xmm3
17937 ; SSE41-NEXT: paddb %xmm4, %xmm3
17938 ; SSE41-NEXT: pxor %xmm0, %xmm0
17939 ; SSE41-NEXT: psadbw %xmm0, %xmm3
17940 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
17941 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
17942 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
17943 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
17944 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
17945 ; SSE41-NEXT: pand %xmm1, %xmm2
17946 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
17947 ; SSE41-NEXT: por %xmm2, %xmm0
17950 ; AVX1-LABEL: ugt_4_v2i64:
17952 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17953 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
17954 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17955 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
17956 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
17957 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
17958 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
17959 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
17960 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
17961 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17962 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17965 ; AVX2-LABEL: ugt_4_v2i64:
17967 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17968 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
17969 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17970 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
17971 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
17972 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
17973 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
17974 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
17975 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
17976 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17977 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17980 ; AVX512VPOPCNTDQ-LABEL: ugt_4_v2i64:
17981 ; AVX512VPOPCNTDQ: # %bb.0:
17982 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17983 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
17984 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17985 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
17986 ; AVX512VPOPCNTDQ-NEXT: retq
17988 ; AVX512VPOPCNTDQVL-LABEL: ugt_4_v2i64:
17989 ; AVX512VPOPCNTDQVL: # %bb.0:
17990 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
17991 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
17992 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
17993 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
17994 ; AVX512VPOPCNTDQVL-NEXT: retq
17996 ; BITALG_NOVLX-LABEL: ugt_4_v2i64:
17997 ; BITALG_NOVLX: # %bb.0:
17998 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17999 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
18000 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
18001 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18002 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
18003 ; BITALG_NOVLX-NEXT: vzeroupper
18004 ; BITALG_NOVLX-NEXT: retq
18006 ; BITALG-LABEL: ugt_4_v2i64:
18008 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
18009 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
18010 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18011 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
18012 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
18013 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
18014 ; BITALG-NEXT: retq
18015 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
18016 %3 = icmp ugt <2 x i64> %2, <i64 4, i64 4>
18017 %4 = sext <2 x i1> %3 to <2 x i64>
18021 define <2 x i64> @ult_5_v2i64(<2 x i64> %0) {
18022 ; SSE2-LABEL: ult_5_v2i64:
18024 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18025 ; SSE2-NEXT: psrlw $1, %xmm1
18026 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18027 ; SSE2-NEXT: psubb %xmm1, %xmm0
18028 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18029 ; SSE2-NEXT: movdqa %xmm0, %xmm2
18030 ; SSE2-NEXT: pand %xmm1, %xmm2
18031 ; SSE2-NEXT: psrlw $2, %xmm0
18032 ; SSE2-NEXT: pand %xmm1, %xmm0
18033 ; SSE2-NEXT: paddb %xmm2, %xmm0
18034 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18035 ; SSE2-NEXT: psrlw $4, %xmm1
18036 ; SSE2-NEXT: paddb %xmm0, %xmm1
18037 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18038 ; SSE2-NEXT: pxor %xmm0, %xmm0
18039 ; SSE2-NEXT: psadbw %xmm0, %xmm1
18040 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18041 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
18042 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
18043 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483653,2147483653]
18044 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
18045 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
18046 ; SSE2-NEXT: pand %xmm2, %xmm1
18047 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
18048 ; SSE2-NEXT: por %xmm1, %xmm0
18051 ; SSE3-LABEL: ult_5_v2i64:
18053 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18054 ; SSE3-NEXT: psrlw $1, %xmm1
18055 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18056 ; SSE3-NEXT: psubb %xmm1, %xmm0
18057 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18058 ; SSE3-NEXT: movdqa %xmm0, %xmm2
18059 ; SSE3-NEXT: pand %xmm1, %xmm2
18060 ; SSE3-NEXT: psrlw $2, %xmm0
18061 ; SSE3-NEXT: pand %xmm1, %xmm0
18062 ; SSE3-NEXT: paddb %xmm2, %xmm0
18063 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18064 ; SSE3-NEXT: psrlw $4, %xmm1
18065 ; SSE3-NEXT: paddb %xmm0, %xmm1
18066 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18067 ; SSE3-NEXT: pxor %xmm0, %xmm0
18068 ; SSE3-NEXT: psadbw %xmm0, %xmm1
18069 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18070 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
18071 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
18072 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483653,2147483653]
18073 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
18074 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
18075 ; SSE3-NEXT: pand %xmm2, %xmm1
18076 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
18077 ; SSE3-NEXT: por %xmm1, %xmm0
18080 ; SSSE3-LABEL: ult_5_v2i64:
18082 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18083 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
18084 ; SSSE3-NEXT: pand %xmm1, %xmm2
18085 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18086 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
18087 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
18088 ; SSSE3-NEXT: psrlw $4, %xmm0
18089 ; SSSE3-NEXT: pand %xmm1, %xmm0
18090 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
18091 ; SSSE3-NEXT: paddb %xmm4, %xmm3
18092 ; SSSE3-NEXT: pxor %xmm0, %xmm0
18093 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
18094 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
18095 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
18096 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
18097 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483653,2147483653]
18098 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
18099 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
18100 ; SSSE3-NEXT: pand %xmm1, %xmm2
18101 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
18102 ; SSSE3-NEXT: por %xmm2, %xmm0
18105 ; SSE41-LABEL: ult_5_v2i64:
18107 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18108 ; SSE41-NEXT: movdqa %xmm0, %xmm2
18109 ; SSE41-NEXT: pand %xmm1, %xmm2
18110 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18111 ; SSE41-NEXT: movdqa %xmm3, %xmm4
18112 ; SSE41-NEXT: pshufb %xmm2, %xmm4
18113 ; SSE41-NEXT: psrlw $4, %xmm0
18114 ; SSE41-NEXT: pand %xmm1, %xmm0
18115 ; SSE41-NEXT: pshufb %xmm0, %xmm3
18116 ; SSE41-NEXT: paddb %xmm4, %xmm3
18117 ; SSE41-NEXT: pxor %xmm0, %xmm0
18118 ; SSE41-NEXT: psadbw %xmm0, %xmm3
18119 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
18120 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
18121 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
18122 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483653,2147483653]
18123 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
18124 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
18125 ; SSE41-NEXT: pand %xmm1, %xmm2
18126 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
18127 ; SSE41-NEXT: por %xmm2, %xmm0
18130 ; AVX1-LABEL: ult_5_v2i64:
18132 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18133 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
18134 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18135 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
18136 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
18137 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
18138 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
18139 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
18140 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
18141 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18142 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5]
18143 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18146 ; AVX2-LABEL: ult_5_v2i64:
18148 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18149 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
18150 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18151 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
18152 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
18153 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
18154 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
18155 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
18156 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
18157 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18158 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5]
18159 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18162 ; AVX512VPOPCNTDQ-LABEL: ult_5_v2i64:
18163 ; AVX512VPOPCNTDQ: # %bb.0:
18164 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18165 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
18166 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5]
18167 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18168 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
18169 ; AVX512VPOPCNTDQ-NEXT: retq
18171 ; AVX512VPOPCNTDQVL-LABEL: ult_5_v2i64:
18172 ; AVX512VPOPCNTDQVL: # %bb.0:
18173 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
18174 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
18175 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
18176 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
18177 ; AVX512VPOPCNTDQVL-NEXT: retq
18179 ; BITALG_NOVLX-LABEL: ult_5_v2i64:
18180 ; BITALG_NOVLX: # %bb.0:
18181 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18182 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
18183 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
18184 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18185 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5]
18186 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18187 ; BITALG_NOVLX-NEXT: vzeroupper
18188 ; BITALG_NOVLX-NEXT: retq
18190 ; BITALG-LABEL: ult_5_v2i64:
18192 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
18193 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
18194 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18195 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
18196 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
18197 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
18198 ; BITALG-NEXT: retq
18199 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
18200 %3 = icmp ult <2 x i64> %2, <i64 5, i64 5>
18201 %4 = sext <2 x i1> %3 to <2 x i64>
18205 define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) {
18206 ; SSE2-LABEL: ugt_5_v2i64:
18208 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18209 ; SSE2-NEXT: psrlw $1, %xmm1
18210 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18211 ; SSE2-NEXT: psubb %xmm1, %xmm0
18212 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18213 ; SSE2-NEXT: movdqa %xmm0, %xmm2
18214 ; SSE2-NEXT: pand %xmm1, %xmm2
18215 ; SSE2-NEXT: psrlw $2, %xmm0
18216 ; SSE2-NEXT: pand %xmm1, %xmm0
18217 ; SSE2-NEXT: paddb %xmm2, %xmm0
18218 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18219 ; SSE2-NEXT: psrlw $4, %xmm1
18220 ; SSE2-NEXT: paddb %xmm0, %xmm1
18221 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18222 ; SSE2-NEXT: pxor %xmm0, %xmm0
18223 ; SSE2-NEXT: psadbw %xmm0, %xmm1
18224 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18225 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
18226 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
18227 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18228 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
18229 ; SSE2-NEXT: pand %xmm2, %xmm3
18230 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
18231 ; SSE2-NEXT: por %xmm3, %xmm0
18234 ; SSE3-LABEL: ugt_5_v2i64:
18236 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18237 ; SSE3-NEXT: psrlw $1, %xmm1
18238 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18239 ; SSE3-NEXT: psubb %xmm1, %xmm0
18240 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18241 ; SSE3-NEXT: movdqa %xmm0, %xmm2
18242 ; SSE3-NEXT: pand %xmm1, %xmm2
18243 ; SSE3-NEXT: psrlw $2, %xmm0
18244 ; SSE3-NEXT: pand %xmm1, %xmm0
18245 ; SSE3-NEXT: paddb %xmm2, %xmm0
18246 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18247 ; SSE3-NEXT: psrlw $4, %xmm1
18248 ; SSE3-NEXT: paddb %xmm0, %xmm1
18249 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18250 ; SSE3-NEXT: pxor %xmm0, %xmm0
18251 ; SSE3-NEXT: psadbw %xmm0, %xmm1
18252 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18253 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
18254 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
18255 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18256 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
18257 ; SSE3-NEXT: pand %xmm2, %xmm3
18258 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
18259 ; SSE3-NEXT: por %xmm3, %xmm0
18262 ; SSSE3-LABEL: ugt_5_v2i64:
18264 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18265 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
18266 ; SSSE3-NEXT: pand %xmm1, %xmm2
18267 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18268 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
18269 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
18270 ; SSSE3-NEXT: psrlw $4, %xmm0
18271 ; SSSE3-NEXT: pand %xmm1, %xmm0
18272 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
18273 ; SSSE3-NEXT: paddb %xmm4, %xmm3
18274 ; SSSE3-NEXT: pxor %xmm0, %xmm0
18275 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
18276 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
18277 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
18278 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
18279 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
18280 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
18281 ; SSSE3-NEXT: pand %xmm1, %xmm2
18282 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
18283 ; SSSE3-NEXT: por %xmm2, %xmm0
18286 ; SSE41-LABEL: ugt_5_v2i64:
18288 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18289 ; SSE41-NEXT: movdqa %xmm0, %xmm2
18290 ; SSE41-NEXT: pand %xmm1, %xmm2
18291 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18292 ; SSE41-NEXT: movdqa %xmm3, %xmm4
18293 ; SSE41-NEXT: pshufb %xmm2, %xmm4
18294 ; SSE41-NEXT: psrlw $4, %xmm0
18295 ; SSE41-NEXT: pand %xmm1, %xmm0
18296 ; SSE41-NEXT: pshufb %xmm0, %xmm3
18297 ; SSE41-NEXT: paddb %xmm4, %xmm3
18298 ; SSE41-NEXT: pxor %xmm0, %xmm0
18299 ; SSE41-NEXT: psadbw %xmm0, %xmm3
18300 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
18301 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
18302 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
18303 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
18304 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
18305 ; SSE41-NEXT: pand %xmm1, %xmm2
18306 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
18307 ; SSE41-NEXT: por %xmm2, %xmm0
18310 ; AVX1-LABEL: ugt_5_v2i64:
18312 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18313 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
18314 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18315 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
18316 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
18317 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
18318 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
18319 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
18320 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
18321 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18322 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
18325 ; AVX2-LABEL: ugt_5_v2i64:
18327 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18328 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
18329 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18330 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
18331 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
18332 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
18333 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
18334 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
18335 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
18336 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18337 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
18340 ; AVX512VPOPCNTDQ-LABEL: ugt_5_v2i64:
18341 ; AVX512VPOPCNTDQ: # %bb.0:
18342 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18343 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
18344 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
18345 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
18346 ; AVX512VPOPCNTDQ-NEXT: retq
18348 ; AVX512VPOPCNTDQVL-LABEL: ugt_5_v2i64:
18349 ; AVX512VPOPCNTDQVL: # %bb.0:
18350 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
18351 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
18352 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
18353 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
18354 ; AVX512VPOPCNTDQVL-NEXT: retq
18356 ; BITALG_NOVLX-LABEL: ugt_5_v2i64:
18357 ; BITALG_NOVLX: # %bb.0:
18358 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18359 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
18360 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
18361 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18362 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
18363 ; BITALG_NOVLX-NEXT: vzeroupper
18364 ; BITALG_NOVLX-NEXT: retq
18366 ; BITALG-LABEL: ugt_5_v2i64:
18368 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
18369 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
18370 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18371 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
18372 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
18373 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
18374 ; BITALG-NEXT: retq
18375 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
18376 %3 = icmp ugt <2 x i64> %2, <i64 5, i64 5>
18377 %4 = sext <2 x i1> %3 to <2 x i64>
18381 define <2 x i64> @ult_6_v2i64(<2 x i64> %0) {
18382 ; SSE2-LABEL: ult_6_v2i64:
18384 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18385 ; SSE2-NEXT: psrlw $1, %xmm1
18386 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18387 ; SSE2-NEXT: psubb %xmm1, %xmm0
18388 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18389 ; SSE2-NEXT: movdqa %xmm0, %xmm2
18390 ; SSE2-NEXT: pand %xmm1, %xmm2
18391 ; SSE2-NEXT: psrlw $2, %xmm0
18392 ; SSE2-NEXT: pand %xmm1, %xmm0
18393 ; SSE2-NEXT: paddb %xmm2, %xmm0
18394 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18395 ; SSE2-NEXT: psrlw $4, %xmm1
18396 ; SSE2-NEXT: paddb %xmm0, %xmm1
18397 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18398 ; SSE2-NEXT: pxor %xmm0, %xmm0
18399 ; SSE2-NEXT: psadbw %xmm0, %xmm1
18400 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18401 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
18402 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
18403 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483654,2147483654]
18404 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
18405 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
18406 ; SSE2-NEXT: pand %xmm2, %xmm1
18407 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
18408 ; SSE2-NEXT: por %xmm1, %xmm0
18411 ; SSE3-LABEL: ult_6_v2i64:
18413 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18414 ; SSE3-NEXT: psrlw $1, %xmm1
18415 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18416 ; SSE3-NEXT: psubb %xmm1, %xmm0
18417 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18418 ; SSE3-NEXT: movdqa %xmm0, %xmm2
18419 ; SSE3-NEXT: pand %xmm1, %xmm2
18420 ; SSE3-NEXT: psrlw $2, %xmm0
18421 ; SSE3-NEXT: pand %xmm1, %xmm0
18422 ; SSE3-NEXT: paddb %xmm2, %xmm0
18423 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18424 ; SSE3-NEXT: psrlw $4, %xmm1
18425 ; SSE3-NEXT: paddb %xmm0, %xmm1
18426 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18427 ; SSE3-NEXT: pxor %xmm0, %xmm0
18428 ; SSE3-NEXT: psadbw %xmm0, %xmm1
18429 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18430 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
18431 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
18432 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483654,2147483654]
18433 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
18434 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
18435 ; SSE3-NEXT: pand %xmm2, %xmm1
18436 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
18437 ; SSE3-NEXT: por %xmm1, %xmm0
18440 ; SSSE3-LABEL: ult_6_v2i64:
18442 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18443 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
18444 ; SSSE3-NEXT: pand %xmm1, %xmm2
18445 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18446 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
18447 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
18448 ; SSSE3-NEXT: psrlw $4, %xmm0
18449 ; SSSE3-NEXT: pand %xmm1, %xmm0
18450 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
18451 ; SSSE3-NEXT: paddb %xmm4, %xmm3
18452 ; SSSE3-NEXT: pxor %xmm0, %xmm0
18453 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
18454 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
18455 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
18456 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
18457 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483654,2147483654]
18458 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
18459 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
18460 ; SSSE3-NEXT: pand %xmm1, %xmm2
18461 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
18462 ; SSSE3-NEXT: por %xmm2, %xmm0
18465 ; SSE41-LABEL: ult_6_v2i64:
18467 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18468 ; SSE41-NEXT: movdqa %xmm0, %xmm2
18469 ; SSE41-NEXT: pand %xmm1, %xmm2
18470 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18471 ; SSE41-NEXT: movdqa %xmm3, %xmm4
18472 ; SSE41-NEXT: pshufb %xmm2, %xmm4
18473 ; SSE41-NEXT: psrlw $4, %xmm0
18474 ; SSE41-NEXT: pand %xmm1, %xmm0
18475 ; SSE41-NEXT: pshufb %xmm0, %xmm3
18476 ; SSE41-NEXT: paddb %xmm4, %xmm3
18477 ; SSE41-NEXT: pxor %xmm0, %xmm0
18478 ; SSE41-NEXT: psadbw %xmm0, %xmm3
18479 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
18480 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
18481 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
18482 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483654,2147483654]
18483 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
18484 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
18485 ; SSE41-NEXT: pand %xmm1, %xmm2
18486 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
18487 ; SSE41-NEXT: por %xmm2, %xmm0
18490 ; AVX1-LABEL: ult_6_v2i64:
18492 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18493 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
18494 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18495 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
18496 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
18497 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
18498 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
18499 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
18500 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
18501 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18502 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6]
18503 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18506 ; AVX2-LABEL: ult_6_v2i64:
18508 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18509 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
18510 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18511 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
18512 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
18513 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
18514 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
18515 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
18516 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
18517 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18518 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6]
18519 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18522 ; AVX512VPOPCNTDQ-LABEL: ult_6_v2i64:
18523 ; AVX512VPOPCNTDQ: # %bb.0:
18524 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18525 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
18526 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6]
18527 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18528 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
18529 ; AVX512VPOPCNTDQ-NEXT: retq
18531 ; AVX512VPOPCNTDQVL-LABEL: ult_6_v2i64:
18532 ; AVX512VPOPCNTDQVL: # %bb.0:
18533 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
18534 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
18535 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
18536 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
18537 ; AVX512VPOPCNTDQVL-NEXT: retq
18539 ; BITALG_NOVLX-LABEL: ult_6_v2i64:
18540 ; BITALG_NOVLX: # %bb.0:
18541 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18542 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
18543 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
18544 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18545 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6]
18546 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18547 ; BITALG_NOVLX-NEXT: vzeroupper
18548 ; BITALG_NOVLX-NEXT: retq
18550 ; BITALG-LABEL: ult_6_v2i64:
18552 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
18553 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
18554 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18555 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
18556 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
18557 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
18558 ; BITALG-NEXT: retq
18559 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
18560 %3 = icmp ult <2 x i64> %2, <i64 6, i64 6>
18561 %4 = sext <2 x i1> %3 to <2 x i64>
18565 define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) {
18566 ; SSE2-LABEL: ugt_6_v2i64:
18568 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18569 ; SSE2-NEXT: psrlw $1, %xmm1
18570 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18571 ; SSE2-NEXT: psubb %xmm1, %xmm0
18572 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18573 ; SSE2-NEXT: movdqa %xmm0, %xmm2
18574 ; SSE2-NEXT: pand %xmm1, %xmm2
18575 ; SSE2-NEXT: psrlw $2, %xmm0
18576 ; SSE2-NEXT: pand %xmm1, %xmm0
18577 ; SSE2-NEXT: paddb %xmm2, %xmm0
18578 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18579 ; SSE2-NEXT: psrlw $4, %xmm1
18580 ; SSE2-NEXT: paddb %xmm0, %xmm1
18581 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18582 ; SSE2-NEXT: pxor %xmm0, %xmm0
18583 ; SSE2-NEXT: psadbw %xmm0, %xmm1
18584 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18585 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
18586 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
18587 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18588 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
18589 ; SSE2-NEXT: pand %xmm2, %xmm3
18590 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
18591 ; SSE2-NEXT: por %xmm3, %xmm0
18594 ; SSE3-LABEL: ugt_6_v2i64:
18596 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18597 ; SSE3-NEXT: psrlw $1, %xmm1
18598 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18599 ; SSE3-NEXT: psubb %xmm1, %xmm0
18600 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18601 ; SSE3-NEXT: movdqa %xmm0, %xmm2
18602 ; SSE3-NEXT: pand %xmm1, %xmm2
18603 ; SSE3-NEXT: psrlw $2, %xmm0
18604 ; SSE3-NEXT: pand %xmm1, %xmm0
18605 ; SSE3-NEXT: paddb %xmm2, %xmm0
18606 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18607 ; SSE3-NEXT: psrlw $4, %xmm1
18608 ; SSE3-NEXT: paddb %xmm0, %xmm1
18609 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18610 ; SSE3-NEXT: pxor %xmm0, %xmm0
18611 ; SSE3-NEXT: psadbw %xmm0, %xmm1
18612 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18613 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
18614 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
18615 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18616 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
18617 ; SSE3-NEXT: pand %xmm2, %xmm3
18618 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
18619 ; SSE3-NEXT: por %xmm3, %xmm0
18622 ; SSSE3-LABEL: ugt_6_v2i64:
18624 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18625 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
18626 ; SSSE3-NEXT: pand %xmm1, %xmm2
18627 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18628 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
18629 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
18630 ; SSSE3-NEXT: psrlw $4, %xmm0
18631 ; SSSE3-NEXT: pand %xmm1, %xmm0
18632 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
18633 ; SSSE3-NEXT: paddb %xmm4, %xmm3
18634 ; SSSE3-NEXT: pxor %xmm0, %xmm0
18635 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
18636 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
18637 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
18638 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
18639 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
18640 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
18641 ; SSSE3-NEXT: pand %xmm1, %xmm2
18642 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
18643 ; SSSE3-NEXT: por %xmm2, %xmm0
18646 ; SSE41-LABEL: ugt_6_v2i64:
18648 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18649 ; SSE41-NEXT: movdqa %xmm0, %xmm2
18650 ; SSE41-NEXT: pand %xmm1, %xmm2
18651 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18652 ; SSE41-NEXT: movdqa %xmm3, %xmm4
18653 ; SSE41-NEXT: pshufb %xmm2, %xmm4
18654 ; SSE41-NEXT: psrlw $4, %xmm0
18655 ; SSE41-NEXT: pand %xmm1, %xmm0
18656 ; SSE41-NEXT: pshufb %xmm0, %xmm3
18657 ; SSE41-NEXT: paddb %xmm4, %xmm3
18658 ; SSE41-NEXT: pxor %xmm0, %xmm0
18659 ; SSE41-NEXT: psadbw %xmm0, %xmm3
18660 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
18661 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
18662 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
18663 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
18664 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
18665 ; SSE41-NEXT: pand %xmm1, %xmm2
18666 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
18667 ; SSE41-NEXT: por %xmm2, %xmm0
18670 ; AVX1-LABEL: ugt_6_v2i64:
18672 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18673 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
18674 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18675 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
18676 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
18677 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
18678 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
18679 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
18680 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
18681 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18682 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
18685 ; AVX2-LABEL: ugt_6_v2i64:
18687 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18688 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
18689 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18690 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
18691 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
18692 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
18693 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
18694 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
18695 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
18696 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18697 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
18700 ; AVX512VPOPCNTDQ-LABEL: ugt_6_v2i64:
18701 ; AVX512VPOPCNTDQ: # %bb.0:
18702 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18703 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
18704 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
18705 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
18706 ; AVX512VPOPCNTDQ-NEXT: retq
18708 ; AVX512VPOPCNTDQVL-LABEL: ugt_6_v2i64:
18709 ; AVX512VPOPCNTDQVL: # %bb.0:
18710 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
18711 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
18712 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
18713 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
18714 ; AVX512VPOPCNTDQVL-NEXT: retq
18716 ; BITALG_NOVLX-LABEL: ugt_6_v2i64:
18717 ; BITALG_NOVLX: # %bb.0:
18718 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18719 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
18720 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
18721 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18722 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
18723 ; BITALG_NOVLX-NEXT: vzeroupper
18724 ; BITALG_NOVLX-NEXT: retq
18726 ; BITALG-LABEL: ugt_6_v2i64:
18728 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
18729 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
18730 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18731 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
18732 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
18733 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
18734 ; BITALG-NEXT: retq
18735 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
18736 %3 = icmp ugt <2 x i64> %2, <i64 6, i64 6>
18737 %4 = sext <2 x i1> %3 to <2 x i64>
18741 define <2 x i64> @ult_7_v2i64(<2 x i64> %0) {
18742 ; SSE2-LABEL: ult_7_v2i64:
18744 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18745 ; SSE2-NEXT: psrlw $1, %xmm1
18746 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18747 ; SSE2-NEXT: psubb %xmm1, %xmm0
18748 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18749 ; SSE2-NEXT: movdqa %xmm0, %xmm2
18750 ; SSE2-NEXT: pand %xmm1, %xmm2
18751 ; SSE2-NEXT: psrlw $2, %xmm0
18752 ; SSE2-NEXT: pand %xmm1, %xmm0
18753 ; SSE2-NEXT: paddb %xmm2, %xmm0
18754 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18755 ; SSE2-NEXT: psrlw $4, %xmm1
18756 ; SSE2-NEXT: paddb %xmm0, %xmm1
18757 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18758 ; SSE2-NEXT: pxor %xmm0, %xmm0
18759 ; SSE2-NEXT: psadbw %xmm0, %xmm1
18760 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18761 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
18762 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
18763 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483655,2147483655]
18764 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
18765 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
18766 ; SSE2-NEXT: pand %xmm2, %xmm1
18767 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
18768 ; SSE2-NEXT: por %xmm1, %xmm0
18771 ; SSE3-LABEL: ult_7_v2i64:
18773 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18774 ; SSE3-NEXT: psrlw $1, %xmm1
18775 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18776 ; SSE3-NEXT: psubb %xmm1, %xmm0
18777 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18778 ; SSE3-NEXT: movdqa %xmm0, %xmm2
18779 ; SSE3-NEXT: pand %xmm1, %xmm2
18780 ; SSE3-NEXT: psrlw $2, %xmm0
18781 ; SSE3-NEXT: pand %xmm1, %xmm0
18782 ; SSE3-NEXT: paddb %xmm2, %xmm0
18783 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18784 ; SSE3-NEXT: psrlw $4, %xmm1
18785 ; SSE3-NEXT: paddb %xmm0, %xmm1
18786 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18787 ; SSE3-NEXT: pxor %xmm0, %xmm0
18788 ; SSE3-NEXT: psadbw %xmm0, %xmm1
18789 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18790 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
18791 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
18792 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483655,2147483655]
18793 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
18794 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
18795 ; SSE3-NEXT: pand %xmm2, %xmm1
18796 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
18797 ; SSE3-NEXT: por %xmm1, %xmm0
18800 ; SSSE3-LABEL: ult_7_v2i64:
18802 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18803 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
18804 ; SSSE3-NEXT: pand %xmm1, %xmm2
18805 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18806 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
18807 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
18808 ; SSSE3-NEXT: psrlw $4, %xmm0
18809 ; SSSE3-NEXT: pand %xmm1, %xmm0
18810 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
18811 ; SSSE3-NEXT: paddb %xmm4, %xmm3
18812 ; SSSE3-NEXT: pxor %xmm0, %xmm0
18813 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
18814 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
18815 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
18816 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
18817 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483655,2147483655]
18818 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
18819 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
18820 ; SSSE3-NEXT: pand %xmm1, %xmm2
18821 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
18822 ; SSSE3-NEXT: por %xmm2, %xmm0
18825 ; SSE41-LABEL: ult_7_v2i64:
18827 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18828 ; SSE41-NEXT: movdqa %xmm0, %xmm2
18829 ; SSE41-NEXT: pand %xmm1, %xmm2
18830 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18831 ; SSE41-NEXT: movdqa %xmm3, %xmm4
18832 ; SSE41-NEXT: pshufb %xmm2, %xmm4
18833 ; SSE41-NEXT: psrlw $4, %xmm0
18834 ; SSE41-NEXT: pand %xmm1, %xmm0
18835 ; SSE41-NEXT: pshufb %xmm0, %xmm3
18836 ; SSE41-NEXT: paddb %xmm4, %xmm3
18837 ; SSE41-NEXT: pxor %xmm0, %xmm0
18838 ; SSE41-NEXT: psadbw %xmm0, %xmm3
18839 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
18840 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
18841 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
18842 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483655,2147483655]
18843 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
18844 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
18845 ; SSE41-NEXT: pand %xmm1, %xmm2
18846 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
18847 ; SSE41-NEXT: por %xmm2, %xmm0
18850 ; AVX1-LABEL: ult_7_v2i64:
18852 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18853 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
18854 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18855 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
18856 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
18857 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
18858 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
18859 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
18860 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
18861 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18862 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7]
18863 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18866 ; AVX2-LABEL: ult_7_v2i64:
18868 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18869 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
18870 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18871 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
18872 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
18873 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
18874 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
18875 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
18876 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
18877 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18878 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7]
18879 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18882 ; AVX512VPOPCNTDQ-LABEL: ult_7_v2i64:
18883 ; AVX512VPOPCNTDQ: # %bb.0:
18884 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18885 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
18886 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7]
18887 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18888 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
18889 ; AVX512VPOPCNTDQ-NEXT: retq
18891 ; AVX512VPOPCNTDQVL-LABEL: ult_7_v2i64:
18892 ; AVX512VPOPCNTDQVL: # %bb.0:
18893 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
18894 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
18895 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
18896 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
18897 ; AVX512VPOPCNTDQVL-NEXT: retq
18899 ; BITALG_NOVLX-LABEL: ult_7_v2i64:
18900 ; BITALG_NOVLX: # %bb.0:
18901 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18902 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
18903 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
18904 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18905 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7]
18906 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18907 ; BITALG_NOVLX-NEXT: vzeroupper
18908 ; BITALG_NOVLX-NEXT: retq
18910 ; BITALG-LABEL: ult_7_v2i64:
18912 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
18913 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
18914 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18915 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
18916 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
18917 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
18918 ; BITALG-NEXT: retq
18919 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
18920 %3 = icmp ult <2 x i64> %2, <i64 7, i64 7>
18921 %4 = sext <2 x i1> %3 to <2 x i64>
18925 define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) {
18926 ; SSE2-LABEL: ugt_7_v2i64:
18928 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18929 ; SSE2-NEXT: psrlw $1, %xmm1
18930 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18931 ; SSE2-NEXT: psubb %xmm1, %xmm0
18932 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18933 ; SSE2-NEXT: movdqa %xmm0, %xmm2
18934 ; SSE2-NEXT: pand %xmm1, %xmm2
18935 ; SSE2-NEXT: psrlw $2, %xmm0
18936 ; SSE2-NEXT: pand %xmm1, %xmm0
18937 ; SSE2-NEXT: paddb %xmm2, %xmm0
18938 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18939 ; SSE2-NEXT: psrlw $4, %xmm1
18940 ; SSE2-NEXT: paddb %xmm0, %xmm1
18941 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18942 ; SSE2-NEXT: pxor %xmm0, %xmm0
18943 ; SSE2-NEXT: psadbw %xmm0, %xmm1
18944 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18945 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
18946 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
18947 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18948 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
18949 ; SSE2-NEXT: pand %xmm2, %xmm3
18950 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
18951 ; SSE2-NEXT: por %xmm3, %xmm0
18954 ; SSE3-LABEL: ugt_7_v2i64:
18956 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18957 ; SSE3-NEXT: psrlw $1, %xmm1
18958 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18959 ; SSE3-NEXT: psubb %xmm1, %xmm0
18960 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18961 ; SSE3-NEXT: movdqa %xmm0, %xmm2
18962 ; SSE3-NEXT: pand %xmm1, %xmm2
18963 ; SSE3-NEXT: psrlw $2, %xmm0
18964 ; SSE3-NEXT: pand %xmm1, %xmm0
18965 ; SSE3-NEXT: paddb %xmm2, %xmm0
18966 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18967 ; SSE3-NEXT: psrlw $4, %xmm1
18968 ; SSE3-NEXT: paddb %xmm0, %xmm1
18969 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18970 ; SSE3-NEXT: pxor %xmm0, %xmm0
18971 ; SSE3-NEXT: psadbw %xmm0, %xmm1
18972 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18973 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
18974 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
18975 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18976 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
18977 ; SSE3-NEXT: pand %xmm2, %xmm3
18978 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
18979 ; SSE3-NEXT: por %xmm3, %xmm0
18982 ; SSSE3-LABEL: ugt_7_v2i64:
18984 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18985 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
18986 ; SSSE3-NEXT: pand %xmm1, %xmm2
18987 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18988 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
18989 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
18990 ; SSSE3-NEXT: psrlw $4, %xmm0
18991 ; SSSE3-NEXT: pand %xmm1, %xmm0
18992 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
18993 ; SSSE3-NEXT: paddb %xmm4, %xmm3
18994 ; SSSE3-NEXT: pxor %xmm0, %xmm0
18995 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
18996 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
18997 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
18998 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
18999 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
19000 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
19001 ; SSSE3-NEXT: pand %xmm1, %xmm2
19002 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
19003 ; SSSE3-NEXT: por %xmm2, %xmm0
19006 ; SSE41-LABEL: ugt_7_v2i64:
19008 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19009 ; SSE41-NEXT: movdqa %xmm0, %xmm2
19010 ; SSE41-NEXT: pand %xmm1, %xmm2
19011 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19012 ; SSE41-NEXT: movdqa %xmm3, %xmm4
19013 ; SSE41-NEXT: pshufb %xmm2, %xmm4
19014 ; SSE41-NEXT: psrlw $4, %xmm0
19015 ; SSE41-NEXT: pand %xmm1, %xmm0
19016 ; SSE41-NEXT: pshufb %xmm0, %xmm3
19017 ; SSE41-NEXT: paddb %xmm4, %xmm3
19018 ; SSE41-NEXT: pxor %xmm0, %xmm0
19019 ; SSE41-NEXT: psadbw %xmm0, %xmm3
19020 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
19021 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
19022 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
19023 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
19024 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
19025 ; SSE41-NEXT: pand %xmm1, %xmm2
19026 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
19027 ; SSE41-NEXT: por %xmm2, %xmm0
19030 ; AVX1-LABEL: ugt_7_v2i64:
19032 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19033 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
19034 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19035 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
19036 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
19037 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
19038 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
19039 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
19040 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
19041 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19042 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
19045 ; AVX2-LABEL: ugt_7_v2i64:
19047 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19048 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
19049 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19050 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
19051 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
19052 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
19053 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
19054 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
19055 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
19056 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19057 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
19060 ; AVX512VPOPCNTDQ-LABEL: ugt_7_v2i64:
19061 ; AVX512VPOPCNTDQ: # %bb.0:
19062 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19063 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
19064 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
19065 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
19066 ; AVX512VPOPCNTDQ-NEXT: retq
19068 ; AVX512VPOPCNTDQVL-LABEL: ugt_7_v2i64:
19069 ; AVX512VPOPCNTDQVL: # %bb.0:
19070 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
19071 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
19072 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
19073 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
19074 ; AVX512VPOPCNTDQVL-NEXT: retq
19076 ; BITALG_NOVLX-LABEL: ugt_7_v2i64:
19077 ; BITALG_NOVLX: # %bb.0:
19078 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19079 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
19080 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
19081 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19082 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
19083 ; BITALG_NOVLX-NEXT: vzeroupper
19084 ; BITALG_NOVLX-NEXT: retq
19086 ; BITALG-LABEL: ugt_7_v2i64:
19088 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
19089 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
19090 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19091 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
19092 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
19093 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
19094 ; BITALG-NEXT: retq
19095 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
19096 %3 = icmp ugt <2 x i64> %2, <i64 7, i64 7>
19097 %4 = sext <2 x i1> %3 to <2 x i64>
19101 define <2 x i64> @ult_8_v2i64(<2 x i64> %0) {
19102 ; SSE2-LABEL: ult_8_v2i64:
19104 ; SSE2-NEXT: movdqa %xmm0, %xmm1
19105 ; SSE2-NEXT: psrlw $1, %xmm1
19106 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19107 ; SSE2-NEXT: psubb %xmm1, %xmm0
19108 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
19109 ; SSE2-NEXT: movdqa %xmm0, %xmm2
19110 ; SSE2-NEXT: pand %xmm1, %xmm2
19111 ; SSE2-NEXT: psrlw $2, %xmm0
19112 ; SSE2-NEXT: pand %xmm1, %xmm0
19113 ; SSE2-NEXT: paddb %xmm2, %xmm0
19114 ; SSE2-NEXT: movdqa %xmm0, %xmm1
19115 ; SSE2-NEXT: psrlw $4, %xmm1
19116 ; SSE2-NEXT: paddb %xmm0, %xmm1
19117 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19118 ; SSE2-NEXT: pxor %xmm0, %xmm0
19119 ; SSE2-NEXT: psadbw %xmm0, %xmm1
19120 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19121 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
19122 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
19123 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483656,2147483656]
19124 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
19125 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
19126 ; SSE2-NEXT: pand %xmm2, %xmm1
19127 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
19128 ; SSE2-NEXT: por %xmm1, %xmm0
19131 ; SSE3-LABEL: ult_8_v2i64:
19133 ; SSE3-NEXT: movdqa %xmm0, %xmm1
19134 ; SSE3-NEXT: psrlw $1, %xmm1
19135 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19136 ; SSE3-NEXT: psubb %xmm1, %xmm0
19137 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
19138 ; SSE3-NEXT: movdqa %xmm0, %xmm2
19139 ; SSE3-NEXT: pand %xmm1, %xmm2
19140 ; SSE3-NEXT: psrlw $2, %xmm0
19141 ; SSE3-NEXT: pand %xmm1, %xmm0
19142 ; SSE3-NEXT: paddb %xmm2, %xmm0
19143 ; SSE3-NEXT: movdqa %xmm0, %xmm1
19144 ; SSE3-NEXT: psrlw $4, %xmm1
19145 ; SSE3-NEXT: paddb %xmm0, %xmm1
19146 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19147 ; SSE3-NEXT: pxor %xmm0, %xmm0
19148 ; SSE3-NEXT: psadbw %xmm0, %xmm1
19149 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19150 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
19151 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
19152 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483656,2147483656]
19153 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
19154 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
19155 ; SSE3-NEXT: pand %xmm2, %xmm1
19156 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
19157 ; SSE3-NEXT: por %xmm1, %xmm0
19160 ; SSSE3-LABEL: ult_8_v2i64:
19162 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19163 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
19164 ; SSSE3-NEXT: pand %xmm1, %xmm2
19165 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19166 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
19167 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
19168 ; SSSE3-NEXT: psrlw $4, %xmm0
19169 ; SSSE3-NEXT: pand %xmm1, %xmm0
19170 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
19171 ; SSSE3-NEXT: paddb %xmm4, %xmm3
19172 ; SSSE3-NEXT: pxor %xmm0, %xmm0
19173 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
19174 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
19175 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
19176 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
19177 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483656,2147483656]
19178 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
19179 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
19180 ; SSSE3-NEXT: pand %xmm1, %xmm2
19181 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
19182 ; SSSE3-NEXT: por %xmm2, %xmm0
19185 ; SSE41-LABEL: ult_8_v2i64:
19187 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19188 ; SSE41-NEXT: movdqa %xmm0, %xmm2
19189 ; SSE41-NEXT: pand %xmm1, %xmm2
19190 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19191 ; SSE41-NEXT: movdqa %xmm3, %xmm4
19192 ; SSE41-NEXT: pshufb %xmm2, %xmm4
19193 ; SSE41-NEXT: psrlw $4, %xmm0
19194 ; SSE41-NEXT: pand %xmm1, %xmm0
19195 ; SSE41-NEXT: pshufb %xmm0, %xmm3
19196 ; SSE41-NEXT: paddb %xmm4, %xmm3
19197 ; SSE41-NEXT: pxor %xmm0, %xmm0
19198 ; SSE41-NEXT: psadbw %xmm0, %xmm3
19199 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
19200 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
19201 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
19202 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483656,2147483656]
19203 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
19204 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
19205 ; SSE41-NEXT: pand %xmm1, %xmm2
19206 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
19207 ; SSE41-NEXT: por %xmm2, %xmm0
19210 ; AVX1-LABEL: ult_8_v2i64:
19212 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19213 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
19214 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19215 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
19216 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
19217 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
19218 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
19219 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
19220 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
19221 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19222 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8]
19223 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19226 ; AVX2-LABEL: ult_8_v2i64:
19228 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19229 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
19230 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19231 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
19232 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
19233 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
19234 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
19235 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
19236 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
19237 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19238 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8]
19239 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19242 ; AVX512VPOPCNTDQ-LABEL: ult_8_v2i64:
19243 ; AVX512VPOPCNTDQ: # %bb.0:
19244 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19245 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
19246 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8]
19247 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19248 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
19249 ; AVX512VPOPCNTDQ-NEXT: retq
19251 ; AVX512VPOPCNTDQVL-LABEL: ult_8_v2i64:
19252 ; AVX512VPOPCNTDQVL: # %bb.0:
19253 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
19254 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
19255 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
19256 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
19257 ; AVX512VPOPCNTDQVL-NEXT: retq
19259 ; BITALG_NOVLX-LABEL: ult_8_v2i64:
19260 ; BITALG_NOVLX: # %bb.0:
19261 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19262 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
19263 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
19264 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19265 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8]
19266 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19267 ; BITALG_NOVLX-NEXT: vzeroupper
19268 ; BITALG_NOVLX-NEXT: retq
19270 ; BITALG-LABEL: ult_8_v2i64:
19272 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
19273 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
19274 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19275 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
19276 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
19277 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
19278 ; BITALG-NEXT: retq
19279 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
19280 %3 = icmp ult <2 x i64> %2, <i64 8, i64 8>
19281 %4 = sext <2 x i1> %3 to <2 x i64>
19285 define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) {
19286 ; SSE2-LABEL: ugt_8_v2i64:
19288 ; SSE2-NEXT: movdqa %xmm0, %xmm1
19289 ; SSE2-NEXT: psrlw $1, %xmm1
19290 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19291 ; SSE2-NEXT: psubb %xmm1, %xmm0
19292 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
19293 ; SSE2-NEXT: movdqa %xmm0, %xmm2
19294 ; SSE2-NEXT: pand %xmm1, %xmm2
19295 ; SSE2-NEXT: psrlw $2, %xmm0
19296 ; SSE2-NEXT: pand %xmm1, %xmm0
19297 ; SSE2-NEXT: paddb %xmm2, %xmm0
19298 ; SSE2-NEXT: movdqa %xmm0, %xmm1
19299 ; SSE2-NEXT: psrlw $4, %xmm1
19300 ; SSE2-NEXT: paddb %xmm0, %xmm1
19301 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19302 ; SSE2-NEXT: pxor %xmm0, %xmm0
19303 ; SSE2-NEXT: psadbw %xmm0, %xmm1
19304 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19305 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
19306 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
19307 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19308 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
19309 ; SSE2-NEXT: pand %xmm2, %xmm3
19310 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
19311 ; SSE2-NEXT: por %xmm3, %xmm0
19314 ; SSE3-LABEL: ugt_8_v2i64:
19316 ; SSE3-NEXT: movdqa %xmm0, %xmm1
19317 ; SSE3-NEXT: psrlw $1, %xmm1
19318 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19319 ; SSE3-NEXT: psubb %xmm1, %xmm0
19320 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
19321 ; SSE3-NEXT: movdqa %xmm0, %xmm2
19322 ; SSE3-NEXT: pand %xmm1, %xmm2
19323 ; SSE3-NEXT: psrlw $2, %xmm0
19324 ; SSE3-NEXT: pand %xmm1, %xmm0
19325 ; SSE3-NEXT: paddb %xmm2, %xmm0
19326 ; SSE3-NEXT: movdqa %xmm0, %xmm1
19327 ; SSE3-NEXT: psrlw $4, %xmm1
19328 ; SSE3-NEXT: paddb %xmm0, %xmm1
19329 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19330 ; SSE3-NEXT: pxor %xmm0, %xmm0
19331 ; SSE3-NEXT: psadbw %xmm0, %xmm1
19332 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19333 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
19334 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
19335 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19336 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
19337 ; SSE3-NEXT: pand %xmm2, %xmm3
19338 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
19339 ; SSE3-NEXT: por %xmm3, %xmm0
19342 ; SSSE3-LABEL: ugt_8_v2i64:
19344 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19345 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
19346 ; SSSE3-NEXT: pand %xmm1, %xmm2
19347 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19348 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
19349 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
19350 ; SSSE3-NEXT: psrlw $4, %xmm0
19351 ; SSSE3-NEXT: pand %xmm1, %xmm0
19352 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
19353 ; SSSE3-NEXT: paddb %xmm4, %xmm3
19354 ; SSSE3-NEXT: pxor %xmm0, %xmm0
19355 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
19356 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
19357 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
19358 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
19359 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
19360 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
19361 ; SSSE3-NEXT: pand %xmm1, %xmm2
19362 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
19363 ; SSSE3-NEXT: por %xmm2, %xmm0
19366 ; SSE41-LABEL: ugt_8_v2i64:
19368 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19369 ; SSE41-NEXT: movdqa %xmm0, %xmm2
19370 ; SSE41-NEXT: pand %xmm1, %xmm2
19371 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19372 ; SSE41-NEXT: movdqa %xmm3, %xmm4
19373 ; SSE41-NEXT: pshufb %xmm2, %xmm4
19374 ; SSE41-NEXT: psrlw $4, %xmm0
19375 ; SSE41-NEXT: pand %xmm1, %xmm0
19376 ; SSE41-NEXT: pshufb %xmm0, %xmm3
19377 ; SSE41-NEXT: paddb %xmm4, %xmm3
19378 ; SSE41-NEXT: pxor %xmm0, %xmm0
19379 ; SSE41-NEXT: psadbw %xmm0, %xmm3
19380 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
19381 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
19382 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
19383 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
19384 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
19385 ; SSE41-NEXT: pand %xmm1, %xmm2
19386 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
19387 ; SSE41-NEXT: por %xmm2, %xmm0
19390 ; AVX1-LABEL: ugt_8_v2i64:
19392 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19393 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
19394 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19395 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
19396 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
19397 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
19398 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
19399 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
19400 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
19401 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19402 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
19405 ; AVX2-LABEL: ugt_8_v2i64:
19407 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19408 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
19409 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19410 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
19411 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
19412 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
19413 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
19414 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
19415 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
19416 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19417 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
19420 ; AVX512VPOPCNTDQ-LABEL: ugt_8_v2i64:
19421 ; AVX512VPOPCNTDQ: # %bb.0:
19422 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19423 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
19424 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
19425 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
19426 ; AVX512VPOPCNTDQ-NEXT: retq
19428 ; AVX512VPOPCNTDQVL-LABEL: ugt_8_v2i64:
19429 ; AVX512VPOPCNTDQVL: # %bb.0:
19430 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
19431 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
19432 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
19433 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
19434 ; AVX512VPOPCNTDQVL-NEXT: retq
19436 ; BITALG_NOVLX-LABEL: ugt_8_v2i64:
19437 ; BITALG_NOVLX: # %bb.0:
19438 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19439 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
19440 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
19441 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19442 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
19443 ; BITALG_NOVLX-NEXT: vzeroupper
19444 ; BITALG_NOVLX-NEXT: retq
19446 ; BITALG-LABEL: ugt_8_v2i64:
19448 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
19449 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
19450 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19451 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
19452 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
19453 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
19454 ; BITALG-NEXT: retq
19455 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
19456 %3 = icmp ugt <2 x i64> %2, <i64 8, i64 8>
19457 %4 = sext <2 x i1> %3 to <2 x i64>
19461 define <2 x i64> @ult_9_v2i64(<2 x i64> %0) {
19462 ; SSE2-LABEL: ult_9_v2i64:
19464 ; SSE2-NEXT: movdqa %xmm0, %xmm1
19465 ; SSE2-NEXT: psrlw $1, %xmm1
19466 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19467 ; SSE2-NEXT: psubb %xmm1, %xmm0
19468 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
19469 ; SSE2-NEXT: movdqa %xmm0, %xmm2
19470 ; SSE2-NEXT: pand %xmm1, %xmm2
19471 ; SSE2-NEXT: psrlw $2, %xmm0
19472 ; SSE2-NEXT: pand %xmm1, %xmm0
19473 ; SSE2-NEXT: paddb %xmm2, %xmm0
19474 ; SSE2-NEXT: movdqa %xmm0, %xmm1
19475 ; SSE2-NEXT: psrlw $4, %xmm1
19476 ; SSE2-NEXT: paddb %xmm0, %xmm1
19477 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19478 ; SSE2-NEXT: pxor %xmm0, %xmm0
19479 ; SSE2-NEXT: psadbw %xmm0, %xmm1
19480 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19481 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
19482 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
19483 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483657,2147483657]
19484 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
19485 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
19486 ; SSE2-NEXT: pand %xmm2, %xmm1
19487 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
19488 ; SSE2-NEXT: por %xmm1, %xmm0
19491 ; SSE3-LABEL: ult_9_v2i64:
19493 ; SSE3-NEXT: movdqa %xmm0, %xmm1
19494 ; SSE3-NEXT: psrlw $1, %xmm1
19495 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19496 ; SSE3-NEXT: psubb %xmm1, %xmm0
19497 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
19498 ; SSE3-NEXT: movdqa %xmm0, %xmm2
19499 ; SSE3-NEXT: pand %xmm1, %xmm2
19500 ; SSE3-NEXT: psrlw $2, %xmm0
19501 ; SSE3-NEXT: pand %xmm1, %xmm0
19502 ; SSE3-NEXT: paddb %xmm2, %xmm0
19503 ; SSE3-NEXT: movdqa %xmm0, %xmm1
19504 ; SSE3-NEXT: psrlw $4, %xmm1
19505 ; SSE3-NEXT: paddb %xmm0, %xmm1
19506 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19507 ; SSE3-NEXT: pxor %xmm0, %xmm0
19508 ; SSE3-NEXT: psadbw %xmm0, %xmm1
19509 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19510 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
19511 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
19512 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483657,2147483657]
19513 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
19514 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
19515 ; SSE3-NEXT: pand %xmm2, %xmm1
19516 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
19517 ; SSE3-NEXT: por %xmm1, %xmm0
19520 ; SSSE3-LABEL: ult_9_v2i64:
19522 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19523 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
19524 ; SSSE3-NEXT: pand %xmm1, %xmm2
19525 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19526 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
19527 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
19528 ; SSSE3-NEXT: psrlw $4, %xmm0
19529 ; SSSE3-NEXT: pand %xmm1, %xmm0
19530 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
19531 ; SSSE3-NEXT: paddb %xmm4, %xmm3
19532 ; SSSE3-NEXT: pxor %xmm0, %xmm0
19533 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
19534 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
19535 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
19536 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
19537 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483657,2147483657]
19538 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
19539 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
19540 ; SSSE3-NEXT: pand %xmm1, %xmm2
19541 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
19542 ; SSSE3-NEXT: por %xmm2, %xmm0
19545 ; SSE41-LABEL: ult_9_v2i64:
19547 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19548 ; SSE41-NEXT: movdqa %xmm0, %xmm2
19549 ; SSE41-NEXT: pand %xmm1, %xmm2
19550 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19551 ; SSE41-NEXT: movdqa %xmm3, %xmm4
19552 ; SSE41-NEXT: pshufb %xmm2, %xmm4
19553 ; SSE41-NEXT: psrlw $4, %xmm0
19554 ; SSE41-NEXT: pand %xmm1, %xmm0
19555 ; SSE41-NEXT: pshufb %xmm0, %xmm3
19556 ; SSE41-NEXT: paddb %xmm4, %xmm3
19557 ; SSE41-NEXT: pxor %xmm0, %xmm0
19558 ; SSE41-NEXT: psadbw %xmm0, %xmm3
19559 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
19560 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
19561 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
19562 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483657,2147483657]
19563 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
19564 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
19565 ; SSE41-NEXT: pand %xmm1, %xmm2
19566 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
19567 ; SSE41-NEXT: por %xmm2, %xmm0
19570 ; AVX1-LABEL: ult_9_v2i64:
19572 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19573 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
19574 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19575 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
19576 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
19577 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
19578 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
19579 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
19580 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
19581 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19582 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9]
19583 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19586 ; AVX2-LABEL: ult_9_v2i64:
19588 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19589 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
19590 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19591 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
19592 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
19593 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
19594 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
19595 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
19596 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
19597 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19598 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9]
19599 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19602 ; AVX512VPOPCNTDQ-LABEL: ult_9_v2i64:
19603 ; AVX512VPOPCNTDQ: # %bb.0:
19604 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19605 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
19606 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9]
19607 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19608 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
19609 ; AVX512VPOPCNTDQ-NEXT: retq
19611 ; AVX512VPOPCNTDQVL-LABEL: ult_9_v2i64:
19612 ; AVX512VPOPCNTDQVL: # %bb.0:
19613 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
19614 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
19615 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
19616 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
19617 ; AVX512VPOPCNTDQVL-NEXT: retq
19619 ; BITALG_NOVLX-LABEL: ult_9_v2i64:
19620 ; BITALG_NOVLX: # %bb.0:
19621 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19622 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
19623 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
19624 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19625 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9]
19626 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19627 ; BITALG_NOVLX-NEXT: vzeroupper
19628 ; BITALG_NOVLX-NEXT: retq
19630 ; BITALG-LABEL: ult_9_v2i64:
19632 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
19633 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
19634 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19635 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
19636 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
19637 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
19638 ; BITALG-NEXT: retq
19639 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
19640 %3 = icmp ult <2 x i64> %2, <i64 9, i64 9>
19641 %4 = sext <2 x i1> %3 to <2 x i64>
; Checks codegen of: ctpop(<2 x i64> %0) compared with 'icmp ugt 9', then
; sign-extended to a <2 x i64> all-ones/all-zeros mask.  Pre-AVX targets
; popcount via shift/mask (SSE2/SSE3) or nibble-LUT pshufb (SSSE3/SSE41)
; plus psadbw, then emulate the unsigned 64-bit compare with 32-bit
; pcmpeqd/pcmpgtd; AVX512VPOPCNTDQ/BITALG use vpopcnt* and a mask compare.
; NOTE(review): assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than hand-editing.
define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_9_v2i64:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: psubb %xmm1, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: psrlw $2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: paddb %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $4, %xmm1
; SSE2-NEXT: paddb %xmm0, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: psadbw %xmm0, %xmm1
; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
; SSE2-NEXT: pand %xmm2, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE2-NEXT: por %xmm3, %xmm0
; SSE3-LABEL: ugt_9_v2i64:
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT: psubb %xmm1, %xmm0
; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: pand %xmm1, %xmm2
; SSE3-NEXT: psrlw $2, %xmm0
; SSE3-NEXT: pand %xmm1, %xmm0
; SSE3-NEXT: paddb %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $4, %xmm1
; SSE3-NEXT: paddb %xmm0, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: psadbw %xmm0, %xmm1
; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
; SSE3-NEXT: pand %xmm2, %xmm3
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE3-NEXT: por %xmm3, %xmm0
; SSSE3-LABEL: ugt_9_v2i64:
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: pand %xmm1, %xmm2
; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT: movdqa %xmm3, %xmm4
; SSSE3-NEXT: pshufb %xmm2, %xmm4
; SSSE3-NEXT: psrlw $4, %xmm0
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pshufb %xmm0, %xmm3
; SSSE3-NEXT: paddb %xmm4, %xmm3
; SSSE3-NEXT: pxor %xmm0, %xmm0
; SSSE3-NEXT: psadbw %xmm0, %xmm3
; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
; SSSE3-NEXT: pand %xmm1, %xmm2
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSSE3-NEXT: por %xmm2, %xmm0
; SSE41-LABEL: ugt_9_v2i64:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pand %xmm1, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: pshufb %xmm2, %xmm4
; SSE41-NEXT: psrlw $4, %xmm0
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: pshufb %xmm0, %xmm3
; SSE41-NEXT: paddb %xmm4, %xmm3
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: psadbw %xmm0, %xmm3
; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
; SSE41-NEXT: pand %xmm1, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT: por %xmm2, %xmm0
; AVX1-LABEL: ugt_9_v2i64:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-LABEL: ugt_9_v2i64:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-LABEL: ugt_9_v2i64:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
; AVX512VPOPCNTDQVL-LABEL: ugt_9_v2i64:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT: retq
; BITALG_NOVLX-LABEL: ugt_9_v2i64:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
; BITALG-LABEL: ugt_9_v2i64:
; BITALG-NEXT: vpopcntb %xmm0, %xmm0
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT: retq
%2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
%3 = icmp ugt <2 x i64> %2, <i64 9, i64 9>
%4 = sext <2 x i1> %3 to <2 x i64>
; Checks codegen of: ctpop(<2 x i64> %0) compared with 'icmp ult 10', then
; sign-extended to a <2 x i64> mask.  The 'ult' direction lets SSE targets
; materialize the (biased) splat constant 2147483658 and use pcmpgtd with
; operands swapped, while AVX1/AVX2 compare against a [10,10] vector via
; vpcmpgtq and the VL/BITALG targets use vpcmpltuq into a mask register.
; NOTE(review): assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than hand-editing.
define <2 x i64> @ult_10_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_10_v2i64:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: psubb %xmm1, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: psrlw $2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: paddb %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $4, %xmm1
; SSE2-NEXT: paddb %xmm0, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: psadbw %xmm0, %xmm1
; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483658,2147483658]
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: por %xmm1, %xmm0
; SSE3-LABEL: ult_10_v2i64:
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT: psubb %xmm1, %xmm0
; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: pand %xmm1, %xmm2
; SSE3-NEXT: psrlw $2, %xmm0
; SSE3-NEXT: pand %xmm1, %xmm0
; SSE3-NEXT: paddb %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $4, %xmm1
; SSE3-NEXT: paddb %xmm0, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: psadbw %xmm0, %xmm1
; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483658,2147483658]
; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
; SSE3-NEXT: pand %xmm2, %xmm1
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE3-NEXT: por %xmm1, %xmm0
; SSSE3-LABEL: ult_10_v2i64:
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: pand %xmm1, %xmm2
; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT: movdqa %xmm3, %xmm4
; SSSE3-NEXT: pshufb %xmm2, %xmm4
; SSSE3-NEXT: psrlw $4, %xmm0
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pshufb %xmm0, %xmm3
; SSSE3-NEXT: paddb %xmm4, %xmm3
; SSSE3-NEXT: pxor %xmm0, %xmm0
; SSSE3-NEXT: psadbw %xmm0, %xmm3
; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483658,2147483658]
; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
; SSSE3-NEXT: pand %xmm1, %xmm2
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSSE3-NEXT: por %xmm2, %xmm0
; SSE41-LABEL: ult_10_v2i64:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pand %xmm1, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: pshufb %xmm2, %xmm4
; SSE41-NEXT: psrlw $4, %xmm0
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: pshufb %xmm0, %xmm3
; SSE41-NEXT: paddb %xmm4, %xmm3
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: psadbw %xmm0, %xmm3
; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483658,2147483658]
; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
; SSE41-NEXT: pand %xmm1, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT: por %xmm2, %xmm0
; AVX1-LABEL: ult_10_v2i64:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10]
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-LABEL: ult_10_v2i64:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10]
; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-LABEL: ult_10_v2i64:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10]
; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
; AVX512VPOPCNTDQVL-LABEL: ult_10_v2i64:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT: retq
; BITALG_NOVLX-LABEL: ult_10_v2i64:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10]
; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
; BITALG-LABEL: ult_10_v2i64:
; BITALG-NEXT: vpopcntb %xmm0, %xmm0
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT: retq
%2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
%3 = icmp ult <2 x i64> %2, <i64 10, i64 10>
%4 = sext <2 x i1> %3 to <2 x i64>
; Checks codegen of: ctpop(<2 x i64> %0) compared with 'icmp ugt 10', then
; sign-extended to a <2 x i64> mask.  Same lowering shapes as ugt_9_v2i64,
; only the memory constant differs (threshold 10 instead of 9).
; NOTE(review): assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than hand-editing.
define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ugt_10_v2i64:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: psubb %xmm1, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: psrlw $2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: paddb %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $4, %xmm1
; SSE2-NEXT: paddb %xmm0, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: psadbw %xmm0, %xmm1
; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
; SSE2-NEXT: pand %xmm2, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE2-NEXT: por %xmm3, %xmm0
; SSE3-LABEL: ugt_10_v2i64:
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT: psubb %xmm1, %xmm0
; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: pand %xmm1, %xmm2
; SSE3-NEXT: psrlw $2, %xmm0
; SSE3-NEXT: pand %xmm1, %xmm0
; SSE3-NEXT: paddb %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $4, %xmm1
; SSE3-NEXT: paddb %xmm0, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: psadbw %xmm0, %xmm1
; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
; SSE3-NEXT: pand %xmm2, %xmm3
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE3-NEXT: por %xmm3, %xmm0
; SSSE3-LABEL: ugt_10_v2i64:
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: pand %xmm1, %xmm2
; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT: movdqa %xmm3, %xmm4
; SSSE3-NEXT: pshufb %xmm2, %xmm4
; SSSE3-NEXT: psrlw $4, %xmm0
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pshufb %xmm0, %xmm3
; SSSE3-NEXT: paddb %xmm4, %xmm3
; SSSE3-NEXT: pxor %xmm0, %xmm0
; SSSE3-NEXT: psadbw %xmm0, %xmm3
; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
; SSSE3-NEXT: pand %xmm1, %xmm2
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSSE3-NEXT: por %xmm2, %xmm0
; SSE41-LABEL: ugt_10_v2i64:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pand %xmm1, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: pshufb %xmm2, %xmm4
; SSE41-NEXT: psrlw $4, %xmm0
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: pshufb %xmm0, %xmm3
; SSE41-NEXT: paddb %xmm4, %xmm3
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: psadbw %xmm0, %xmm3
; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
; SSE41-NEXT: pand %xmm1, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; SSE41-NEXT: por %xmm2, %xmm0
; AVX1-LABEL: ugt_10_v2i64:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-LABEL: ugt_10_v2i64:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-LABEL: ugt_10_v2i64:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
; AVX512VPOPCNTDQVL-LABEL: ugt_10_v2i64:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT: retq
; BITALG_NOVLX-LABEL: ugt_10_v2i64:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
; BITALG-LABEL: ugt_10_v2i64:
; BITALG-NEXT: vpopcntb %xmm0, %xmm0
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT: retq
%2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
%3 = icmp ugt <2 x i64> %2, <i64 10, i64 10>
%4 = sext <2 x i1> %3 to <2 x i64>
; Checks codegen of: ctpop(<2 x i64> %0) compared with 'icmp ult 11', then
; sign-extended to a <2 x i64> mask.  Same lowering shapes as ult_10_v2i64
; with the threshold bumped to 11 (biased SSE constant 2147483659,
; AVX [11,11] vector).
; NOTE(review): assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than hand-editing.
define <2 x i64> @ult_11_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ult_11_v2i64:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: psubb %xmm1, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: psrlw $2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: paddb %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $4, %xmm1
; SSE2-NEXT: paddb %xmm0, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: psadbw %xmm0, %xmm1
; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483659,2147483659]
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: por %xmm1, %xmm0
; SSE3-LABEL: ult_11_v2i64:
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT: psubb %xmm1, %xmm0
; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: pand %xmm1, %xmm2
; SSE3-NEXT: psrlw $2, %xmm0
; SSE3-NEXT: pand %xmm1, %xmm0
; SSE3-NEXT: paddb %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $4, %xmm1
; SSE3-NEXT: paddb %xmm0, %xmm1
; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: psadbw %xmm0, %xmm1
; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483659,2147483659]
; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
; SSE3-NEXT: pand %xmm2, %xmm1
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE3-NEXT: por %xmm1, %xmm0
; SSSE3-LABEL: ult_11_v2i64:
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: pand %xmm1, %xmm2
; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT: movdqa %xmm3, %xmm4
; SSSE3-NEXT: pshufb %xmm2, %xmm4
; SSSE3-NEXT: psrlw $4, %xmm0
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pshufb %xmm0, %xmm3
; SSSE3-NEXT: paddb %xmm4, %xmm3
; SSSE3-NEXT: pxor %xmm0, %xmm0
; SSSE3-NEXT: psadbw %xmm0, %xmm3
; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483659,2147483659]
; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
; SSSE3-NEXT: pand %xmm1, %xmm2
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSSE3-NEXT: por %xmm2, %xmm0
; SSE41-LABEL: ult_11_v2i64:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pand %xmm1, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: pshufb %xmm2, %xmm4
; SSE41-NEXT: psrlw $4, %xmm0
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: pshufb %xmm0, %xmm3
; SSE41-NEXT: paddb %xmm4, %xmm3
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: psadbw %xmm0, %xmm3
; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483659,2147483659]
; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
; SSE41-NEXT: pand %xmm1, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT: por %xmm2, %xmm0
; AVX1-LABEL: ult_11_v2i64:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11]
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-LABEL: ult_11_v2i64:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11]
; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-LABEL: ult_11_v2i64:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11]
; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
; AVX512VPOPCNTDQVL-LABEL: ult_11_v2i64:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VPOPCNTDQVL-NEXT: retq
; BITALG_NOVLX-LABEL: ult_11_v2i64:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11]
; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
; BITALG-LABEL: ult_11_v2i64:
; BITALG-NEXT: vpopcntb %xmm0, %xmm0
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; BITALG-NEXT: retq
%2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
%3 = icmp ult <2 x i64> %2, <i64 11, i64 11>
%4 = sext <2 x i1> %3 to <2 x i64>
20365 define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) {
20366 ; SSE2-LABEL: ugt_11_v2i64:
20368 ; SSE2-NEXT: movdqa %xmm0, %xmm1
20369 ; SSE2-NEXT: psrlw $1, %xmm1
20370 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20371 ; SSE2-NEXT: psubb %xmm1, %xmm0
20372 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
20373 ; SSE2-NEXT: movdqa %xmm0, %xmm2
20374 ; SSE2-NEXT: pand %xmm1, %xmm2
20375 ; SSE2-NEXT: psrlw $2, %xmm0
20376 ; SSE2-NEXT: pand %xmm1, %xmm0
20377 ; SSE2-NEXT: paddb %xmm2, %xmm0
20378 ; SSE2-NEXT: movdqa %xmm0, %xmm1
20379 ; SSE2-NEXT: psrlw $4, %xmm1
20380 ; SSE2-NEXT: paddb %xmm0, %xmm1
20381 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20382 ; SSE2-NEXT: pxor %xmm0, %xmm0
20383 ; SSE2-NEXT: psadbw %xmm0, %xmm1
20384 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20385 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
20386 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
20387 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20388 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
20389 ; SSE2-NEXT: pand %xmm2, %xmm3
20390 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
20391 ; SSE2-NEXT: por %xmm3, %xmm0
20394 ; SSE3-LABEL: ugt_11_v2i64:
20396 ; SSE3-NEXT: movdqa %xmm0, %xmm1
20397 ; SSE3-NEXT: psrlw $1, %xmm1
20398 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20399 ; SSE3-NEXT: psubb %xmm1, %xmm0
20400 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
20401 ; SSE3-NEXT: movdqa %xmm0, %xmm2
20402 ; SSE3-NEXT: pand %xmm1, %xmm2
20403 ; SSE3-NEXT: psrlw $2, %xmm0
20404 ; SSE3-NEXT: pand %xmm1, %xmm0
20405 ; SSE3-NEXT: paddb %xmm2, %xmm0
20406 ; SSE3-NEXT: movdqa %xmm0, %xmm1
20407 ; SSE3-NEXT: psrlw $4, %xmm1
20408 ; SSE3-NEXT: paddb %xmm0, %xmm1
20409 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20410 ; SSE3-NEXT: pxor %xmm0, %xmm0
20411 ; SSE3-NEXT: psadbw %xmm0, %xmm1
20412 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20413 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
20414 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
20415 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20416 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
20417 ; SSE3-NEXT: pand %xmm2, %xmm3
20418 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
20419 ; SSE3-NEXT: por %xmm3, %xmm0
20422 ; SSSE3-LABEL: ugt_11_v2i64:
20424 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20425 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
20426 ; SSSE3-NEXT: pand %xmm1, %xmm2
20427 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20428 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
20429 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
20430 ; SSSE3-NEXT: psrlw $4, %xmm0
20431 ; SSSE3-NEXT: pand %xmm1, %xmm0
20432 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
20433 ; SSSE3-NEXT: paddb %xmm4, %xmm3
20434 ; SSSE3-NEXT: pxor %xmm0, %xmm0
20435 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
20436 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
20437 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
20438 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
20439 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
20440 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
20441 ; SSSE3-NEXT: pand %xmm1, %xmm2
20442 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
20443 ; SSSE3-NEXT: por %xmm2, %xmm0
20446 ; SSE41-LABEL: ugt_11_v2i64:
20448 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20449 ; SSE41-NEXT: movdqa %xmm0, %xmm2
20450 ; SSE41-NEXT: pand %xmm1, %xmm2
20451 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20452 ; SSE41-NEXT: movdqa %xmm3, %xmm4
20453 ; SSE41-NEXT: pshufb %xmm2, %xmm4
20454 ; SSE41-NEXT: psrlw $4, %xmm0
20455 ; SSE41-NEXT: pand %xmm1, %xmm0
20456 ; SSE41-NEXT: pshufb %xmm0, %xmm3
20457 ; SSE41-NEXT: paddb %xmm4, %xmm3
20458 ; SSE41-NEXT: pxor %xmm0, %xmm0
20459 ; SSE41-NEXT: psadbw %xmm0, %xmm3
20460 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
20461 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
20462 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
20463 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
20464 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
20465 ; SSE41-NEXT: pand %xmm1, %xmm2
20466 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
20467 ; SSE41-NEXT: por %xmm2, %xmm0
20470 ; AVX1-LABEL: ugt_11_v2i64:
20472 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20473 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
20474 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20475 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
20476 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
20477 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
20478 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
20479 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
20480 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
20481 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20482 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
20485 ; AVX2-LABEL: ugt_11_v2i64:
20487 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20488 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
20489 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20490 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
20491 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
20492 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
20493 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
20494 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
20495 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
20496 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20497 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
20500 ; AVX512VPOPCNTDQ-LABEL: ugt_11_v2i64:
20501 ; AVX512VPOPCNTDQ: # %bb.0:
20502 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20503 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
20504 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
20505 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
20506 ; AVX512VPOPCNTDQ-NEXT: retq
20508 ; AVX512VPOPCNTDQVL-LABEL: ugt_11_v2i64:
20509 ; AVX512VPOPCNTDQVL: # %bb.0:
20510 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
20511 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
20512 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
20513 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
20514 ; AVX512VPOPCNTDQVL-NEXT: retq
20516 ; BITALG_NOVLX-LABEL: ugt_11_v2i64:
20517 ; BITALG_NOVLX: # %bb.0:
20518 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20519 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
20520 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
20521 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20522 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
20523 ; BITALG_NOVLX-NEXT: vzeroupper
20524 ; BITALG_NOVLX-NEXT: retq
20526 ; BITALG-LABEL: ugt_11_v2i64:
20528 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
20529 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
20530 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20531 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
20532 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
20533 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
20534 ; BITALG-NEXT: retq
20535 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
20536 %3 = icmp ugt <2 x i64> %2, <i64 11, i64 11>
20537 %4 = sext <2 x i1> %3 to <2 x i64>
20541 define <2 x i64> @ult_12_v2i64(<2 x i64> %0) {
20542 ; SSE2-LABEL: ult_12_v2i64:
20544 ; SSE2-NEXT: movdqa %xmm0, %xmm1
20545 ; SSE2-NEXT: psrlw $1, %xmm1
20546 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20547 ; SSE2-NEXT: psubb %xmm1, %xmm0
20548 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
20549 ; SSE2-NEXT: movdqa %xmm0, %xmm2
20550 ; SSE2-NEXT: pand %xmm1, %xmm2
20551 ; SSE2-NEXT: psrlw $2, %xmm0
20552 ; SSE2-NEXT: pand %xmm1, %xmm0
20553 ; SSE2-NEXT: paddb %xmm2, %xmm0
20554 ; SSE2-NEXT: movdqa %xmm0, %xmm1
20555 ; SSE2-NEXT: psrlw $4, %xmm1
20556 ; SSE2-NEXT: paddb %xmm0, %xmm1
20557 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20558 ; SSE2-NEXT: pxor %xmm0, %xmm0
20559 ; SSE2-NEXT: psadbw %xmm0, %xmm1
20560 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20561 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
20562 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
20563 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483660,2147483660]
20564 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
20565 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
20566 ; SSE2-NEXT: pand %xmm2, %xmm1
20567 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
20568 ; SSE2-NEXT: por %xmm1, %xmm0
20571 ; SSE3-LABEL: ult_12_v2i64:
20573 ; SSE3-NEXT: movdqa %xmm0, %xmm1
20574 ; SSE3-NEXT: psrlw $1, %xmm1
20575 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20576 ; SSE3-NEXT: psubb %xmm1, %xmm0
20577 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
20578 ; SSE3-NEXT: movdqa %xmm0, %xmm2
20579 ; SSE3-NEXT: pand %xmm1, %xmm2
20580 ; SSE3-NEXT: psrlw $2, %xmm0
20581 ; SSE3-NEXT: pand %xmm1, %xmm0
20582 ; SSE3-NEXT: paddb %xmm2, %xmm0
20583 ; SSE3-NEXT: movdqa %xmm0, %xmm1
20584 ; SSE3-NEXT: psrlw $4, %xmm1
20585 ; SSE3-NEXT: paddb %xmm0, %xmm1
20586 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20587 ; SSE3-NEXT: pxor %xmm0, %xmm0
20588 ; SSE3-NEXT: psadbw %xmm0, %xmm1
20589 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20590 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
20591 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
20592 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483660,2147483660]
20593 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
20594 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
20595 ; SSE3-NEXT: pand %xmm2, %xmm1
20596 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
20597 ; SSE3-NEXT: por %xmm1, %xmm0
20600 ; SSSE3-LABEL: ult_12_v2i64:
20602 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20603 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
20604 ; SSSE3-NEXT: pand %xmm1, %xmm2
20605 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20606 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
20607 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
20608 ; SSSE3-NEXT: psrlw $4, %xmm0
20609 ; SSSE3-NEXT: pand %xmm1, %xmm0
20610 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
20611 ; SSSE3-NEXT: paddb %xmm4, %xmm3
20612 ; SSSE3-NEXT: pxor %xmm0, %xmm0
20613 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
20614 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
20615 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
20616 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
20617 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483660,2147483660]
20618 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
20619 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
20620 ; SSSE3-NEXT: pand %xmm1, %xmm2
20621 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
20622 ; SSSE3-NEXT: por %xmm2, %xmm0
20625 ; SSE41-LABEL: ult_12_v2i64:
20627 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20628 ; SSE41-NEXT: movdqa %xmm0, %xmm2
20629 ; SSE41-NEXT: pand %xmm1, %xmm2
20630 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20631 ; SSE41-NEXT: movdqa %xmm3, %xmm4
20632 ; SSE41-NEXT: pshufb %xmm2, %xmm4
20633 ; SSE41-NEXT: psrlw $4, %xmm0
20634 ; SSE41-NEXT: pand %xmm1, %xmm0
20635 ; SSE41-NEXT: pshufb %xmm0, %xmm3
20636 ; SSE41-NEXT: paddb %xmm4, %xmm3
20637 ; SSE41-NEXT: pxor %xmm0, %xmm0
20638 ; SSE41-NEXT: psadbw %xmm0, %xmm3
20639 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
20640 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
20641 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
20642 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483660,2147483660]
20643 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
20644 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
20645 ; SSE41-NEXT: pand %xmm1, %xmm2
20646 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
20647 ; SSE41-NEXT: por %xmm2, %xmm0
20650 ; AVX1-LABEL: ult_12_v2i64:
20652 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20653 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
20654 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20655 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
20656 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
20657 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
20658 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
20659 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
20660 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
20661 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20662 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12]
20663 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20666 ; AVX2-LABEL: ult_12_v2i64:
20668 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20669 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
20670 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20671 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
20672 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
20673 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
20674 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
20675 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
20676 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
20677 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20678 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12]
20679 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20682 ; AVX512VPOPCNTDQ-LABEL: ult_12_v2i64:
20683 ; AVX512VPOPCNTDQ: # %bb.0:
20684 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20685 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
20686 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12]
20687 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20688 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
20689 ; AVX512VPOPCNTDQ-NEXT: retq
20691 ; AVX512VPOPCNTDQVL-LABEL: ult_12_v2i64:
20692 ; AVX512VPOPCNTDQVL: # %bb.0:
20693 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
20694 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
20695 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
20696 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
20697 ; AVX512VPOPCNTDQVL-NEXT: retq
20699 ; BITALG_NOVLX-LABEL: ult_12_v2i64:
20700 ; BITALG_NOVLX: # %bb.0:
20701 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20702 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
20703 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
20704 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20705 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12]
20706 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20707 ; BITALG_NOVLX-NEXT: vzeroupper
20708 ; BITALG_NOVLX-NEXT: retq
20710 ; BITALG-LABEL: ult_12_v2i64:
20712 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
20713 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
20714 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20715 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
20716 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
20717 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
20718 ; BITALG-NEXT: retq
20719 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
20720 %3 = icmp ult <2 x i64> %2, <i64 12, i64 12>
20721 %4 = sext <2 x i1> %3 to <2 x i64>
20725 define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) {
20726 ; SSE2-LABEL: ugt_12_v2i64:
20728 ; SSE2-NEXT: movdqa %xmm0, %xmm1
20729 ; SSE2-NEXT: psrlw $1, %xmm1
20730 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20731 ; SSE2-NEXT: psubb %xmm1, %xmm0
20732 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
20733 ; SSE2-NEXT: movdqa %xmm0, %xmm2
20734 ; SSE2-NEXT: pand %xmm1, %xmm2
20735 ; SSE2-NEXT: psrlw $2, %xmm0
20736 ; SSE2-NEXT: pand %xmm1, %xmm0
20737 ; SSE2-NEXT: paddb %xmm2, %xmm0
20738 ; SSE2-NEXT: movdqa %xmm0, %xmm1
20739 ; SSE2-NEXT: psrlw $4, %xmm1
20740 ; SSE2-NEXT: paddb %xmm0, %xmm1
20741 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20742 ; SSE2-NEXT: pxor %xmm0, %xmm0
20743 ; SSE2-NEXT: psadbw %xmm0, %xmm1
20744 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20745 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
20746 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
20747 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20748 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
20749 ; SSE2-NEXT: pand %xmm2, %xmm3
20750 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
20751 ; SSE2-NEXT: por %xmm3, %xmm0
20754 ; SSE3-LABEL: ugt_12_v2i64:
20756 ; SSE3-NEXT: movdqa %xmm0, %xmm1
20757 ; SSE3-NEXT: psrlw $1, %xmm1
20758 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20759 ; SSE3-NEXT: psubb %xmm1, %xmm0
20760 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
20761 ; SSE3-NEXT: movdqa %xmm0, %xmm2
20762 ; SSE3-NEXT: pand %xmm1, %xmm2
20763 ; SSE3-NEXT: psrlw $2, %xmm0
20764 ; SSE3-NEXT: pand %xmm1, %xmm0
20765 ; SSE3-NEXT: paddb %xmm2, %xmm0
20766 ; SSE3-NEXT: movdqa %xmm0, %xmm1
20767 ; SSE3-NEXT: psrlw $4, %xmm1
20768 ; SSE3-NEXT: paddb %xmm0, %xmm1
20769 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20770 ; SSE3-NEXT: pxor %xmm0, %xmm0
20771 ; SSE3-NEXT: psadbw %xmm0, %xmm1
20772 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20773 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
20774 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
20775 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20776 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
20777 ; SSE3-NEXT: pand %xmm2, %xmm3
20778 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
20779 ; SSE3-NEXT: por %xmm3, %xmm0
20782 ; SSSE3-LABEL: ugt_12_v2i64:
20784 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20785 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
20786 ; SSSE3-NEXT: pand %xmm1, %xmm2
20787 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20788 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
20789 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
20790 ; SSSE3-NEXT: psrlw $4, %xmm0
20791 ; SSSE3-NEXT: pand %xmm1, %xmm0
20792 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
20793 ; SSSE3-NEXT: paddb %xmm4, %xmm3
20794 ; SSSE3-NEXT: pxor %xmm0, %xmm0
20795 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
20796 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
20797 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
20798 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
20799 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
20800 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
20801 ; SSSE3-NEXT: pand %xmm1, %xmm2
20802 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
20803 ; SSSE3-NEXT: por %xmm2, %xmm0
20806 ; SSE41-LABEL: ugt_12_v2i64:
20808 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20809 ; SSE41-NEXT: movdqa %xmm0, %xmm2
20810 ; SSE41-NEXT: pand %xmm1, %xmm2
20811 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20812 ; SSE41-NEXT: movdqa %xmm3, %xmm4
20813 ; SSE41-NEXT: pshufb %xmm2, %xmm4
20814 ; SSE41-NEXT: psrlw $4, %xmm0
20815 ; SSE41-NEXT: pand %xmm1, %xmm0
20816 ; SSE41-NEXT: pshufb %xmm0, %xmm3
20817 ; SSE41-NEXT: paddb %xmm4, %xmm3
20818 ; SSE41-NEXT: pxor %xmm0, %xmm0
20819 ; SSE41-NEXT: psadbw %xmm0, %xmm3
20820 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
20821 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
20822 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
20823 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
20824 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
20825 ; SSE41-NEXT: pand %xmm1, %xmm2
20826 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
20827 ; SSE41-NEXT: por %xmm2, %xmm0
20830 ; AVX1-LABEL: ugt_12_v2i64:
20832 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20833 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
20834 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20835 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
20836 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
20837 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
20838 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
20839 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
20840 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
20841 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20842 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
20845 ; AVX2-LABEL: ugt_12_v2i64:
20847 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20848 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
20849 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20850 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
20851 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
20852 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
20853 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
20854 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
20855 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
20856 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20857 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
20860 ; AVX512VPOPCNTDQ-LABEL: ugt_12_v2i64:
20861 ; AVX512VPOPCNTDQ: # %bb.0:
20862 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20863 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
20864 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
20865 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
20866 ; AVX512VPOPCNTDQ-NEXT: retq
20868 ; AVX512VPOPCNTDQVL-LABEL: ugt_12_v2i64:
20869 ; AVX512VPOPCNTDQVL: # %bb.0:
20870 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
20871 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
20872 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
20873 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
20874 ; AVX512VPOPCNTDQVL-NEXT: retq
20876 ; BITALG_NOVLX-LABEL: ugt_12_v2i64:
20877 ; BITALG_NOVLX: # %bb.0:
20878 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20879 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
20880 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
20881 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20882 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
20883 ; BITALG_NOVLX-NEXT: vzeroupper
20884 ; BITALG_NOVLX-NEXT: retq
20886 ; BITALG-LABEL: ugt_12_v2i64:
20888 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
20889 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
20890 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20891 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
20892 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
20893 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
20894 ; BITALG-NEXT: retq
20895 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
20896 %3 = icmp ugt <2 x i64> %2, <i64 12, i64 12>
20897 %4 = sext <2 x i1> %3 to <2 x i64>
20901 define <2 x i64> @ult_13_v2i64(<2 x i64> %0) {
20902 ; SSE2-LABEL: ult_13_v2i64:
20904 ; SSE2-NEXT: movdqa %xmm0, %xmm1
20905 ; SSE2-NEXT: psrlw $1, %xmm1
20906 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20907 ; SSE2-NEXT: psubb %xmm1, %xmm0
20908 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
20909 ; SSE2-NEXT: movdqa %xmm0, %xmm2
20910 ; SSE2-NEXT: pand %xmm1, %xmm2
20911 ; SSE2-NEXT: psrlw $2, %xmm0
20912 ; SSE2-NEXT: pand %xmm1, %xmm0
20913 ; SSE2-NEXT: paddb %xmm2, %xmm0
20914 ; SSE2-NEXT: movdqa %xmm0, %xmm1
20915 ; SSE2-NEXT: psrlw $4, %xmm1
20916 ; SSE2-NEXT: paddb %xmm0, %xmm1
20917 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20918 ; SSE2-NEXT: pxor %xmm0, %xmm0
20919 ; SSE2-NEXT: psadbw %xmm0, %xmm1
20920 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20921 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
20922 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
20923 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483661,2147483661]
20924 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
20925 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
20926 ; SSE2-NEXT: pand %xmm2, %xmm1
20927 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
20928 ; SSE2-NEXT: por %xmm1, %xmm0
20931 ; SSE3-LABEL: ult_13_v2i64:
20933 ; SSE3-NEXT: movdqa %xmm0, %xmm1
20934 ; SSE3-NEXT: psrlw $1, %xmm1
20935 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20936 ; SSE3-NEXT: psubb %xmm1, %xmm0
20937 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
20938 ; SSE3-NEXT: movdqa %xmm0, %xmm2
20939 ; SSE3-NEXT: pand %xmm1, %xmm2
20940 ; SSE3-NEXT: psrlw $2, %xmm0
20941 ; SSE3-NEXT: pand %xmm1, %xmm0
20942 ; SSE3-NEXT: paddb %xmm2, %xmm0
20943 ; SSE3-NEXT: movdqa %xmm0, %xmm1
20944 ; SSE3-NEXT: psrlw $4, %xmm1
20945 ; SSE3-NEXT: paddb %xmm0, %xmm1
20946 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20947 ; SSE3-NEXT: pxor %xmm0, %xmm0
20948 ; SSE3-NEXT: psadbw %xmm0, %xmm1
20949 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20950 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
20951 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
20952 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483661,2147483661]
20953 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
20954 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
20955 ; SSE3-NEXT: pand %xmm2, %xmm1
20956 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
20957 ; SSE3-NEXT: por %xmm1, %xmm0
20960 ; SSSE3-LABEL: ult_13_v2i64:
20962 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20963 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
20964 ; SSSE3-NEXT: pand %xmm1, %xmm2
20965 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20966 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
20967 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
20968 ; SSSE3-NEXT: psrlw $4, %xmm0
20969 ; SSSE3-NEXT: pand %xmm1, %xmm0
20970 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
20971 ; SSSE3-NEXT: paddb %xmm4, %xmm3
20972 ; SSSE3-NEXT: pxor %xmm0, %xmm0
20973 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
20974 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
20975 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
20976 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
20977 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483661,2147483661]
20978 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
20979 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
20980 ; SSSE3-NEXT: pand %xmm1, %xmm2
20981 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
20982 ; SSSE3-NEXT: por %xmm2, %xmm0
20985 ; SSE41-LABEL: ult_13_v2i64:
20987 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20988 ; SSE41-NEXT: movdqa %xmm0, %xmm2
20989 ; SSE41-NEXT: pand %xmm1, %xmm2
20990 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20991 ; SSE41-NEXT: movdqa %xmm3, %xmm4
20992 ; SSE41-NEXT: pshufb %xmm2, %xmm4
20993 ; SSE41-NEXT: psrlw $4, %xmm0
20994 ; SSE41-NEXT: pand %xmm1, %xmm0
20995 ; SSE41-NEXT: pshufb %xmm0, %xmm3
20996 ; SSE41-NEXT: paddb %xmm4, %xmm3
20997 ; SSE41-NEXT: pxor %xmm0, %xmm0
20998 ; SSE41-NEXT: psadbw %xmm0, %xmm3
20999 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
21000 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
21001 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
21002 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483661,2147483661]
21003 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
21004 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
21005 ; SSE41-NEXT: pand %xmm1, %xmm2
21006 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
21007 ; SSE41-NEXT: por %xmm2, %xmm0
21010 ; AVX1-LABEL: ult_13_v2i64:
21012 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21013 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
21014 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21015 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21016 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
21017 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
21018 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21019 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21020 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
21021 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21022 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13]
21023 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21026 ; AVX2-LABEL: ult_13_v2i64:
21028 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21029 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
21030 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21031 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21032 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
21033 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
21034 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21035 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21036 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
21037 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21038 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13]
21039 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21042 ; AVX512VPOPCNTDQ-LABEL: ult_13_v2i64:
21043 ; AVX512VPOPCNTDQ: # %bb.0:
21044 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21045 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
21046 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13]
21047 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21048 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
21049 ; AVX512VPOPCNTDQ-NEXT: retq
21051 ; AVX512VPOPCNTDQVL-LABEL: ult_13_v2i64:
21052 ; AVX512VPOPCNTDQVL: # %bb.0:
21053 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
21054 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
21055 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
21056 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
21057 ; AVX512VPOPCNTDQVL-NEXT: retq
21059 ; BITALG_NOVLX-LABEL: ult_13_v2i64:
21060 ; BITALG_NOVLX: # %bb.0:
21061 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21062 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
21063 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
21064 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21065 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13]
21066 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21067 ; BITALG_NOVLX-NEXT: vzeroupper
21068 ; BITALG_NOVLX-NEXT: retq
21070 ; BITALG-LABEL: ult_13_v2i64:
21072 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
21073 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
21074 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21075 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
21076 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
21077 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
21078 ; BITALG-NEXT: retq
21079 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
21080 %3 = icmp ult <2 x i64> %2, <i64 13, i64 13>
21081 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_13_v2i64: per-lane popcount of a <2 x i64>, compare ugt 13, then sext
; the <2 x i1> result to a <2 x i64> all-ones/all-zeros mask.
; NOTE(review): every "; PREFIX-LABEL/-NEXT" line below is a FileCheck
; assertion autogenerated by utils/update_llc_test_checks.py (see file
; header) -- regenerate with that script rather than editing by hand.
; SSE variants lower ctpop via the shift/mask bit-counting sequence plus
; psadbw; AVX512VPOPCNTDQ uses vpopcntq and BITALG uses vpopcntb+vpsadbw.
21085 define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) {
21086 ; SSE2-LABEL: ugt_13_v2i64:
21088 ; SSE2-NEXT: movdqa %xmm0, %xmm1
21089 ; SSE2-NEXT: psrlw $1, %xmm1
21090 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21091 ; SSE2-NEXT: psubb %xmm1, %xmm0
21092 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
21093 ; SSE2-NEXT: movdqa %xmm0, %xmm2
21094 ; SSE2-NEXT: pand %xmm1, %xmm2
21095 ; SSE2-NEXT: psrlw $2, %xmm0
21096 ; SSE2-NEXT: pand %xmm1, %xmm0
21097 ; SSE2-NEXT: paddb %xmm2, %xmm0
21098 ; SSE2-NEXT: movdqa %xmm0, %xmm1
21099 ; SSE2-NEXT: psrlw $4, %xmm1
21100 ; SSE2-NEXT: paddb %xmm0, %xmm1
21101 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21102 ; SSE2-NEXT: pxor %xmm0, %xmm0
21103 ; SSE2-NEXT: psadbw %xmm0, %xmm1
21104 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21105 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
21106 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
21107 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21108 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
21109 ; SSE2-NEXT: pand %xmm2, %xmm3
21110 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
21111 ; SSE2-NEXT: por %xmm3, %xmm0
21114 ; SSE3-LABEL: ugt_13_v2i64:
21116 ; SSE3-NEXT: movdqa %xmm0, %xmm1
21117 ; SSE3-NEXT: psrlw $1, %xmm1
21118 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21119 ; SSE3-NEXT: psubb %xmm1, %xmm0
21120 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
21121 ; SSE3-NEXT: movdqa %xmm0, %xmm2
21122 ; SSE3-NEXT: pand %xmm1, %xmm2
21123 ; SSE3-NEXT: psrlw $2, %xmm0
21124 ; SSE3-NEXT: pand %xmm1, %xmm0
21125 ; SSE3-NEXT: paddb %xmm2, %xmm0
21126 ; SSE3-NEXT: movdqa %xmm0, %xmm1
21127 ; SSE3-NEXT: psrlw $4, %xmm1
21128 ; SSE3-NEXT: paddb %xmm0, %xmm1
21129 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21130 ; SSE3-NEXT: pxor %xmm0, %xmm0
21131 ; SSE3-NEXT: psadbw %xmm0, %xmm1
21132 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21133 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
21134 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
21135 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21136 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
21137 ; SSE3-NEXT: pand %xmm2, %xmm3
21138 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
21139 ; SSE3-NEXT: por %xmm3, %xmm0
21142 ; SSSE3-LABEL: ugt_13_v2i64:
21144 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21145 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
21146 ; SSSE3-NEXT: pand %xmm1, %xmm2
21147 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21148 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
21149 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
21150 ; SSSE3-NEXT: psrlw $4, %xmm0
21151 ; SSSE3-NEXT: pand %xmm1, %xmm0
21152 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
21153 ; SSSE3-NEXT: paddb %xmm4, %xmm3
21154 ; SSSE3-NEXT: pxor %xmm0, %xmm0
21155 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
21156 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
21157 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
21158 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
21159 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
21160 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
21161 ; SSSE3-NEXT: pand %xmm1, %xmm2
21162 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
21163 ; SSSE3-NEXT: por %xmm2, %xmm0
21166 ; SSE41-LABEL: ugt_13_v2i64:
21168 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21169 ; SSE41-NEXT: movdqa %xmm0, %xmm2
21170 ; SSE41-NEXT: pand %xmm1, %xmm2
21171 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21172 ; SSE41-NEXT: movdqa %xmm3, %xmm4
21173 ; SSE41-NEXT: pshufb %xmm2, %xmm4
21174 ; SSE41-NEXT: psrlw $4, %xmm0
21175 ; SSE41-NEXT: pand %xmm1, %xmm0
21176 ; SSE41-NEXT: pshufb %xmm0, %xmm3
21177 ; SSE41-NEXT: paddb %xmm4, %xmm3
21178 ; SSE41-NEXT: pxor %xmm0, %xmm0
21179 ; SSE41-NEXT: psadbw %xmm0, %xmm3
21180 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
21181 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
21182 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
21183 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
21184 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
21185 ; SSE41-NEXT: pand %xmm1, %xmm2
21186 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
21187 ; SSE41-NEXT: por %xmm2, %xmm0
21190 ; AVX1-LABEL: ugt_13_v2i64:
21192 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21193 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
21194 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21195 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21196 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
21197 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
21198 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21199 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21200 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
21201 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21202 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21205 ; AVX2-LABEL: ugt_13_v2i64:
21207 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21208 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
21209 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21210 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21211 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
21212 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
21213 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21214 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21215 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
21216 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21217 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21220 ; AVX512VPOPCNTDQ-LABEL: ugt_13_v2i64:
21221 ; AVX512VPOPCNTDQ: # %bb.0:
21222 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21223 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
21224 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21225 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
21226 ; AVX512VPOPCNTDQ-NEXT: retq
21228 ; AVX512VPOPCNTDQVL-LABEL: ugt_13_v2i64:
21229 ; AVX512VPOPCNTDQVL: # %bb.0:
21230 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
21231 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
21232 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
21233 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
21234 ; AVX512VPOPCNTDQVL-NEXT: retq
21236 ; BITALG_NOVLX-LABEL: ugt_13_v2i64:
21237 ; BITALG_NOVLX: # %bb.0:
21238 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21239 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
21240 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
21241 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21242 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21243 ; BITALG_NOVLX-NEXT: vzeroupper
21244 ; BITALG_NOVLX-NEXT: retq
21246 ; BITALG-LABEL: ugt_13_v2i64:
21248 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
21249 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
21250 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21251 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
21252 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
21253 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
21254 ; BITALG-NEXT: retq
; Reference IR: ctpop -> unsigned-greater-than-13 -> lane mask.
21255 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
21256 %3 = icmp ugt <2 x i64> %2, <i64 13, i64 13>
21257 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_14_v2i64: per-lane popcount of a <2 x i64>, compare ult 14, then sext
; the <2 x i1> result to a <2 x i64> all-ones/all-zeros mask.
; NOTE(review): the "; PREFIX-LABEL/-NEXT" lines are FileCheck assertions
; autogenerated by utils/update_llc_test_checks.py (see file header);
; regenerate with that script rather than editing by hand.
; For the ult form the SSE lowerings flip the comparison (constant on the
; left of pcmpgtd / vpcmpgtq with [14,14]); the masked variants use
; vpcmpltuq into a mask register instead.
21261 define <2 x i64> @ult_14_v2i64(<2 x i64> %0) {
21262 ; SSE2-LABEL: ult_14_v2i64:
21264 ; SSE2-NEXT: movdqa %xmm0, %xmm1
21265 ; SSE2-NEXT: psrlw $1, %xmm1
21266 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21267 ; SSE2-NEXT: psubb %xmm1, %xmm0
21268 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
21269 ; SSE2-NEXT: movdqa %xmm0, %xmm2
21270 ; SSE2-NEXT: pand %xmm1, %xmm2
21271 ; SSE2-NEXT: psrlw $2, %xmm0
21272 ; SSE2-NEXT: pand %xmm1, %xmm0
21273 ; SSE2-NEXT: paddb %xmm2, %xmm0
21274 ; SSE2-NEXT: movdqa %xmm0, %xmm1
21275 ; SSE2-NEXT: psrlw $4, %xmm1
21276 ; SSE2-NEXT: paddb %xmm0, %xmm1
21277 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21278 ; SSE2-NEXT: pxor %xmm0, %xmm0
21279 ; SSE2-NEXT: psadbw %xmm0, %xmm1
21280 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21281 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
21282 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
21283 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483662,2147483662]
21284 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
21285 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
21286 ; SSE2-NEXT: pand %xmm2, %xmm1
21287 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
21288 ; SSE2-NEXT: por %xmm1, %xmm0
21291 ; SSE3-LABEL: ult_14_v2i64:
21293 ; SSE3-NEXT: movdqa %xmm0, %xmm1
21294 ; SSE3-NEXT: psrlw $1, %xmm1
21295 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21296 ; SSE3-NEXT: psubb %xmm1, %xmm0
21297 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
21298 ; SSE3-NEXT: movdqa %xmm0, %xmm2
21299 ; SSE3-NEXT: pand %xmm1, %xmm2
21300 ; SSE3-NEXT: psrlw $2, %xmm0
21301 ; SSE3-NEXT: pand %xmm1, %xmm0
21302 ; SSE3-NEXT: paddb %xmm2, %xmm0
21303 ; SSE3-NEXT: movdqa %xmm0, %xmm1
21304 ; SSE3-NEXT: psrlw $4, %xmm1
21305 ; SSE3-NEXT: paddb %xmm0, %xmm1
21306 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21307 ; SSE3-NEXT: pxor %xmm0, %xmm0
21308 ; SSE3-NEXT: psadbw %xmm0, %xmm1
21309 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21310 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
21311 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
21312 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483662,2147483662]
21313 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
21314 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
21315 ; SSE3-NEXT: pand %xmm2, %xmm1
21316 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
21317 ; SSE3-NEXT: por %xmm1, %xmm0
21320 ; SSSE3-LABEL: ult_14_v2i64:
21322 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21323 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
21324 ; SSSE3-NEXT: pand %xmm1, %xmm2
21325 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21326 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
21327 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
21328 ; SSSE3-NEXT: psrlw $4, %xmm0
21329 ; SSSE3-NEXT: pand %xmm1, %xmm0
21330 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
21331 ; SSSE3-NEXT: paddb %xmm4, %xmm3
21332 ; SSSE3-NEXT: pxor %xmm0, %xmm0
21333 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
21334 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
21335 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
21336 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
21337 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483662,2147483662]
21338 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
21339 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
21340 ; SSSE3-NEXT: pand %xmm1, %xmm2
21341 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
21342 ; SSSE3-NEXT: por %xmm2, %xmm0
21345 ; SSE41-LABEL: ult_14_v2i64:
21347 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21348 ; SSE41-NEXT: movdqa %xmm0, %xmm2
21349 ; SSE41-NEXT: pand %xmm1, %xmm2
21350 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21351 ; SSE41-NEXT: movdqa %xmm3, %xmm4
21352 ; SSE41-NEXT: pshufb %xmm2, %xmm4
21353 ; SSE41-NEXT: psrlw $4, %xmm0
21354 ; SSE41-NEXT: pand %xmm1, %xmm0
21355 ; SSE41-NEXT: pshufb %xmm0, %xmm3
21356 ; SSE41-NEXT: paddb %xmm4, %xmm3
21357 ; SSE41-NEXT: pxor %xmm0, %xmm0
21358 ; SSE41-NEXT: psadbw %xmm0, %xmm3
21359 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
21360 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
21361 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
21362 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483662,2147483662]
21363 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
21364 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
21365 ; SSE41-NEXT: pand %xmm1, %xmm2
21366 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
21367 ; SSE41-NEXT: por %xmm2, %xmm0
21370 ; AVX1-LABEL: ult_14_v2i64:
21372 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21373 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
21374 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21375 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21376 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
21377 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
21378 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21379 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21380 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
21381 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21382 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14]
21383 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21386 ; AVX2-LABEL: ult_14_v2i64:
21388 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21389 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
21390 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21391 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21392 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
21393 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
21394 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21395 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21396 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
21397 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21398 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14]
21399 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21402 ; AVX512VPOPCNTDQ-LABEL: ult_14_v2i64:
21403 ; AVX512VPOPCNTDQ: # %bb.0:
21404 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21405 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
21406 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14]
21407 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21408 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
21409 ; AVX512VPOPCNTDQ-NEXT: retq
21411 ; AVX512VPOPCNTDQVL-LABEL: ult_14_v2i64:
21412 ; AVX512VPOPCNTDQVL: # %bb.0:
21413 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
21414 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
21415 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
21416 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
21417 ; AVX512VPOPCNTDQVL-NEXT: retq
21419 ; BITALG_NOVLX-LABEL: ult_14_v2i64:
21420 ; BITALG_NOVLX: # %bb.0:
21421 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21422 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
21423 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
21424 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21425 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14]
21426 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21427 ; BITALG_NOVLX-NEXT: vzeroupper
21428 ; BITALG_NOVLX-NEXT: retq
21430 ; BITALG-LABEL: ult_14_v2i64:
21432 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
21433 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
21434 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21435 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
21436 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
21437 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
21438 ; BITALG-NEXT: retq
; Reference IR: ctpop -> unsigned-less-than-14 -> lane mask.
21439 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
21440 %3 = icmp ult <2 x i64> %2, <i64 14, i64 14>
21441 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_14_v2i64: per-lane popcount of a <2 x i64>, compare ugt 14, then sext
; the <2 x i1> result to a <2 x i64> all-ones/all-zeros mask.
; NOTE(review): the "; PREFIX-LABEL/-NEXT" lines are FileCheck assertions
; autogenerated by utils/update_llc_test_checks.py (see file header);
; regenerate with that script rather than editing by hand.
; Same lowering shapes as ugt_13_v2i64, with the comparison constant bumped
; to 14 (loaded from a constant pool, so only the generic LCPI pattern
; appears in the checks).
21445 define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) {
21446 ; SSE2-LABEL: ugt_14_v2i64:
21448 ; SSE2-NEXT: movdqa %xmm0, %xmm1
21449 ; SSE2-NEXT: psrlw $1, %xmm1
21450 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21451 ; SSE2-NEXT: psubb %xmm1, %xmm0
21452 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
21453 ; SSE2-NEXT: movdqa %xmm0, %xmm2
21454 ; SSE2-NEXT: pand %xmm1, %xmm2
21455 ; SSE2-NEXT: psrlw $2, %xmm0
21456 ; SSE2-NEXT: pand %xmm1, %xmm0
21457 ; SSE2-NEXT: paddb %xmm2, %xmm0
21458 ; SSE2-NEXT: movdqa %xmm0, %xmm1
21459 ; SSE2-NEXT: psrlw $4, %xmm1
21460 ; SSE2-NEXT: paddb %xmm0, %xmm1
21461 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21462 ; SSE2-NEXT: pxor %xmm0, %xmm0
21463 ; SSE2-NEXT: psadbw %xmm0, %xmm1
21464 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21465 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
21466 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
21467 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21468 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
21469 ; SSE2-NEXT: pand %xmm2, %xmm3
21470 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
21471 ; SSE2-NEXT: por %xmm3, %xmm0
21474 ; SSE3-LABEL: ugt_14_v2i64:
21476 ; SSE3-NEXT: movdqa %xmm0, %xmm1
21477 ; SSE3-NEXT: psrlw $1, %xmm1
21478 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21479 ; SSE3-NEXT: psubb %xmm1, %xmm0
21480 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
21481 ; SSE3-NEXT: movdqa %xmm0, %xmm2
21482 ; SSE3-NEXT: pand %xmm1, %xmm2
21483 ; SSE3-NEXT: psrlw $2, %xmm0
21484 ; SSE3-NEXT: pand %xmm1, %xmm0
21485 ; SSE3-NEXT: paddb %xmm2, %xmm0
21486 ; SSE3-NEXT: movdqa %xmm0, %xmm1
21487 ; SSE3-NEXT: psrlw $4, %xmm1
21488 ; SSE3-NEXT: paddb %xmm0, %xmm1
21489 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21490 ; SSE3-NEXT: pxor %xmm0, %xmm0
21491 ; SSE3-NEXT: psadbw %xmm0, %xmm1
21492 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21493 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
21494 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
21495 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21496 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
21497 ; SSE3-NEXT: pand %xmm2, %xmm3
21498 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
21499 ; SSE3-NEXT: por %xmm3, %xmm0
21502 ; SSSE3-LABEL: ugt_14_v2i64:
21504 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21505 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
21506 ; SSSE3-NEXT: pand %xmm1, %xmm2
21507 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21508 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
21509 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
21510 ; SSSE3-NEXT: psrlw $4, %xmm0
21511 ; SSSE3-NEXT: pand %xmm1, %xmm0
21512 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
21513 ; SSSE3-NEXT: paddb %xmm4, %xmm3
21514 ; SSSE3-NEXT: pxor %xmm0, %xmm0
21515 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
21516 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
21517 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
21518 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
21519 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
21520 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
21521 ; SSSE3-NEXT: pand %xmm1, %xmm2
21522 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
21523 ; SSSE3-NEXT: por %xmm2, %xmm0
21526 ; SSE41-LABEL: ugt_14_v2i64:
21528 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21529 ; SSE41-NEXT: movdqa %xmm0, %xmm2
21530 ; SSE41-NEXT: pand %xmm1, %xmm2
21531 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21532 ; SSE41-NEXT: movdqa %xmm3, %xmm4
21533 ; SSE41-NEXT: pshufb %xmm2, %xmm4
21534 ; SSE41-NEXT: psrlw $4, %xmm0
21535 ; SSE41-NEXT: pand %xmm1, %xmm0
21536 ; SSE41-NEXT: pshufb %xmm0, %xmm3
21537 ; SSE41-NEXT: paddb %xmm4, %xmm3
21538 ; SSE41-NEXT: pxor %xmm0, %xmm0
21539 ; SSE41-NEXT: psadbw %xmm0, %xmm3
21540 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
21541 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
21542 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
21543 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
21544 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
21545 ; SSE41-NEXT: pand %xmm1, %xmm2
21546 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
21547 ; SSE41-NEXT: por %xmm2, %xmm0
21550 ; AVX1-LABEL: ugt_14_v2i64:
21552 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21553 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
21554 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21555 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21556 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
21557 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
21558 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21559 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21560 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
21561 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21562 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21565 ; AVX2-LABEL: ugt_14_v2i64:
21567 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21568 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
21569 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21570 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21571 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
21572 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
21573 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21574 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21575 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
21576 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21577 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21580 ; AVX512VPOPCNTDQ-LABEL: ugt_14_v2i64:
21581 ; AVX512VPOPCNTDQ: # %bb.0:
21582 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21583 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
21584 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21585 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
21586 ; AVX512VPOPCNTDQ-NEXT: retq
21588 ; AVX512VPOPCNTDQVL-LABEL: ugt_14_v2i64:
21589 ; AVX512VPOPCNTDQVL: # %bb.0:
21590 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
21591 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
21592 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
21593 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
21594 ; AVX512VPOPCNTDQVL-NEXT: retq
21596 ; BITALG_NOVLX-LABEL: ugt_14_v2i64:
21597 ; BITALG_NOVLX: # %bb.0:
21598 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21599 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
21600 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
21601 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21602 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21603 ; BITALG_NOVLX-NEXT: vzeroupper
21604 ; BITALG_NOVLX-NEXT: retq
21606 ; BITALG-LABEL: ugt_14_v2i64:
21608 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
21609 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
21610 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21611 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
21612 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
21613 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
21614 ; BITALG-NEXT: retq
; Reference IR: ctpop -> unsigned-greater-than-14 -> lane mask.
21615 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
21616 %3 = icmp ugt <2 x i64> %2, <i64 14, i64 14>
21617 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_15_v2i64: per-lane popcount of a <2 x i64>, compare ult 15, then sext
; the <2 x i1> result to a <2 x i64> all-ones/all-zeros mask.
; NOTE(review): the "; PREFIX-LABEL/-NEXT" lines are FileCheck assertions
; autogenerated by utils/update_llc_test_checks.py (see file header);
; regenerate with that script rather than editing by hand.
; Same lowering shapes as ult_14_v2i64 with the constant bumped to 15
; (SSE compares against [2147483663,2147483663]; AVX against [15,15]).
21621 define <2 x i64> @ult_15_v2i64(<2 x i64> %0) {
21622 ; SSE2-LABEL: ult_15_v2i64:
21624 ; SSE2-NEXT: movdqa %xmm0, %xmm1
21625 ; SSE2-NEXT: psrlw $1, %xmm1
21626 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21627 ; SSE2-NEXT: psubb %xmm1, %xmm0
21628 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
21629 ; SSE2-NEXT: movdqa %xmm0, %xmm2
21630 ; SSE2-NEXT: pand %xmm1, %xmm2
21631 ; SSE2-NEXT: psrlw $2, %xmm0
21632 ; SSE2-NEXT: pand %xmm1, %xmm0
21633 ; SSE2-NEXT: paddb %xmm2, %xmm0
21634 ; SSE2-NEXT: movdqa %xmm0, %xmm1
21635 ; SSE2-NEXT: psrlw $4, %xmm1
21636 ; SSE2-NEXT: paddb %xmm0, %xmm1
21637 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21638 ; SSE2-NEXT: pxor %xmm0, %xmm0
21639 ; SSE2-NEXT: psadbw %xmm0, %xmm1
21640 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21641 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
21642 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
21643 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483663,2147483663]
21644 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
21645 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
21646 ; SSE2-NEXT: pand %xmm2, %xmm1
21647 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
21648 ; SSE2-NEXT: por %xmm1, %xmm0
21651 ; SSE3-LABEL: ult_15_v2i64:
21653 ; SSE3-NEXT: movdqa %xmm0, %xmm1
21654 ; SSE3-NEXT: psrlw $1, %xmm1
21655 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21656 ; SSE3-NEXT: psubb %xmm1, %xmm0
21657 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
21658 ; SSE3-NEXT: movdqa %xmm0, %xmm2
21659 ; SSE3-NEXT: pand %xmm1, %xmm2
21660 ; SSE3-NEXT: psrlw $2, %xmm0
21661 ; SSE3-NEXT: pand %xmm1, %xmm0
21662 ; SSE3-NEXT: paddb %xmm2, %xmm0
21663 ; SSE3-NEXT: movdqa %xmm0, %xmm1
21664 ; SSE3-NEXT: psrlw $4, %xmm1
21665 ; SSE3-NEXT: paddb %xmm0, %xmm1
21666 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21667 ; SSE3-NEXT: pxor %xmm0, %xmm0
21668 ; SSE3-NEXT: psadbw %xmm0, %xmm1
21669 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21670 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
21671 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
21672 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483663,2147483663]
21673 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
21674 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
21675 ; SSE3-NEXT: pand %xmm2, %xmm1
21676 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
21677 ; SSE3-NEXT: por %xmm1, %xmm0
21680 ; SSSE3-LABEL: ult_15_v2i64:
21682 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21683 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
21684 ; SSSE3-NEXT: pand %xmm1, %xmm2
21685 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21686 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
21687 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
21688 ; SSSE3-NEXT: psrlw $4, %xmm0
21689 ; SSSE3-NEXT: pand %xmm1, %xmm0
21690 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
21691 ; SSSE3-NEXT: paddb %xmm4, %xmm3
21692 ; SSSE3-NEXT: pxor %xmm0, %xmm0
21693 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
21694 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
21695 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
21696 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
21697 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483663,2147483663]
21698 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
21699 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
21700 ; SSSE3-NEXT: pand %xmm1, %xmm2
21701 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
21702 ; SSSE3-NEXT: por %xmm2, %xmm0
21705 ; SSE41-LABEL: ult_15_v2i64:
21707 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21708 ; SSE41-NEXT: movdqa %xmm0, %xmm2
21709 ; SSE41-NEXT: pand %xmm1, %xmm2
21710 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21711 ; SSE41-NEXT: movdqa %xmm3, %xmm4
21712 ; SSE41-NEXT: pshufb %xmm2, %xmm4
21713 ; SSE41-NEXT: psrlw $4, %xmm0
21714 ; SSE41-NEXT: pand %xmm1, %xmm0
21715 ; SSE41-NEXT: pshufb %xmm0, %xmm3
21716 ; SSE41-NEXT: paddb %xmm4, %xmm3
21717 ; SSE41-NEXT: pxor %xmm0, %xmm0
21718 ; SSE41-NEXT: psadbw %xmm0, %xmm3
21719 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
21720 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
21721 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
21722 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483663,2147483663]
21723 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
21724 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
21725 ; SSE41-NEXT: pand %xmm1, %xmm2
21726 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
21727 ; SSE41-NEXT: por %xmm2, %xmm0
21730 ; AVX1-LABEL: ult_15_v2i64:
21732 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21733 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
21734 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21735 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21736 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
21737 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
21738 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21739 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21740 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
21741 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21742 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15]
21743 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21746 ; AVX2-LABEL: ult_15_v2i64:
21748 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21749 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
21750 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21751 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21752 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
21753 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
21754 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21755 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21756 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
21757 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21758 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15]
21759 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21762 ; AVX512VPOPCNTDQ-LABEL: ult_15_v2i64:
21763 ; AVX512VPOPCNTDQ: # %bb.0:
21764 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21765 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
21766 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15]
21767 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21768 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
21769 ; AVX512VPOPCNTDQ-NEXT: retq
21771 ; AVX512VPOPCNTDQVL-LABEL: ult_15_v2i64:
21772 ; AVX512VPOPCNTDQVL: # %bb.0:
21773 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
21774 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
21775 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
21776 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
21777 ; AVX512VPOPCNTDQVL-NEXT: retq
21779 ; BITALG_NOVLX-LABEL: ult_15_v2i64:
21780 ; BITALG_NOVLX: # %bb.0:
21781 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21782 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
21783 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
21784 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21785 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15]
21786 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21787 ; BITALG_NOVLX-NEXT: vzeroupper
21788 ; BITALG_NOVLX-NEXT: retq
21790 ; BITALG-LABEL: ult_15_v2i64:
21792 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
21793 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
21794 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21795 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
21796 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
21797 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
21798 ; BITALG-NEXT: retq
; Reference IR: ctpop -> unsigned-less-than-15 -> lane mask.
21799 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
21800 %3 = icmp ult <2 x i64> %2, <i64 15, i64 15>
21801 %4 = sext <2 x i1> %3 to <2 x i64>
21805 define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) {
21806 ; SSE2-LABEL: ugt_15_v2i64:
21808 ; SSE2-NEXT: movdqa %xmm0, %xmm1
21809 ; SSE2-NEXT: psrlw $1, %xmm1
21810 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21811 ; SSE2-NEXT: psubb %xmm1, %xmm0
21812 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
21813 ; SSE2-NEXT: movdqa %xmm0, %xmm2
21814 ; SSE2-NEXT: pand %xmm1, %xmm2
21815 ; SSE2-NEXT: psrlw $2, %xmm0
21816 ; SSE2-NEXT: pand %xmm1, %xmm0
21817 ; SSE2-NEXT: paddb %xmm2, %xmm0
21818 ; SSE2-NEXT: movdqa %xmm0, %xmm1
21819 ; SSE2-NEXT: psrlw $4, %xmm1
21820 ; SSE2-NEXT: paddb %xmm0, %xmm1
21821 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21822 ; SSE2-NEXT: pxor %xmm0, %xmm0
21823 ; SSE2-NEXT: psadbw %xmm0, %xmm1
21824 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21825 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
21826 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
21827 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21828 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
21829 ; SSE2-NEXT: pand %xmm2, %xmm3
21830 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
21831 ; SSE2-NEXT: por %xmm3, %xmm0
21834 ; SSE3-LABEL: ugt_15_v2i64:
21836 ; SSE3-NEXT: movdqa %xmm0, %xmm1
21837 ; SSE3-NEXT: psrlw $1, %xmm1
21838 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21839 ; SSE3-NEXT: psubb %xmm1, %xmm0
21840 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
21841 ; SSE3-NEXT: movdqa %xmm0, %xmm2
21842 ; SSE3-NEXT: pand %xmm1, %xmm2
21843 ; SSE3-NEXT: psrlw $2, %xmm0
21844 ; SSE3-NEXT: pand %xmm1, %xmm0
21845 ; SSE3-NEXT: paddb %xmm2, %xmm0
21846 ; SSE3-NEXT: movdqa %xmm0, %xmm1
21847 ; SSE3-NEXT: psrlw $4, %xmm1
21848 ; SSE3-NEXT: paddb %xmm0, %xmm1
21849 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21850 ; SSE3-NEXT: pxor %xmm0, %xmm0
21851 ; SSE3-NEXT: psadbw %xmm0, %xmm1
21852 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21853 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
21854 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
21855 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21856 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
21857 ; SSE3-NEXT: pand %xmm2, %xmm3
21858 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
21859 ; SSE3-NEXT: por %xmm3, %xmm0
21862 ; SSSE3-LABEL: ugt_15_v2i64:
21864 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21865 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
21866 ; SSSE3-NEXT: pand %xmm1, %xmm2
21867 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21868 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
21869 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
21870 ; SSSE3-NEXT: psrlw $4, %xmm0
21871 ; SSSE3-NEXT: pand %xmm1, %xmm0
21872 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
21873 ; SSSE3-NEXT: paddb %xmm4, %xmm3
21874 ; SSSE3-NEXT: pxor %xmm0, %xmm0
21875 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
21876 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
21877 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
21878 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
21879 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
21880 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
21881 ; SSSE3-NEXT: pand %xmm1, %xmm2
21882 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
21883 ; SSSE3-NEXT: por %xmm2, %xmm0
21886 ; SSE41-LABEL: ugt_15_v2i64:
21888 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21889 ; SSE41-NEXT: movdqa %xmm0, %xmm2
21890 ; SSE41-NEXT: pand %xmm1, %xmm2
21891 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21892 ; SSE41-NEXT: movdqa %xmm3, %xmm4
21893 ; SSE41-NEXT: pshufb %xmm2, %xmm4
21894 ; SSE41-NEXT: psrlw $4, %xmm0
21895 ; SSE41-NEXT: pand %xmm1, %xmm0
21896 ; SSE41-NEXT: pshufb %xmm0, %xmm3
21897 ; SSE41-NEXT: paddb %xmm4, %xmm3
21898 ; SSE41-NEXT: pxor %xmm0, %xmm0
21899 ; SSE41-NEXT: psadbw %xmm0, %xmm3
21900 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
21901 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
21902 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
21903 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
21904 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
21905 ; SSE41-NEXT: pand %xmm1, %xmm2
21906 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
21907 ; SSE41-NEXT: por %xmm2, %xmm0
21910 ; AVX1-LABEL: ugt_15_v2i64:
21912 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21913 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
21914 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21915 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21916 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
21917 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
21918 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21919 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21920 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
21921 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21922 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21925 ; AVX2-LABEL: ugt_15_v2i64:
21927 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21928 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
21929 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21930 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21931 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
21932 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
21933 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21934 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21935 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
21936 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21937 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21940 ; AVX512VPOPCNTDQ-LABEL: ugt_15_v2i64:
21941 ; AVX512VPOPCNTDQ: # %bb.0:
21942 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21943 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
21944 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21945 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
21946 ; AVX512VPOPCNTDQ-NEXT: retq
21948 ; AVX512VPOPCNTDQVL-LABEL: ugt_15_v2i64:
21949 ; AVX512VPOPCNTDQVL: # %bb.0:
21950 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
21951 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
21952 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
21953 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
21954 ; AVX512VPOPCNTDQVL-NEXT: retq
21956 ; BITALG_NOVLX-LABEL: ugt_15_v2i64:
21957 ; BITALG_NOVLX: # %bb.0:
21958 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21959 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
21960 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
21961 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21962 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21963 ; BITALG_NOVLX-NEXT: vzeroupper
21964 ; BITALG_NOVLX-NEXT: retq
21966 ; BITALG-LABEL: ugt_15_v2i64:
21968 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
21969 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
21970 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21971 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
21972 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
21973 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
21974 ; BITALG-NEXT: retq
21975 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
21976 %3 = icmp ugt <2 x i64> %2, <i64 15, i64 15>
21977 %4 = sext <2 x i1> %3 to <2 x i64>
; NOTE(review): checks below are autogenerated by update_llc_test_checks.py --
; do not hand-edit the *-LABEL/*-NEXT lines; regenerate instead.
; Verifies lowering of ctpop(<2 x i64>) followed by `icmp ult 16`, with the
; <2 x i1> result sign-extended to a <2 x i64> mask, across the SSE2..BITALG
; feature levels driven by the RUN lines at the top of the file.
21981 define <2 x i64> @ult_16_v2i64(<2 x i64> %0) {
21982 ; SSE2-LABEL: ult_16_v2i64:
21984 ; SSE2-NEXT: movdqa %xmm0, %xmm1
21985 ; SSE2-NEXT: psrlw $1, %xmm1
21986 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21987 ; SSE2-NEXT: psubb %xmm1, %xmm0
21988 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
21989 ; SSE2-NEXT: movdqa %xmm0, %xmm2
21990 ; SSE2-NEXT: pand %xmm1, %xmm2
21991 ; SSE2-NEXT: psrlw $2, %xmm0
21992 ; SSE2-NEXT: pand %xmm1, %xmm0
21993 ; SSE2-NEXT: paddb %xmm2, %xmm0
21994 ; SSE2-NEXT: movdqa %xmm0, %xmm1
21995 ; SSE2-NEXT: psrlw $4, %xmm1
21996 ; SSE2-NEXT: paddb %xmm0, %xmm1
21997 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21998 ; SSE2-NEXT: pxor %xmm0, %xmm0
21999 ; SSE2-NEXT: psadbw %xmm0, %xmm1
22000 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22001 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
22002 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
22003 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483664,2147483664]
22004 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
22005 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
22006 ; SSE2-NEXT: pand %xmm2, %xmm1
22007 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
22008 ; SSE2-NEXT: por %xmm1, %xmm0
22011 ; SSE3-LABEL: ult_16_v2i64:
22013 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22014 ; SSE3-NEXT: psrlw $1, %xmm1
22015 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22016 ; SSE3-NEXT: psubb %xmm1, %xmm0
22017 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22018 ; SSE3-NEXT: movdqa %xmm0, %xmm2
22019 ; SSE3-NEXT: pand %xmm1, %xmm2
22020 ; SSE3-NEXT: psrlw $2, %xmm0
22021 ; SSE3-NEXT: pand %xmm1, %xmm0
22022 ; SSE3-NEXT: paddb %xmm2, %xmm0
22023 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22024 ; SSE3-NEXT: psrlw $4, %xmm1
22025 ; SSE3-NEXT: paddb %xmm0, %xmm1
22026 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22027 ; SSE3-NEXT: pxor %xmm0, %xmm0
22028 ; SSE3-NEXT: psadbw %xmm0, %xmm1
22029 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22030 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
22031 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
22032 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483664,2147483664]
22033 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
22034 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
22035 ; SSE3-NEXT: pand %xmm2, %xmm1
22036 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
22037 ; SSE3-NEXT: por %xmm1, %xmm0
22040 ; SSSE3-LABEL: ult_16_v2i64:
22042 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22043 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
22044 ; SSSE3-NEXT: pand %xmm1, %xmm2
22045 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22046 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
22047 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
22048 ; SSSE3-NEXT: psrlw $4, %xmm0
22049 ; SSSE3-NEXT: pand %xmm1, %xmm0
22050 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
22051 ; SSSE3-NEXT: paddb %xmm4, %xmm3
22052 ; SSSE3-NEXT: pxor %xmm0, %xmm0
22053 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
22054 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
22055 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
22056 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
22057 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483664,2147483664]
22058 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
22059 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
22060 ; SSSE3-NEXT: pand %xmm1, %xmm2
22061 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
22062 ; SSSE3-NEXT: por %xmm2, %xmm0
22065 ; SSE41-LABEL: ult_16_v2i64:
22067 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22068 ; SSE41-NEXT: movdqa %xmm0, %xmm2
22069 ; SSE41-NEXT: pand %xmm1, %xmm2
22070 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22071 ; SSE41-NEXT: movdqa %xmm3, %xmm4
22072 ; SSE41-NEXT: pshufb %xmm2, %xmm4
22073 ; SSE41-NEXT: psrlw $4, %xmm0
22074 ; SSE41-NEXT: pand %xmm1, %xmm0
22075 ; SSE41-NEXT: pshufb %xmm0, %xmm3
22076 ; SSE41-NEXT: paddb %xmm4, %xmm3
22077 ; SSE41-NEXT: pxor %xmm0, %xmm0
22078 ; SSE41-NEXT: psadbw %xmm0, %xmm3
22079 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
22080 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
22081 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
22082 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483664,2147483664]
22083 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
22084 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
22085 ; SSE41-NEXT: pand %xmm1, %xmm2
22086 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
22087 ; SSE41-NEXT: por %xmm2, %xmm0
22090 ; AVX1-LABEL: ult_16_v2i64:
22092 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22093 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
22094 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22095 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
22096 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
22097 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
22098 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
22099 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
22100 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
22101 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22102 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16]
22103 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22106 ; AVX2-LABEL: ult_16_v2i64:
22108 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22109 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
22110 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22111 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
22112 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
22113 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
22114 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
22115 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
22116 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
22117 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22118 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16]
22119 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22122 ; AVX512VPOPCNTDQ-LABEL: ult_16_v2i64:
22123 ; AVX512VPOPCNTDQ: # %bb.0:
22124 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22125 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
22126 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16]
22127 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22128 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
22129 ; AVX512VPOPCNTDQ-NEXT: retq
22131 ; AVX512VPOPCNTDQVL-LABEL: ult_16_v2i64:
22132 ; AVX512VPOPCNTDQVL: # %bb.0:
22133 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
22134 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
22135 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
22136 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
22137 ; AVX512VPOPCNTDQVL-NEXT: retq
22139 ; BITALG_NOVLX-LABEL: ult_16_v2i64:
22140 ; BITALG_NOVLX: # %bb.0:
22141 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22142 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
22143 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
22144 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22145 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16]
22146 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22147 ; BITALG_NOVLX-NEXT: vzeroupper
22148 ; BITALG_NOVLX-NEXT: retq
22150 ; BITALG-LABEL: ult_16_v2i64:
22152 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
22153 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
22154 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22155 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
22156 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
22157 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
22158 ; BITALG-NEXT: retq
22159 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
22160 %3 = icmp ult <2 x i64> %2, <i64 16, i64 16>
22161 %4 = sext <2 x i1> %3 to <2 x i64>
; NOTE(review): checks below are autogenerated by update_llc_test_checks.py --
; do not hand-edit the *-LABEL/*-NEXT lines; regenerate instead.
; Verifies lowering of ctpop(<2 x i64>) followed by `icmp ugt 16`, with the
; <2 x i1> result sign-extended to a <2 x i64> mask, across the SSE2..BITALG
; feature levels driven by the RUN lines at the top of the file.
22165 define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) {
22166 ; SSE2-LABEL: ugt_16_v2i64:
22168 ; SSE2-NEXT: movdqa %xmm0, %xmm1
22169 ; SSE2-NEXT: psrlw $1, %xmm1
22170 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22171 ; SSE2-NEXT: psubb %xmm1, %xmm0
22172 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22173 ; SSE2-NEXT: movdqa %xmm0, %xmm2
22174 ; SSE2-NEXT: pand %xmm1, %xmm2
22175 ; SSE2-NEXT: psrlw $2, %xmm0
22176 ; SSE2-NEXT: pand %xmm1, %xmm0
22177 ; SSE2-NEXT: paddb %xmm2, %xmm0
22178 ; SSE2-NEXT: movdqa %xmm0, %xmm1
22179 ; SSE2-NEXT: psrlw $4, %xmm1
22180 ; SSE2-NEXT: paddb %xmm0, %xmm1
22181 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22182 ; SSE2-NEXT: pxor %xmm0, %xmm0
22183 ; SSE2-NEXT: psadbw %xmm0, %xmm1
22184 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22185 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
22186 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
22187 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22188 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
22189 ; SSE2-NEXT: pand %xmm2, %xmm3
22190 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
22191 ; SSE2-NEXT: por %xmm3, %xmm0
22194 ; SSE3-LABEL: ugt_16_v2i64:
22196 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22197 ; SSE3-NEXT: psrlw $1, %xmm1
22198 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22199 ; SSE3-NEXT: psubb %xmm1, %xmm0
22200 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22201 ; SSE3-NEXT: movdqa %xmm0, %xmm2
22202 ; SSE3-NEXT: pand %xmm1, %xmm2
22203 ; SSE3-NEXT: psrlw $2, %xmm0
22204 ; SSE3-NEXT: pand %xmm1, %xmm0
22205 ; SSE3-NEXT: paddb %xmm2, %xmm0
22206 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22207 ; SSE3-NEXT: psrlw $4, %xmm1
22208 ; SSE3-NEXT: paddb %xmm0, %xmm1
22209 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22210 ; SSE3-NEXT: pxor %xmm0, %xmm0
22211 ; SSE3-NEXT: psadbw %xmm0, %xmm1
22212 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22213 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
22214 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
22215 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22216 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
22217 ; SSE3-NEXT: pand %xmm2, %xmm3
22218 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
22219 ; SSE3-NEXT: por %xmm3, %xmm0
22222 ; SSSE3-LABEL: ugt_16_v2i64:
22224 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22225 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
22226 ; SSSE3-NEXT: pand %xmm1, %xmm2
22227 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22228 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
22229 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
22230 ; SSSE3-NEXT: psrlw $4, %xmm0
22231 ; SSSE3-NEXT: pand %xmm1, %xmm0
22232 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
22233 ; SSSE3-NEXT: paddb %xmm4, %xmm3
22234 ; SSSE3-NEXT: pxor %xmm0, %xmm0
22235 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
22236 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
22237 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
22238 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
22239 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
22240 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
22241 ; SSSE3-NEXT: pand %xmm1, %xmm2
22242 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
22243 ; SSSE3-NEXT: por %xmm2, %xmm0
22246 ; SSE41-LABEL: ugt_16_v2i64:
22248 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22249 ; SSE41-NEXT: movdqa %xmm0, %xmm2
22250 ; SSE41-NEXT: pand %xmm1, %xmm2
22251 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22252 ; SSE41-NEXT: movdqa %xmm3, %xmm4
22253 ; SSE41-NEXT: pshufb %xmm2, %xmm4
22254 ; SSE41-NEXT: psrlw $4, %xmm0
22255 ; SSE41-NEXT: pand %xmm1, %xmm0
22256 ; SSE41-NEXT: pshufb %xmm0, %xmm3
22257 ; SSE41-NEXT: paddb %xmm4, %xmm3
22258 ; SSE41-NEXT: pxor %xmm0, %xmm0
22259 ; SSE41-NEXT: psadbw %xmm0, %xmm3
22260 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
22261 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
22262 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
22263 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
22264 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
22265 ; SSE41-NEXT: pand %xmm1, %xmm2
22266 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
22267 ; SSE41-NEXT: por %xmm2, %xmm0
22270 ; AVX1-LABEL: ugt_16_v2i64:
22272 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22273 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
22274 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22275 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
22276 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
22277 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
22278 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
22279 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
22280 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
22281 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22282 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
22285 ; AVX2-LABEL: ugt_16_v2i64:
22287 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22288 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
22289 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22290 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
22291 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
22292 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
22293 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
22294 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
22295 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
22296 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22297 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
22300 ; AVX512VPOPCNTDQ-LABEL: ugt_16_v2i64:
22301 ; AVX512VPOPCNTDQ: # %bb.0:
22302 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22303 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
22304 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
22305 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
22306 ; AVX512VPOPCNTDQ-NEXT: retq
22308 ; AVX512VPOPCNTDQVL-LABEL: ugt_16_v2i64:
22309 ; AVX512VPOPCNTDQVL: # %bb.0:
22310 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
22311 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
22312 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
22313 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
22314 ; AVX512VPOPCNTDQVL-NEXT: retq
22316 ; BITALG_NOVLX-LABEL: ugt_16_v2i64:
22317 ; BITALG_NOVLX: # %bb.0:
22318 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22319 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
22320 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
22321 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22322 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
22323 ; BITALG_NOVLX-NEXT: vzeroupper
22324 ; BITALG_NOVLX-NEXT: retq
22326 ; BITALG-LABEL: ugt_16_v2i64:
22328 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
22329 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
22330 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22331 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
22332 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
22333 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
22334 ; BITALG-NEXT: retq
22335 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
22336 %3 = icmp ugt <2 x i64> %2, <i64 16, i64 16>
22337 %4 = sext <2 x i1> %3 to <2 x i64>
; NOTE(review): checks below are autogenerated by update_llc_test_checks.py --
; do not hand-edit the *-LABEL/*-NEXT lines; regenerate instead.
; Verifies lowering of ctpop(<2 x i64>) followed by `icmp ult 17`, with the
; <2 x i1> result sign-extended to a <2 x i64> mask, across the SSE2..BITALG
; feature levels driven by the RUN lines at the top of the file.
22341 define <2 x i64> @ult_17_v2i64(<2 x i64> %0) {
22342 ; SSE2-LABEL: ult_17_v2i64:
22344 ; SSE2-NEXT: movdqa %xmm0, %xmm1
22345 ; SSE2-NEXT: psrlw $1, %xmm1
22346 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22347 ; SSE2-NEXT: psubb %xmm1, %xmm0
22348 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22349 ; SSE2-NEXT: movdqa %xmm0, %xmm2
22350 ; SSE2-NEXT: pand %xmm1, %xmm2
22351 ; SSE2-NEXT: psrlw $2, %xmm0
22352 ; SSE2-NEXT: pand %xmm1, %xmm0
22353 ; SSE2-NEXT: paddb %xmm2, %xmm0
22354 ; SSE2-NEXT: movdqa %xmm0, %xmm1
22355 ; SSE2-NEXT: psrlw $4, %xmm1
22356 ; SSE2-NEXT: paddb %xmm0, %xmm1
22357 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22358 ; SSE2-NEXT: pxor %xmm0, %xmm0
22359 ; SSE2-NEXT: psadbw %xmm0, %xmm1
22360 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22361 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
22362 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
22363 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483665,2147483665]
22364 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
22365 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
22366 ; SSE2-NEXT: pand %xmm2, %xmm1
22367 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
22368 ; SSE2-NEXT: por %xmm1, %xmm0
22371 ; SSE3-LABEL: ult_17_v2i64:
22373 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22374 ; SSE3-NEXT: psrlw $1, %xmm1
22375 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22376 ; SSE3-NEXT: psubb %xmm1, %xmm0
22377 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22378 ; SSE3-NEXT: movdqa %xmm0, %xmm2
22379 ; SSE3-NEXT: pand %xmm1, %xmm2
22380 ; SSE3-NEXT: psrlw $2, %xmm0
22381 ; SSE3-NEXT: pand %xmm1, %xmm0
22382 ; SSE3-NEXT: paddb %xmm2, %xmm0
22383 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22384 ; SSE3-NEXT: psrlw $4, %xmm1
22385 ; SSE3-NEXT: paddb %xmm0, %xmm1
22386 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22387 ; SSE3-NEXT: pxor %xmm0, %xmm0
22388 ; SSE3-NEXT: psadbw %xmm0, %xmm1
22389 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22390 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
22391 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
22392 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483665,2147483665]
22393 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
22394 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
22395 ; SSE3-NEXT: pand %xmm2, %xmm1
22396 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
22397 ; SSE3-NEXT: por %xmm1, %xmm0
22400 ; SSSE3-LABEL: ult_17_v2i64:
22402 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22403 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
22404 ; SSSE3-NEXT: pand %xmm1, %xmm2
22405 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22406 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
22407 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
22408 ; SSSE3-NEXT: psrlw $4, %xmm0
22409 ; SSSE3-NEXT: pand %xmm1, %xmm0
22410 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
22411 ; SSSE3-NEXT: paddb %xmm4, %xmm3
22412 ; SSSE3-NEXT: pxor %xmm0, %xmm0
22413 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
22414 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
22415 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
22416 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
22417 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483665,2147483665]
22418 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
22419 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
22420 ; SSSE3-NEXT: pand %xmm1, %xmm2
22421 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
22422 ; SSSE3-NEXT: por %xmm2, %xmm0
22425 ; SSE41-LABEL: ult_17_v2i64:
22427 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22428 ; SSE41-NEXT: movdqa %xmm0, %xmm2
22429 ; SSE41-NEXT: pand %xmm1, %xmm2
22430 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22431 ; SSE41-NEXT: movdqa %xmm3, %xmm4
22432 ; SSE41-NEXT: pshufb %xmm2, %xmm4
22433 ; SSE41-NEXT: psrlw $4, %xmm0
22434 ; SSE41-NEXT: pand %xmm1, %xmm0
22435 ; SSE41-NEXT: pshufb %xmm0, %xmm3
22436 ; SSE41-NEXT: paddb %xmm4, %xmm3
22437 ; SSE41-NEXT: pxor %xmm0, %xmm0
22438 ; SSE41-NEXT: psadbw %xmm0, %xmm3
22439 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
22440 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
22441 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
22442 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483665,2147483665]
22443 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
22444 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
22445 ; SSE41-NEXT: pand %xmm1, %xmm2
22446 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
22447 ; SSE41-NEXT: por %xmm2, %xmm0
22450 ; AVX1-LABEL: ult_17_v2i64:
22452 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22453 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
22454 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22455 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
22456 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
22457 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
22458 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
22459 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
22460 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
22461 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22462 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17]
22463 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22466 ; AVX2-LABEL: ult_17_v2i64:
22468 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22469 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
22470 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22471 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
22472 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
22473 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
22474 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
22475 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
22476 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
22477 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22478 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17]
22479 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22482 ; AVX512VPOPCNTDQ-LABEL: ult_17_v2i64:
22483 ; AVX512VPOPCNTDQ: # %bb.0:
22484 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22485 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
22486 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17]
22487 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22488 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
22489 ; AVX512VPOPCNTDQ-NEXT: retq
22491 ; AVX512VPOPCNTDQVL-LABEL: ult_17_v2i64:
22492 ; AVX512VPOPCNTDQVL: # %bb.0:
22493 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
22494 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
22495 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
22496 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
22497 ; AVX512VPOPCNTDQVL-NEXT: retq
22499 ; BITALG_NOVLX-LABEL: ult_17_v2i64:
22500 ; BITALG_NOVLX: # %bb.0:
22501 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22502 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
22503 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
22504 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22505 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17]
22506 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22507 ; BITALG_NOVLX-NEXT: vzeroupper
22508 ; BITALG_NOVLX-NEXT: retq
22510 ; BITALG-LABEL: ult_17_v2i64:
22512 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
22513 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
22514 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22515 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
22516 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
22517 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
22518 ; BITALG-NEXT: retq
22519 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
22520 %3 = icmp ult <2 x i64> %2, <i64 17, i64 17>
22521 %4 = sext <2 x i1> %3 to <2 x i64>
22525 define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) {
22526 ; SSE2-LABEL: ugt_17_v2i64:
22528 ; SSE2-NEXT: movdqa %xmm0, %xmm1
22529 ; SSE2-NEXT: psrlw $1, %xmm1
22530 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22531 ; SSE2-NEXT: psubb %xmm1, %xmm0
22532 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22533 ; SSE2-NEXT: movdqa %xmm0, %xmm2
22534 ; SSE2-NEXT: pand %xmm1, %xmm2
22535 ; SSE2-NEXT: psrlw $2, %xmm0
22536 ; SSE2-NEXT: pand %xmm1, %xmm0
22537 ; SSE2-NEXT: paddb %xmm2, %xmm0
22538 ; SSE2-NEXT: movdqa %xmm0, %xmm1
22539 ; SSE2-NEXT: psrlw $4, %xmm1
22540 ; SSE2-NEXT: paddb %xmm0, %xmm1
22541 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22542 ; SSE2-NEXT: pxor %xmm0, %xmm0
22543 ; SSE2-NEXT: psadbw %xmm0, %xmm1
22544 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22545 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
22546 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
22547 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22548 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
22549 ; SSE2-NEXT: pand %xmm2, %xmm3
22550 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
22551 ; SSE2-NEXT: por %xmm3, %xmm0
22554 ; SSE3-LABEL: ugt_17_v2i64:
22556 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22557 ; SSE3-NEXT: psrlw $1, %xmm1
22558 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22559 ; SSE3-NEXT: psubb %xmm1, %xmm0
22560 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22561 ; SSE3-NEXT: movdqa %xmm0, %xmm2
22562 ; SSE3-NEXT: pand %xmm1, %xmm2
22563 ; SSE3-NEXT: psrlw $2, %xmm0
22564 ; SSE3-NEXT: pand %xmm1, %xmm0
22565 ; SSE3-NEXT: paddb %xmm2, %xmm0
22566 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22567 ; SSE3-NEXT: psrlw $4, %xmm1
22568 ; SSE3-NEXT: paddb %xmm0, %xmm1
22569 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22570 ; SSE3-NEXT: pxor %xmm0, %xmm0
22571 ; SSE3-NEXT: psadbw %xmm0, %xmm1
22572 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22573 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
22574 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
22575 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22576 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
22577 ; SSE3-NEXT: pand %xmm2, %xmm3
22578 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
22579 ; SSE3-NEXT: por %xmm3, %xmm0
22582 ; SSSE3-LABEL: ugt_17_v2i64:
22584 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22585 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
22586 ; SSSE3-NEXT: pand %xmm1, %xmm2
22587 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22588 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
22589 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
22590 ; SSSE3-NEXT: psrlw $4, %xmm0
22591 ; SSSE3-NEXT: pand %xmm1, %xmm0
22592 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
22593 ; SSSE3-NEXT: paddb %xmm4, %xmm3
22594 ; SSSE3-NEXT: pxor %xmm0, %xmm0
22595 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
22596 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
22597 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
22598 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
22599 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
22600 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
22601 ; SSSE3-NEXT: pand %xmm1, %xmm2
22602 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
22603 ; SSSE3-NEXT: por %xmm2, %xmm0
22606 ; SSE41-LABEL: ugt_17_v2i64:
22608 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22609 ; SSE41-NEXT: movdqa %xmm0, %xmm2
22610 ; SSE41-NEXT: pand %xmm1, %xmm2
22611 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22612 ; SSE41-NEXT: movdqa %xmm3, %xmm4
22613 ; SSE41-NEXT: pshufb %xmm2, %xmm4
22614 ; SSE41-NEXT: psrlw $4, %xmm0
22615 ; SSE41-NEXT: pand %xmm1, %xmm0
22616 ; SSE41-NEXT: pshufb %xmm0, %xmm3
22617 ; SSE41-NEXT: paddb %xmm4, %xmm3
22618 ; SSE41-NEXT: pxor %xmm0, %xmm0
22619 ; SSE41-NEXT: psadbw %xmm0, %xmm3
22620 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
22621 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
22622 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
22623 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
22624 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
22625 ; SSE41-NEXT: pand %xmm1, %xmm2
22626 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
22627 ; SSE41-NEXT: por %xmm2, %xmm0
22630 ; AVX1-LABEL: ugt_17_v2i64:
22632 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22633 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
22634 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22635 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
22636 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
22637 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
22638 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
22639 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
22640 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
22641 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22642 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
22645 ; AVX2-LABEL: ugt_17_v2i64:
22647 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22648 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
22649 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22650 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
22651 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
22652 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
22653 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
22654 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
22655 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
22656 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22657 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
22660 ; AVX512VPOPCNTDQ-LABEL: ugt_17_v2i64:
22661 ; AVX512VPOPCNTDQ: # %bb.0:
22662 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22663 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
22664 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
22665 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
22666 ; AVX512VPOPCNTDQ-NEXT: retq
22668 ; AVX512VPOPCNTDQVL-LABEL: ugt_17_v2i64:
22669 ; AVX512VPOPCNTDQVL: # %bb.0:
22670 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
22671 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
22672 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
22673 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
22674 ; AVX512VPOPCNTDQVL-NEXT: retq
22676 ; BITALG_NOVLX-LABEL: ugt_17_v2i64:
22677 ; BITALG_NOVLX: # %bb.0:
22678 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22679 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
22680 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
22681 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22682 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
22683 ; BITALG_NOVLX-NEXT: vzeroupper
22684 ; BITALG_NOVLX-NEXT: retq
22686 ; BITALG-LABEL: ugt_17_v2i64:
22688 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
22689 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
22690 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22691 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
22692 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
22693 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
22694 ; BITALG-NEXT: retq
22695 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
22696 %3 = icmp ugt <2 x i64> %2, <i64 17, i64 17>
22697 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_18_v2i64: verifies lowering of sext(icmp ult (ctpop x), 18) for <2 x i64>.
; Pre-SSE41 targets use the bit-twiddling popcount + psadbw then a signed
; double-dword compare against the bias-adjusted constant 2147483666; AVX512VL
; and BITALG targets use vpcmpltuq mask compares directly.
; NOTE(review): CHECK lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate with that script rather than hand-editing.
22701 define <2 x i64> @ult_18_v2i64(<2 x i64> %0) {
22702 ; SSE2-LABEL: ult_18_v2i64:
22704 ; SSE2-NEXT: movdqa %xmm0, %xmm1
22705 ; SSE2-NEXT: psrlw $1, %xmm1
22706 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22707 ; SSE2-NEXT: psubb %xmm1, %xmm0
22708 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22709 ; SSE2-NEXT: movdqa %xmm0, %xmm2
22710 ; SSE2-NEXT: pand %xmm1, %xmm2
22711 ; SSE2-NEXT: psrlw $2, %xmm0
22712 ; SSE2-NEXT: pand %xmm1, %xmm0
22713 ; SSE2-NEXT: paddb %xmm2, %xmm0
22714 ; SSE2-NEXT: movdqa %xmm0, %xmm1
22715 ; SSE2-NEXT: psrlw $4, %xmm1
22716 ; SSE2-NEXT: paddb %xmm0, %xmm1
22717 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22718 ; SSE2-NEXT: pxor %xmm0, %xmm0
22719 ; SSE2-NEXT: psadbw %xmm0, %xmm1
22720 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22721 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
22722 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
22723 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483666,2147483666]
22724 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
22725 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
22726 ; SSE2-NEXT: pand %xmm2, %xmm1
22727 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
22728 ; SSE2-NEXT: por %xmm1, %xmm0
22731 ; SSE3-LABEL: ult_18_v2i64:
22733 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22734 ; SSE3-NEXT: psrlw $1, %xmm1
22735 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22736 ; SSE3-NEXT: psubb %xmm1, %xmm0
22737 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22738 ; SSE3-NEXT: movdqa %xmm0, %xmm2
22739 ; SSE3-NEXT: pand %xmm1, %xmm2
22740 ; SSE3-NEXT: psrlw $2, %xmm0
22741 ; SSE3-NEXT: pand %xmm1, %xmm0
22742 ; SSE3-NEXT: paddb %xmm2, %xmm0
22743 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22744 ; SSE3-NEXT: psrlw $4, %xmm1
22745 ; SSE3-NEXT: paddb %xmm0, %xmm1
22746 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22747 ; SSE3-NEXT: pxor %xmm0, %xmm0
22748 ; SSE3-NEXT: psadbw %xmm0, %xmm1
22749 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22750 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
22751 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
22752 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483666,2147483666]
22753 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
22754 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
22755 ; SSE3-NEXT: pand %xmm2, %xmm1
22756 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
22757 ; SSE3-NEXT: por %xmm1, %xmm0
22760 ; SSSE3-LABEL: ult_18_v2i64:
22762 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22763 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
22764 ; SSSE3-NEXT: pand %xmm1, %xmm2
22765 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22766 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
22767 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
22768 ; SSSE3-NEXT: psrlw $4, %xmm0
22769 ; SSSE3-NEXT: pand %xmm1, %xmm0
22770 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
22771 ; SSSE3-NEXT: paddb %xmm4, %xmm3
22772 ; SSSE3-NEXT: pxor %xmm0, %xmm0
22773 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
22774 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
22775 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
22776 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
22777 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483666,2147483666]
22778 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
22779 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
22780 ; SSSE3-NEXT: pand %xmm1, %xmm2
22781 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
22782 ; SSSE3-NEXT: por %xmm2, %xmm0
22785 ; SSE41-LABEL: ult_18_v2i64:
22787 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22788 ; SSE41-NEXT: movdqa %xmm0, %xmm2
22789 ; SSE41-NEXT: pand %xmm1, %xmm2
22790 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22791 ; SSE41-NEXT: movdqa %xmm3, %xmm4
22792 ; SSE41-NEXT: pshufb %xmm2, %xmm4
22793 ; SSE41-NEXT: psrlw $4, %xmm0
22794 ; SSE41-NEXT: pand %xmm1, %xmm0
22795 ; SSE41-NEXT: pshufb %xmm0, %xmm3
22796 ; SSE41-NEXT: paddb %xmm4, %xmm3
22797 ; SSE41-NEXT: pxor %xmm0, %xmm0
22798 ; SSE41-NEXT: psadbw %xmm0, %xmm3
22799 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
22800 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
22801 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
22802 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483666,2147483666]
22803 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
22804 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
22805 ; SSE41-NEXT: pand %xmm1, %xmm2
22806 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
22807 ; SSE41-NEXT: por %xmm2, %xmm0
22810 ; AVX1-LABEL: ult_18_v2i64:
22812 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22813 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
22814 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22815 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
22816 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
22817 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
22818 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
22819 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
22820 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
22821 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22822 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18]
22823 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22826 ; AVX2-LABEL: ult_18_v2i64:
22828 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22829 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
22830 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22831 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
22832 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
22833 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
22834 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
22835 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
22836 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
22837 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22838 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18]
22839 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22842 ; AVX512VPOPCNTDQ-LABEL: ult_18_v2i64:
22843 ; AVX512VPOPCNTDQ: # %bb.0:
22844 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22845 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
22846 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18]
22847 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22848 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
22849 ; AVX512VPOPCNTDQ-NEXT: retq
22851 ; AVX512VPOPCNTDQVL-LABEL: ult_18_v2i64:
22852 ; AVX512VPOPCNTDQVL: # %bb.0:
22853 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
22854 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
22855 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
22856 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
22857 ; AVX512VPOPCNTDQVL-NEXT: retq
22859 ; BITALG_NOVLX-LABEL: ult_18_v2i64:
22860 ; BITALG_NOVLX: # %bb.0:
22861 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22862 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
22863 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
22864 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22865 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18]
22866 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22867 ; BITALG_NOVLX-NEXT: vzeroupper
22868 ; BITALG_NOVLX-NEXT: retq
22870 ; BITALG-LABEL: ult_18_v2i64:
22872 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
22873 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
22874 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22875 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
22876 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
22877 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
22878 ; BITALG-NEXT: retq
22879 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
22880 %3 = icmp ult <2 x i64> %2, <i64 18, i64 18>
22881 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_18_v2i64: verifies lowering of sext(icmp ugt (ctpop x), 18) for <2 x i64>.
; The ugt form compares against a memory constant (pcmpgtd/vpcmpgtq with an
; RIP-relative operand) instead of a materialized register constant; AVX512VL
; and BITALG targets use vpcmpnleuq mask compares.
; NOTE(review): CHECK lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate with that script rather than hand-editing.
22885 define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) {
22886 ; SSE2-LABEL: ugt_18_v2i64:
22888 ; SSE2-NEXT: movdqa %xmm0, %xmm1
22889 ; SSE2-NEXT: psrlw $1, %xmm1
22890 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22891 ; SSE2-NEXT: psubb %xmm1, %xmm0
22892 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22893 ; SSE2-NEXT: movdqa %xmm0, %xmm2
22894 ; SSE2-NEXT: pand %xmm1, %xmm2
22895 ; SSE2-NEXT: psrlw $2, %xmm0
22896 ; SSE2-NEXT: pand %xmm1, %xmm0
22897 ; SSE2-NEXT: paddb %xmm2, %xmm0
22898 ; SSE2-NEXT: movdqa %xmm0, %xmm1
22899 ; SSE2-NEXT: psrlw $4, %xmm1
22900 ; SSE2-NEXT: paddb %xmm0, %xmm1
22901 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22902 ; SSE2-NEXT: pxor %xmm0, %xmm0
22903 ; SSE2-NEXT: psadbw %xmm0, %xmm1
22904 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22905 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
22906 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
22907 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22908 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
22909 ; SSE2-NEXT: pand %xmm2, %xmm3
22910 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
22911 ; SSE2-NEXT: por %xmm3, %xmm0
22914 ; SSE3-LABEL: ugt_18_v2i64:
22916 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22917 ; SSE3-NEXT: psrlw $1, %xmm1
22918 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22919 ; SSE3-NEXT: psubb %xmm1, %xmm0
22920 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22921 ; SSE3-NEXT: movdqa %xmm0, %xmm2
22922 ; SSE3-NEXT: pand %xmm1, %xmm2
22923 ; SSE3-NEXT: psrlw $2, %xmm0
22924 ; SSE3-NEXT: pand %xmm1, %xmm0
22925 ; SSE3-NEXT: paddb %xmm2, %xmm0
22926 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22927 ; SSE3-NEXT: psrlw $4, %xmm1
22928 ; SSE3-NEXT: paddb %xmm0, %xmm1
22929 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22930 ; SSE3-NEXT: pxor %xmm0, %xmm0
22931 ; SSE3-NEXT: psadbw %xmm0, %xmm1
22932 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22933 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
22934 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
22935 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22936 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
22937 ; SSE3-NEXT: pand %xmm2, %xmm3
22938 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
22939 ; SSE3-NEXT: por %xmm3, %xmm0
22942 ; SSSE3-LABEL: ugt_18_v2i64:
22944 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22945 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
22946 ; SSSE3-NEXT: pand %xmm1, %xmm2
22947 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22948 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
22949 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
22950 ; SSSE3-NEXT: psrlw $4, %xmm0
22951 ; SSSE3-NEXT: pand %xmm1, %xmm0
22952 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
22953 ; SSSE3-NEXT: paddb %xmm4, %xmm3
22954 ; SSSE3-NEXT: pxor %xmm0, %xmm0
22955 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
22956 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
22957 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
22958 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
22959 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
22960 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
22961 ; SSSE3-NEXT: pand %xmm1, %xmm2
22962 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
22963 ; SSSE3-NEXT: por %xmm2, %xmm0
22966 ; SSE41-LABEL: ugt_18_v2i64:
22968 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22969 ; SSE41-NEXT: movdqa %xmm0, %xmm2
22970 ; SSE41-NEXT: pand %xmm1, %xmm2
22971 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22972 ; SSE41-NEXT: movdqa %xmm3, %xmm4
22973 ; SSE41-NEXT: pshufb %xmm2, %xmm4
22974 ; SSE41-NEXT: psrlw $4, %xmm0
22975 ; SSE41-NEXT: pand %xmm1, %xmm0
22976 ; SSE41-NEXT: pshufb %xmm0, %xmm3
22977 ; SSE41-NEXT: paddb %xmm4, %xmm3
22978 ; SSE41-NEXT: pxor %xmm0, %xmm0
22979 ; SSE41-NEXT: psadbw %xmm0, %xmm3
22980 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
22981 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
22982 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
22983 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
22984 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
22985 ; SSE41-NEXT: pand %xmm1, %xmm2
22986 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
22987 ; SSE41-NEXT: por %xmm2, %xmm0
22990 ; AVX1-LABEL: ugt_18_v2i64:
22992 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22993 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
22994 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22995 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
22996 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
22997 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
22998 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
22999 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23000 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
23001 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23002 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
23005 ; AVX2-LABEL: ugt_18_v2i64:
23007 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23008 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
23009 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23010 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
23011 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
23012 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
23013 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
23014 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23015 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
23016 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23017 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
23020 ; AVX512VPOPCNTDQ-LABEL: ugt_18_v2i64:
23021 ; AVX512VPOPCNTDQ: # %bb.0:
23022 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23023 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
23024 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
23025 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
23026 ; AVX512VPOPCNTDQ-NEXT: retq
23028 ; AVX512VPOPCNTDQVL-LABEL: ugt_18_v2i64:
23029 ; AVX512VPOPCNTDQVL: # %bb.0:
23030 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
23031 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
23032 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
23033 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
23034 ; AVX512VPOPCNTDQVL-NEXT: retq
23036 ; BITALG_NOVLX-LABEL: ugt_18_v2i64:
23037 ; BITALG_NOVLX: # %bb.0:
23038 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23039 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
23040 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
23041 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23042 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
23043 ; BITALG_NOVLX-NEXT: vzeroupper
23044 ; BITALG_NOVLX-NEXT: retq
23046 ; BITALG-LABEL: ugt_18_v2i64:
23048 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
23049 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
23050 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23051 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
23052 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
23053 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
23054 ; BITALG-NEXT: retq
23055 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
23056 %3 = icmp ugt <2 x i64> %2, <i64 18, i64 18>
23057 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_19_v2i64: verifies lowering of sext(icmp ult (ctpop x), 19) for <2 x i64>.
; Same shape as ult_18 but with threshold 19: the SSE double-dword compare uses
; the bias-adjusted constant 2147483667 and the AVX paths materialize [19,19].
; NOTE(review): CHECK lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate with that script rather than hand-editing.
23061 define <2 x i64> @ult_19_v2i64(<2 x i64> %0) {
23062 ; SSE2-LABEL: ult_19_v2i64:
23064 ; SSE2-NEXT: movdqa %xmm0, %xmm1
23065 ; SSE2-NEXT: psrlw $1, %xmm1
23066 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23067 ; SSE2-NEXT: psubb %xmm1, %xmm0
23068 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
23069 ; SSE2-NEXT: movdqa %xmm0, %xmm2
23070 ; SSE2-NEXT: pand %xmm1, %xmm2
23071 ; SSE2-NEXT: psrlw $2, %xmm0
23072 ; SSE2-NEXT: pand %xmm1, %xmm0
23073 ; SSE2-NEXT: paddb %xmm2, %xmm0
23074 ; SSE2-NEXT: movdqa %xmm0, %xmm1
23075 ; SSE2-NEXT: psrlw $4, %xmm1
23076 ; SSE2-NEXT: paddb %xmm0, %xmm1
23077 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23078 ; SSE2-NEXT: pxor %xmm0, %xmm0
23079 ; SSE2-NEXT: psadbw %xmm0, %xmm1
23080 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23081 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
23082 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
23083 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483667,2147483667]
23084 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
23085 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
23086 ; SSE2-NEXT: pand %xmm2, %xmm1
23087 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
23088 ; SSE2-NEXT: por %xmm1, %xmm0
23091 ; SSE3-LABEL: ult_19_v2i64:
23093 ; SSE3-NEXT: movdqa %xmm0, %xmm1
23094 ; SSE3-NEXT: psrlw $1, %xmm1
23095 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23096 ; SSE3-NEXT: psubb %xmm1, %xmm0
23097 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
23098 ; SSE3-NEXT: movdqa %xmm0, %xmm2
23099 ; SSE3-NEXT: pand %xmm1, %xmm2
23100 ; SSE3-NEXT: psrlw $2, %xmm0
23101 ; SSE3-NEXT: pand %xmm1, %xmm0
23102 ; SSE3-NEXT: paddb %xmm2, %xmm0
23103 ; SSE3-NEXT: movdqa %xmm0, %xmm1
23104 ; SSE3-NEXT: psrlw $4, %xmm1
23105 ; SSE3-NEXT: paddb %xmm0, %xmm1
23106 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23107 ; SSE3-NEXT: pxor %xmm0, %xmm0
23108 ; SSE3-NEXT: psadbw %xmm0, %xmm1
23109 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23110 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
23111 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
23112 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483667,2147483667]
23113 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
23114 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
23115 ; SSE3-NEXT: pand %xmm2, %xmm1
23116 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
23117 ; SSE3-NEXT: por %xmm1, %xmm0
23120 ; SSSE3-LABEL: ult_19_v2i64:
23122 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23123 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
23124 ; SSSE3-NEXT: pand %xmm1, %xmm2
23125 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23126 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
23127 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
23128 ; SSSE3-NEXT: psrlw $4, %xmm0
23129 ; SSSE3-NEXT: pand %xmm1, %xmm0
23130 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
23131 ; SSSE3-NEXT: paddb %xmm4, %xmm3
23132 ; SSSE3-NEXT: pxor %xmm0, %xmm0
23133 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
23134 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
23135 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
23136 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
23137 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483667,2147483667]
23138 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
23139 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
23140 ; SSSE3-NEXT: pand %xmm1, %xmm2
23141 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
23142 ; SSSE3-NEXT: por %xmm2, %xmm0
23145 ; SSE41-LABEL: ult_19_v2i64:
23147 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23148 ; SSE41-NEXT: movdqa %xmm0, %xmm2
23149 ; SSE41-NEXT: pand %xmm1, %xmm2
23150 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23151 ; SSE41-NEXT: movdqa %xmm3, %xmm4
23152 ; SSE41-NEXT: pshufb %xmm2, %xmm4
23153 ; SSE41-NEXT: psrlw $4, %xmm0
23154 ; SSE41-NEXT: pand %xmm1, %xmm0
23155 ; SSE41-NEXT: pshufb %xmm0, %xmm3
23156 ; SSE41-NEXT: paddb %xmm4, %xmm3
23157 ; SSE41-NEXT: pxor %xmm0, %xmm0
23158 ; SSE41-NEXT: psadbw %xmm0, %xmm3
23159 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
23160 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
23161 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
23162 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483667,2147483667]
23163 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
23164 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
23165 ; SSE41-NEXT: pand %xmm1, %xmm2
23166 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
23167 ; SSE41-NEXT: por %xmm2, %xmm0
23170 ; AVX1-LABEL: ult_19_v2i64:
23172 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23173 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
23174 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23175 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
23176 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
23177 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
23178 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
23179 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23180 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
23181 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23182 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19]
23183 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23186 ; AVX2-LABEL: ult_19_v2i64:
23188 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23189 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
23190 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23191 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
23192 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
23193 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
23194 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
23195 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23196 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
23197 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23198 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19]
23199 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23202 ; AVX512VPOPCNTDQ-LABEL: ult_19_v2i64:
23203 ; AVX512VPOPCNTDQ: # %bb.0:
23204 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23205 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
23206 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19]
23207 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23208 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
23209 ; AVX512VPOPCNTDQ-NEXT: retq
23211 ; AVX512VPOPCNTDQVL-LABEL: ult_19_v2i64:
23212 ; AVX512VPOPCNTDQVL: # %bb.0:
23213 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
23214 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
23215 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
23216 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
23217 ; AVX512VPOPCNTDQVL-NEXT: retq
23219 ; BITALG_NOVLX-LABEL: ult_19_v2i64:
23220 ; BITALG_NOVLX: # %bb.0:
23221 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23222 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
23223 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
23224 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23225 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19]
23226 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23227 ; BITALG_NOVLX-NEXT: vzeroupper
23228 ; BITALG_NOVLX-NEXT: retq
23230 ; BITALG-LABEL: ult_19_v2i64:
23232 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
23233 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
23234 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23235 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
23236 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
23237 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
23238 ; BITALG-NEXT: retq
23239 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
23240 %3 = icmp ult <2 x i64> %2, <i64 19, i64 19>
23241 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_19_v2i64: verifies lowering of sext(icmp ugt (ctpop x), 19) for <2 x i64>.
; Mirrors ugt_18 with the next threshold: memory-operand pcmpgtd/vpcmpgtq for
; the signed compare and vpcmpnleuq mask compares on AVX512VL/BITALG.
; NOTE(review): CHECK lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate with that script rather than hand-editing.
23245 define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) {
23246 ; SSE2-LABEL: ugt_19_v2i64:
23248 ; SSE2-NEXT: movdqa %xmm0, %xmm1
23249 ; SSE2-NEXT: psrlw $1, %xmm1
23250 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23251 ; SSE2-NEXT: psubb %xmm1, %xmm0
23252 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
23253 ; SSE2-NEXT: movdqa %xmm0, %xmm2
23254 ; SSE2-NEXT: pand %xmm1, %xmm2
23255 ; SSE2-NEXT: psrlw $2, %xmm0
23256 ; SSE2-NEXT: pand %xmm1, %xmm0
23257 ; SSE2-NEXT: paddb %xmm2, %xmm0
23258 ; SSE2-NEXT: movdqa %xmm0, %xmm1
23259 ; SSE2-NEXT: psrlw $4, %xmm1
23260 ; SSE2-NEXT: paddb %xmm0, %xmm1
23261 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23262 ; SSE2-NEXT: pxor %xmm0, %xmm0
23263 ; SSE2-NEXT: psadbw %xmm0, %xmm1
23264 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23265 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
23266 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
23267 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23268 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
23269 ; SSE2-NEXT: pand %xmm2, %xmm3
23270 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
23271 ; SSE2-NEXT: por %xmm3, %xmm0
23274 ; SSE3-LABEL: ugt_19_v2i64:
23276 ; SSE3-NEXT: movdqa %xmm0, %xmm1
23277 ; SSE3-NEXT: psrlw $1, %xmm1
23278 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23279 ; SSE3-NEXT: psubb %xmm1, %xmm0
23280 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
23281 ; SSE3-NEXT: movdqa %xmm0, %xmm2
23282 ; SSE3-NEXT: pand %xmm1, %xmm2
23283 ; SSE3-NEXT: psrlw $2, %xmm0
23284 ; SSE3-NEXT: pand %xmm1, %xmm0
23285 ; SSE3-NEXT: paddb %xmm2, %xmm0
23286 ; SSE3-NEXT: movdqa %xmm0, %xmm1
23287 ; SSE3-NEXT: psrlw $4, %xmm1
23288 ; SSE3-NEXT: paddb %xmm0, %xmm1
23289 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23290 ; SSE3-NEXT: pxor %xmm0, %xmm0
23291 ; SSE3-NEXT: psadbw %xmm0, %xmm1
23292 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23293 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
23294 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
23295 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23296 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
23297 ; SSE3-NEXT: pand %xmm2, %xmm3
23298 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
23299 ; SSE3-NEXT: por %xmm3, %xmm0
23302 ; SSSE3-LABEL: ugt_19_v2i64:
23304 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23305 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
23306 ; SSSE3-NEXT: pand %xmm1, %xmm2
23307 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23308 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
23309 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
23310 ; SSSE3-NEXT: psrlw $4, %xmm0
23311 ; SSSE3-NEXT: pand %xmm1, %xmm0
23312 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
23313 ; SSSE3-NEXT: paddb %xmm4, %xmm3
23314 ; SSSE3-NEXT: pxor %xmm0, %xmm0
23315 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
23316 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
23317 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
23318 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
23319 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
23320 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
23321 ; SSSE3-NEXT: pand %xmm1, %xmm2
23322 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
23323 ; SSSE3-NEXT: por %xmm2, %xmm0
23326 ; SSE41-LABEL: ugt_19_v2i64:
23328 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23329 ; SSE41-NEXT: movdqa %xmm0, %xmm2
23330 ; SSE41-NEXT: pand %xmm1, %xmm2
23331 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23332 ; SSE41-NEXT: movdqa %xmm3, %xmm4
23333 ; SSE41-NEXT: pshufb %xmm2, %xmm4
23334 ; SSE41-NEXT: psrlw $4, %xmm0
23335 ; SSE41-NEXT: pand %xmm1, %xmm0
23336 ; SSE41-NEXT: pshufb %xmm0, %xmm3
23337 ; SSE41-NEXT: paddb %xmm4, %xmm3
23338 ; SSE41-NEXT: pxor %xmm0, %xmm0
23339 ; SSE41-NEXT: psadbw %xmm0, %xmm3
23340 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
23341 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
23342 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
23343 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
23344 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
23345 ; SSE41-NEXT: pand %xmm1, %xmm2
23346 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
23347 ; SSE41-NEXT: por %xmm2, %xmm0
23350 ; AVX1-LABEL: ugt_19_v2i64:
23352 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23353 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
23354 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23355 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
23356 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
23357 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
23358 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
23359 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23360 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
23361 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23362 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
23365 ; AVX2-LABEL: ugt_19_v2i64:
23367 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23368 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
23369 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23370 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
23371 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
23372 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
23373 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
23374 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23375 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
23376 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23377 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
23380 ; AVX512VPOPCNTDQ-LABEL: ugt_19_v2i64:
23381 ; AVX512VPOPCNTDQ: # %bb.0:
23382 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23383 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
23384 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
23385 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
23386 ; AVX512VPOPCNTDQ-NEXT: retq
23388 ; AVX512VPOPCNTDQVL-LABEL: ugt_19_v2i64:
23389 ; AVX512VPOPCNTDQVL: # %bb.0:
23390 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
23391 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
23392 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
23393 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
23394 ; AVX512VPOPCNTDQVL-NEXT: retq
23396 ; BITALG_NOVLX-LABEL: ugt_19_v2i64:
23397 ; BITALG_NOVLX: # %bb.0:
23398 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23399 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
23400 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
23401 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23402 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
23403 ; BITALG_NOVLX-NEXT: vzeroupper
23404 ; BITALG_NOVLX-NEXT: retq
23406 ; BITALG-LABEL: ugt_19_v2i64:
23408 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
23409 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
23410 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23411 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
23412 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
23413 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
23414 ; BITALG-NEXT: retq
23415 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
23416 %3 = icmp ugt <2 x i64> %2, <i64 19, i64 19>
23417 %4 = sext <2 x i1> %3 to <2 x i64>
23421 define <2 x i64> @ult_20_v2i64(<2 x i64> %0) {
23422 ; SSE2-LABEL: ult_20_v2i64:
23424 ; SSE2-NEXT: movdqa %xmm0, %xmm1
23425 ; SSE2-NEXT: psrlw $1, %xmm1
23426 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23427 ; SSE2-NEXT: psubb %xmm1, %xmm0
23428 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
23429 ; SSE2-NEXT: movdqa %xmm0, %xmm2
23430 ; SSE2-NEXT: pand %xmm1, %xmm2
23431 ; SSE2-NEXT: psrlw $2, %xmm0
23432 ; SSE2-NEXT: pand %xmm1, %xmm0
23433 ; SSE2-NEXT: paddb %xmm2, %xmm0
23434 ; SSE2-NEXT: movdqa %xmm0, %xmm1
23435 ; SSE2-NEXT: psrlw $4, %xmm1
23436 ; SSE2-NEXT: paddb %xmm0, %xmm1
23437 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23438 ; SSE2-NEXT: pxor %xmm0, %xmm0
23439 ; SSE2-NEXT: psadbw %xmm0, %xmm1
23440 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23441 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
23442 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
23443 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483668,2147483668]
23444 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
23445 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
23446 ; SSE2-NEXT: pand %xmm2, %xmm1
23447 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
23448 ; SSE2-NEXT: por %xmm1, %xmm0
23451 ; SSE3-LABEL: ult_20_v2i64:
23453 ; SSE3-NEXT: movdqa %xmm0, %xmm1
23454 ; SSE3-NEXT: psrlw $1, %xmm1
23455 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23456 ; SSE3-NEXT: psubb %xmm1, %xmm0
23457 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
23458 ; SSE3-NEXT: movdqa %xmm0, %xmm2
23459 ; SSE3-NEXT: pand %xmm1, %xmm2
23460 ; SSE3-NEXT: psrlw $2, %xmm0
23461 ; SSE3-NEXT: pand %xmm1, %xmm0
23462 ; SSE3-NEXT: paddb %xmm2, %xmm0
23463 ; SSE3-NEXT: movdqa %xmm0, %xmm1
23464 ; SSE3-NEXT: psrlw $4, %xmm1
23465 ; SSE3-NEXT: paddb %xmm0, %xmm1
23466 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23467 ; SSE3-NEXT: pxor %xmm0, %xmm0
23468 ; SSE3-NEXT: psadbw %xmm0, %xmm1
23469 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23470 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
23471 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
23472 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483668,2147483668]
23473 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
23474 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
23475 ; SSE3-NEXT: pand %xmm2, %xmm1
23476 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
23477 ; SSE3-NEXT: por %xmm1, %xmm0
23480 ; SSSE3-LABEL: ult_20_v2i64:
23482 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23483 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
23484 ; SSSE3-NEXT: pand %xmm1, %xmm2
23485 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23486 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
23487 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
23488 ; SSSE3-NEXT: psrlw $4, %xmm0
23489 ; SSSE3-NEXT: pand %xmm1, %xmm0
23490 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
23491 ; SSSE3-NEXT: paddb %xmm4, %xmm3
23492 ; SSSE3-NEXT: pxor %xmm0, %xmm0
23493 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
23494 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
23495 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
23496 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
23497 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483668,2147483668]
23498 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
23499 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
23500 ; SSSE3-NEXT: pand %xmm1, %xmm2
23501 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
23502 ; SSSE3-NEXT: por %xmm2, %xmm0
23505 ; SSE41-LABEL: ult_20_v2i64:
23507 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23508 ; SSE41-NEXT: movdqa %xmm0, %xmm2
23509 ; SSE41-NEXT: pand %xmm1, %xmm2
23510 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23511 ; SSE41-NEXT: movdqa %xmm3, %xmm4
23512 ; SSE41-NEXT: pshufb %xmm2, %xmm4
23513 ; SSE41-NEXT: psrlw $4, %xmm0
23514 ; SSE41-NEXT: pand %xmm1, %xmm0
23515 ; SSE41-NEXT: pshufb %xmm0, %xmm3
23516 ; SSE41-NEXT: paddb %xmm4, %xmm3
23517 ; SSE41-NEXT: pxor %xmm0, %xmm0
23518 ; SSE41-NEXT: psadbw %xmm0, %xmm3
23519 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
23520 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
23521 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
23522 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483668,2147483668]
23523 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
23524 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
23525 ; SSE41-NEXT: pand %xmm1, %xmm2
23526 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
23527 ; SSE41-NEXT: por %xmm2, %xmm0
23530 ; AVX1-LABEL: ult_20_v2i64:
23532 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23533 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
23534 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23535 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
23536 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
23537 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
23538 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
23539 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23540 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
23541 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23542 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20]
23543 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23546 ; AVX2-LABEL: ult_20_v2i64:
23548 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23549 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
23550 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23551 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
23552 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
23553 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
23554 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
23555 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23556 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
23557 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23558 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20]
23559 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23562 ; AVX512VPOPCNTDQ-LABEL: ult_20_v2i64:
23563 ; AVX512VPOPCNTDQ: # %bb.0:
23564 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23565 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
23566 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20]
23567 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23568 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
23569 ; AVX512VPOPCNTDQ-NEXT: retq
23571 ; AVX512VPOPCNTDQVL-LABEL: ult_20_v2i64:
23572 ; AVX512VPOPCNTDQVL: # %bb.0:
23573 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
23574 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
23575 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
23576 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
23577 ; AVX512VPOPCNTDQVL-NEXT: retq
23579 ; BITALG_NOVLX-LABEL: ult_20_v2i64:
23580 ; BITALG_NOVLX: # %bb.0:
23581 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23582 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
23583 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
23584 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23585 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20]
23586 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23587 ; BITALG_NOVLX-NEXT: vzeroupper
23588 ; BITALG_NOVLX-NEXT: retq
23590 ; BITALG-LABEL: ult_20_v2i64:
23592 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
23593 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
23594 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23595 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
23596 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
23597 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
23598 ; BITALG-NEXT: retq
23599 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
23600 %3 = icmp ult <2 x i64> %2, <i64 20, i64 20>
23601 %4 = sext <2 x i1> %3 to <2 x i64>
23605 define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) {
23606 ; SSE2-LABEL: ugt_20_v2i64:
23608 ; SSE2-NEXT: movdqa %xmm0, %xmm1
23609 ; SSE2-NEXT: psrlw $1, %xmm1
23610 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23611 ; SSE2-NEXT: psubb %xmm1, %xmm0
23612 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
23613 ; SSE2-NEXT: movdqa %xmm0, %xmm2
23614 ; SSE2-NEXT: pand %xmm1, %xmm2
23615 ; SSE2-NEXT: psrlw $2, %xmm0
23616 ; SSE2-NEXT: pand %xmm1, %xmm0
23617 ; SSE2-NEXT: paddb %xmm2, %xmm0
23618 ; SSE2-NEXT: movdqa %xmm0, %xmm1
23619 ; SSE2-NEXT: psrlw $4, %xmm1
23620 ; SSE2-NEXT: paddb %xmm0, %xmm1
23621 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23622 ; SSE2-NEXT: pxor %xmm0, %xmm0
23623 ; SSE2-NEXT: psadbw %xmm0, %xmm1
23624 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23625 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
23626 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
23627 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23628 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
23629 ; SSE2-NEXT: pand %xmm2, %xmm3
23630 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
23631 ; SSE2-NEXT: por %xmm3, %xmm0
23634 ; SSE3-LABEL: ugt_20_v2i64:
23636 ; SSE3-NEXT: movdqa %xmm0, %xmm1
23637 ; SSE3-NEXT: psrlw $1, %xmm1
23638 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23639 ; SSE3-NEXT: psubb %xmm1, %xmm0
23640 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
23641 ; SSE3-NEXT: movdqa %xmm0, %xmm2
23642 ; SSE3-NEXT: pand %xmm1, %xmm2
23643 ; SSE3-NEXT: psrlw $2, %xmm0
23644 ; SSE3-NEXT: pand %xmm1, %xmm0
23645 ; SSE3-NEXT: paddb %xmm2, %xmm0
23646 ; SSE3-NEXT: movdqa %xmm0, %xmm1
23647 ; SSE3-NEXT: psrlw $4, %xmm1
23648 ; SSE3-NEXT: paddb %xmm0, %xmm1
23649 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23650 ; SSE3-NEXT: pxor %xmm0, %xmm0
23651 ; SSE3-NEXT: psadbw %xmm0, %xmm1
23652 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23653 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
23654 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
23655 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23656 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
23657 ; SSE3-NEXT: pand %xmm2, %xmm3
23658 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
23659 ; SSE3-NEXT: por %xmm3, %xmm0
23662 ; SSSE3-LABEL: ugt_20_v2i64:
23664 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23665 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
23666 ; SSSE3-NEXT: pand %xmm1, %xmm2
23667 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23668 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
23669 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
23670 ; SSSE3-NEXT: psrlw $4, %xmm0
23671 ; SSSE3-NEXT: pand %xmm1, %xmm0
23672 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
23673 ; SSSE3-NEXT: paddb %xmm4, %xmm3
23674 ; SSSE3-NEXT: pxor %xmm0, %xmm0
23675 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
23676 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
23677 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
23678 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
23679 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
23680 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
23681 ; SSSE3-NEXT: pand %xmm1, %xmm2
23682 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
23683 ; SSSE3-NEXT: por %xmm2, %xmm0
23686 ; SSE41-LABEL: ugt_20_v2i64:
23688 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23689 ; SSE41-NEXT: movdqa %xmm0, %xmm2
23690 ; SSE41-NEXT: pand %xmm1, %xmm2
23691 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23692 ; SSE41-NEXT: movdqa %xmm3, %xmm4
23693 ; SSE41-NEXT: pshufb %xmm2, %xmm4
23694 ; SSE41-NEXT: psrlw $4, %xmm0
23695 ; SSE41-NEXT: pand %xmm1, %xmm0
23696 ; SSE41-NEXT: pshufb %xmm0, %xmm3
23697 ; SSE41-NEXT: paddb %xmm4, %xmm3
23698 ; SSE41-NEXT: pxor %xmm0, %xmm0
23699 ; SSE41-NEXT: psadbw %xmm0, %xmm3
23700 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
23701 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
23702 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
23703 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
23704 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
23705 ; SSE41-NEXT: pand %xmm1, %xmm2
23706 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
23707 ; SSE41-NEXT: por %xmm2, %xmm0
23710 ; AVX1-LABEL: ugt_20_v2i64:
23712 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23713 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
23714 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23715 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
23716 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
23717 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
23718 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
23719 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23720 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
23721 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23722 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
23725 ; AVX2-LABEL: ugt_20_v2i64:
23727 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23728 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
23729 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23730 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
23731 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
23732 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
23733 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
23734 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23735 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
23736 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23737 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
23740 ; AVX512VPOPCNTDQ-LABEL: ugt_20_v2i64:
23741 ; AVX512VPOPCNTDQ: # %bb.0:
23742 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23743 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
23744 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
23745 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
23746 ; AVX512VPOPCNTDQ-NEXT: retq
23748 ; AVX512VPOPCNTDQVL-LABEL: ugt_20_v2i64:
23749 ; AVX512VPOPCNTDQVL: # %bb.0:
23750 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
23751 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
23752 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
23753 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
23754 ; AVX512VPOPCNTDQVL-NEXT: retq
23756 ; BITALG_NOVLX-LABEL: ugt_20_v2i64:
23757 ; BITALG_NOVLX: # %bb.0:
23758 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23759 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
23760 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
23761 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23762 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
23763 ; BITALG_NOVLX-NEXT: vzeroupper
23764 ; BITALG_NOVLX-NEXT: retq
23766 ; BITALG-LABEL: ugt_20_v2i64:
23768 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
23769 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
23770 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23771 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
23772 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
23773 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
23774 ; BITALG-NEXT: retq
23775 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
23776 %3 = icmp ugt <2 x i64> %2, <i64 20, i64 20>
23777 %4 = sext <2 x i1> %3 to <2 x i64>
23781 define <2 x i64> @ult_21_v2i64(<2 x i64> %0) {
23782 ; SSE2-LABEL: ult_21_v2i64:
23784 ; SSE2-NEXT: movdqa %xmm0, %xmm1
23785 ; SSE2-NEXT: psrlw $1, %xmm1
23786 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23787 ; SSE2-NEXT: psubb %xmm1, %xmm0
23788 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
23789 ; SSE2-NEXT: movdqa %xmm0, %xmm2
23790 ; SSE2-NEXT: pand %xmm1, %xmm2
23791 ; SSE2-NEXT: psrlw $2, %xmm0
23792 ; SSE2-NEXT: pand %xmm1, %xmm0
23793 ; SSE2-NEXT: paddb %xmm2, %xmm0
23794 ; SSE2-NEXT: movdqa %xmm0, %xmm1
23795 ; SSE2-NEXT: psrlw $4, %xmm1
23796 ; SSE2-NEXT: paddb %xmm0, %xmm1
23797 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23798 ; SSE2-NEXT: pxor %xmm0, %xmm0
23799 ; SSE2-NEXT: psadbw %xmm0, %xmm1
23800 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23801 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
23802 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
23803 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483669,2147483669]
23804 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
23805 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
23806 ; SSE2-NEXT: pand %xmm2, %xmm1
23807 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
23808 ; SSE2-NEXT: por %xmm1, %xmm0
23811 ; SSE3-LABEL: ult_21_v2i64:
23813 ; SSE3-NEXT: movdqa %xmm0, %xmm1
23814 ; SSE3-NEXT: psrlw $1, %xmm1
23815 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23816 ; SSE3-NEXT: psubb %xmm1, %xmm0
23817 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
23818 ; SSE3-NEXT: movdqa %xmm0, %xmm2
23819 ; SSE3-NEXT: pand %xmm1, %xmm2
23820 ; SSE3-NEXT: psrlw $2, %xmm0
23821 ; SSE3-NEXT: pand %xmm1, %xmm0
23822 ; SSE3-NEXT: paddb %xmm2, %xmm0
23823 ; SSE3-NEXT: movdqa %xmm0, %xmm1
23824 ; SSE3-NEXT: psrlw $4, %xmm1
23825 ; SSE3-NEXT: paddb %xmm0, %xmm1
23826 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23827 ; SSE3-NEXT: pxor %xmm0, %xmm0
23828 ; SSE3-NEXT: psadbw %xmm0, %xmm1
23829 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23830 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
23831 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
23832 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483669,2147483669]
23833 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
23834 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
23835 ; SSE3-NEXT: pand %xmm2, %xmm1
23836 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
23837 ; SSE3-NEXT: por %xmm1, %xmm0
23840 ; SSSE3-LABEL: ult_21_v2i64:
23842 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23843 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
23844 ; SSSE3-NEXT: pand %xmm1, %xmm2
23845 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23846 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
23847 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
23848 ; SSSE3-NEXT: psrlw $4, %xmm0
23849 ; SSSE3-NEXT: pand %xmm1, %xmm0
23850 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
23851 ; SSSE3-NEXT: paddb %xmm4, %xmm3
23852 ; SSSE3-NEXT: pxor %xmm0, %xmm0
23853 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
23854 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
23855 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
23856 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
23857 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483669,2147483669]
23858 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
23859 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
23860 ; SSSE3-NEXT: pand %xmm1, %xmm2
23861 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
23862 ; SSSE3-NEXT: por %xmm2, %xmm0
23865 ; SSE41-LABEL: ult_21_v2i64:
23867 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23868 ; SSE41-NEXT: movdqa %xmm0, %xmm2
23869 ; SSE41-NEXT: pand %xmm1, %xmm2
23870 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23871 ; SSE41-NEXT: movdqa %xmm3, %xmm4
23872 ; SSE41-NEXT: pshufb %xmm2, %xmm4
23873 ; SSE41-NEXT: psrlw $4, %xmm0
23874 ; SSE41-NEXT: pand %xmm1, %xmm0
23875 ; SSE41-NEXT: pshufb %xmm0, %xmm3
23876 ; SSE41-NEXT: paddb %xmm4, %xmm3
23877 ; SSE41-NEXT: pxor %xmm0, %xmm0
23878 ; SSE41-NEXT: psadbw %xmm0, %xmm3
23879 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
23880 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
23881 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
23882 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483669,2147483669]
23883 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
23884 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
23885 ; SSE41-NEXT: pand %xmm1, %xmm2
23886 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
23887 ; SSE41-NEXT: por %xmm2, %xmm0
23890 ; AVX1-LABEL: ult_21_v2i64:
23892 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23893 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
23894 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23895 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
23896 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
23897 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
23898 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
23899 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23900 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
23901 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23902 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21]
23903 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23906 ; AVX2-LABEL: ult_21_v2i64:
23908 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23909 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
23910 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23911 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
23912 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
23913 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
23914 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
23915 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23916 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
23917 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23918 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21]
23919 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23922 ; AVX512VPOPCNTDQ-LABEL: ult_21_v2i64:
23923 ; AVX512VPOPCNTDQ: # %bb.0:
23924 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23925 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
23926 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21]
23927 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23928 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
23929 ; AVX512VPOPCNTDQ-NEXT: retq
23931 ; AVX512VPOPCNTDQVL-LABEL: ult_21_v2i64:
23932 ; AVX512VPOPCNTDQVL: # %bb.0:
23933 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
23934 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
23935 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
23936 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
23937 ; AVX512VPOPCNTDQVL-NEXT: retq
23939 ; BITALG_NOVLX-LABEL: ult_21_v2i64:
23940 ; BITALG_NOVLX: # %bb.0:
23941 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23942 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
23943 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
23944 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23945 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21]
23946 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23947 ; BITALG_NOVLX-NEXT: vzeroupper
23948 ; BITALG_NOVLX-NEXT: retq
23950 ; BITALG-LABEL: ult_21_v2i64:
23952 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
23953 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
23954 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23955 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
23956 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
23957 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
23958 ; BITALG-NEXT: retq
23959 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
23960 %3 = icmp ult <2 x i64> %2, <i64 21, i64 21>
23961 %4 = sext <2 x i1> %3 to <2 x i64>
23965 define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) {
23966 ; SSE2-LABEL: ugt_21_v2i64:
23968 ; SSE2-NEXT: movdqa %xmm0, %xmm1
23969 ; SSE2-NEXT: psrlw $1, %xmm1
23970 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23971 ; SSE2-NEXT: psubb %xmm1, %xmm0
23972 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
23973 ; SSE2-NEXT: movdqa %xmm0, %xmm2
23974 ; SSE2-NEXT: pand %xmm1, %xmm2
23975 ; SSE2-NEXT: psrlw $2, %xmm0
23976 ; SSE2-NEXT: pand %xmm1, %xmm0
23977 ; SSE2-NEXT: paddb %xmm2, %xmm0
23978 ; SSE2-NEXT: movdqa %xmm0, %xmm1
23979 ; SSE2-NEXT: psrlw $4, %xmm1
23980 ; SSE2-NEXT: paddb %xmm0, %xmm1
23981 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23982 ; SSE2-NEXT: pxor %xmm0, %xmm0
23983 ; SSE2-NEXT: psadbw %xmm0, %xmm1
23984 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23985 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
23986 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
23987 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23988 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
23989 ; SSE2-NEXT: pand %xmm2, %xmm3
23990 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
23991 ; SSE2-NEXT: por %xmm3, %xmm0
23994 ; SSE3-LABEL: ugt_21_v2i64:
23996 ; SSE3-NEXT: movdqa %xmm0, %xmm1
23997 ; SSE3-NEXT: psrlw $1, %xmm1
23998 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23999 ; SSE3-NEXT: psubb %xmm1, %xmm0
24000 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
24001 ; SSE3-NEXT: movdqa %xmm0, %xmm2
24002 ; SSE3-NEXT: pand %xmm1, %xmm2
24003 ; SSE3-NEXT: psrlw $2, %xmm0
24004 ; SSE3-NEXT: pand %xmm1, %xmm0
24005 ; SSE3-NEXT: paddb %xmm2, %xmm0
24006 ; SSE3-NEXT: movdqa %xmm0, %xmm1
24007 ; SSE3-NEXT: psrlw $4, %xmm1
24008 ; SSE3-NEXT: paddb %xmm0, %xmm1
24009 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24010 ; SSE3-NEXT: pxor %xmm0, %xmm0
24011 ; SSE3-NEXT: psadbw %xmm0, %xmm1
24012 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24013 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
24014 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
24015 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24016 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
24017 ; SSE3-NEXT: pand %xmm2, %xmm3
24018 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
24019 ; SSE3-NEXT: por %xmm3, %xmm0
24022 ; SSSE3-LABEL: ugt_21_v2i64:
24024 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24025 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
24026 ; SSSE3-NEXT: pand %xmm1, %xmm2
24027 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24028 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
24029 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
24030 ; SSSE3-NEXT: psrlw $4, %xmm0
24031 ; SSSE3-NEXT: pand %xmm1, %xmm0
24032 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
24033 ; SSSE3-NEXT: paddb %xmm4, %xmm3
24034 ; SSSE3-NEXT: pxor %xmm0, %xmm0
24035 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
24036 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
24037 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
24038 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
24039 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
24040 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
24041 ; SSSE3-NEXT: pand %xmm1, %xmm2
24042 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
24043 ; SSSE3-NEXT: por %xmm2, %xmm0
24046 ; SSE41-LABEL: ugt_21_v2i64:
24048 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24049 ; SSE41-NEXT: movdqa %xmm0, %xmm2
24050 ; SSE41-NEXT: pand %xmm1, %xmm2
24051 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24052 ; SSE41-NEXT: movdqa %xmm3, %xmm4
24053 ; SSE41-NEXT: pshufb %xmm2, %xmm4
24054 ; SSE41-NEXT: psrlw $4, %xmm0
24055 ; SSE41-NEXT: pand %xmm1, %xmm0
24056 ; SSE41-NEXT: pshufb %xmm0, %xmm3
24057 ; SSE41-NEXT: paddb %xmm4, %xmm3
24058 ; SSE41-NEXT: pxor %xmm0, %xmm0
24059 ; SSE41-NEXT: psadbw %xmm0, %xmm3
24060 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
24061 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
24062 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
24063 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
24064 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
24065 ; SSE41-NEXT: pand %xmm1, %xmm2
24066 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
24067 ; SSE41-NEXT: por %xmm2, %xmm0
24070 ; AVX1-LABEL: ugt_21_v2i64:
24072 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24073 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
24074 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24075 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
24076 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
24077 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
24078 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
24079 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
24080 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
24081 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24082 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24085 ; AVX2-LABEL: ugt_21_v2i64:
24087 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24088 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
24089 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24090 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
24091 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
24092 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
24093 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
24094 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
24095 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
24096 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24097 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24100 ; AVX512VPOPCNTDQ-LABEL: ugt_21_v2i64:
24101 ; AVX512VPOPCNTDQ: # %bb.0:
24102 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
24103 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
24104 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24105 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
24106 ; AVX512VPOPCNTDQ-NEXT: retq
24108 ; AVX512VPOPCNTDQVL-LABEL: ugt_21_v2i64:
24109 ; AVX512VPOPCNTDQVL: # %bb.0:
24110 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
24111 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
24112 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
24113 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
24114 ; AVX512VPOPCNTDQVL-NEXT: retq
24116 ; BITALG_NOVLX-LABEL: ugt_21_v2i64:
24117 ; BITALG_NOVLX: # %bb.0:
24118 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
24119 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
24120 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
24121 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24122 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24123 ; BITALG_NOVLX-NEXT: vzeroupper
24124 ; BITALG_NOVLX-NEXT: retq
24126 ; BITALG-LABEL: ugt_21_v2i64:
24128 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
24129 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
24130 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24131 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
24132 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
24133 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
24134 ; BITALG-NEXT: retq
24135 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
24136 %3 = icmp ugt <2 x i64> %2, <i64 21, i64 21>
24137 %4 = sext <2 x i1> %3 to <2 x i64>
24141 define <2 x i64> @ult_22_v2i64(<2 x i64> %0) {
24142 ; SSE2-LABEL: ult_22_v2i64:
24144 ; SSE2-NEXT: movdqa %xmm0, %xmm1
24145 ; SSE2-NEXT: psrlw $1, %xmm1
24146 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24147 ; SSE2-NEXT: psubb %xmm1, %xmm0
24148 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
24149 ; SSE2-NEXT: movdqa %xmm0, %xmm2
24150 ; SSE2-NEXT: pand %xmm1, %xmm2
24151 ; SSE2-NEXT: psrlw $2, %xmm0
24152 ; SSE2-NEXT: pand %xmm1, %xmm0
24153 ; SSE2-NEXT: paddb %xmm2, %xmm0
24154 ; SSE2-NEXT: movdqa %xmm0, %xmm1
24155 ; SSE2-NEXT: psrlw $4, %xmm1
24156 ; SSE2-NEXT: paddb %xmm0, %xmm1
24157 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24158 ; SSE2-NEXT: pxor %xmm0, %xmm0
24159 ; SSE2-NEXT: psadbw %xmm0, %xmm1
24160 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24161 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
24162 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
24163 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483670,2147483670]
24164 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
24165 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
24166 ; SSE2-NEXT: pand %xmm2, %xmm1
24167 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
24168 ; SSE2-NEXT: por %xmm1, %xmm0
24171 ; SSE3-LABEL: ult_22_v2i64:
24173 ; SSE3-NEXT: movdqa %xmm0, %xmm1
24174 ; SSE3-NEXT: psrlw $1, %xmm1
24175 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24176 ; SSE3-NEXT: psubb %xmm1, %xmm0
24177 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
24178 ; SSE3-NEXT: movdqa %xmm0, %xmm2
24179 ; SSE3-NEXT: pand %xmm1, %xmm2
24180 ; SSE3-NEXT: psrlw $2, %xmm0
24181 ; SSE3-NEXT: pand %xmm1, %xmm0
24182 ; SSE3-NEXT: paddb %xmm2, %xmm0
24183 ; SSE3-NEXT: movdqa %xmm0, %xmm1
24184 ; SSE3-NEXT: psrlw $4, %xmm1
24185 ; SSE3-NEXT: paddb %xmm0, %xmm1
24186 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24187 ; SSE3-NEXT: pxor %xmm0, %xmm0
24188 ; SSE3-NEXT: psadbw %xmm0, %xmm1
24189 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24190 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
24191 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
24192 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483670,2147483670]
24193 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
24194 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
24195 ; SSE3-NEXT: pand %xmm2, %xmm1
24196 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
24197 ; SSE3-NEXT: por %xmm1, %xmm0
24200 ; SSSE3-LABEL: ult_22_v2i64:
24202 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24203 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
24204 ; SSSE3-NEXT: pand %xmm1, %xmm2
24205 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24206 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
24207 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
24208 ; SSSE3-NEXT: psrlw $4, %xmm0
24209 ; SSSE3-NEXT: pand %xmm1, %xmm0
24210 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
24211 ; SSSE3-NEXT: paddb %xmm4, %xmm3
24212 ; SSSE3-NEXT: pxor %xmm0, %xmm0
24213 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
24214 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
24215 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
24216 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
24217 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483670,2147483670]
24218 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
24219 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
24220 ; SSSE3-NEXT: pand %xmm1, %xmm2
24221 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
24222 ; SSSE3-NEXT: por %xmm2, %xmm0
24225 ; SSE41-LABEL: ult_22_v2i64:
24227 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24228 ; SSE41-NEXT: movdqa %xmm0, %xmm2
24229 ; SSE41-NEXT: pand %xmm1, %xmm2
24230 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24231 ; SSE41-NEXT: movdqa %xmm3, %xmm4
24232 ; SSE41-NEXT: pshufb %xmm2, %xmm4
24233 ; SSE41-NEXT: psrlw $4, %xmm0
24234 ; SSE41-NEXT: pand %xmm1, %xmm0
24235 ; SSE41-NEXT: pshufb %xmm0, %xmm3
24236 ; SSE41-NEXT: paddb %xmm4, %xmm3
24237 ; SSE41-NEXT: pxor %xmm0, %xmm0
24238 ; SSE41-NEXT: psadbw %xmm0, %xmm3
24239 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
24240 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
24241 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
24242 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483670,2147483670]
24243 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
24244 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
24245 ; SSE41-NEXT: pand %xmm1, %xmm2
24246 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
24247 ; SSE41-NEXT: por %xmm2, %xmm0
24250 ; AVX1-LABEL: ult_22_v2i64:
24252 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24253 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
24254 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24255 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
24256 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
24257 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
24258 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
24259 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
24260 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
24261 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24262 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22]
24263 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24266 ; AVX2-LABEL: ult_22_v2i64:
24268 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24269 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
24270 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24271 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
24272 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
24273 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
24274 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
24275 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
24276 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
24277 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24278 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22]
24279 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24282 ; AVX512VPOPCNTDQ-LABEL: ult_22_v2i64:
24283 ; AVX512VPOPCNTDQ: # %bb.0:
24284 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
24285 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
24286 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22]
24287 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24288 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
24289 ; AVX512VPOPCNTDQ-NEXT: retq
24291 ; AVX512VPOPCNTDQVL-LABEL: ult_22_v2i64:
24292 ; AVX512VPOPCNTDQVL: # %bb.0:
24293 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
24294 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
24295 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
24296 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
24297 ; AVX512VPOPCNTDQVL-NEXT: retq
24299 ; BITALG_NOVLX-LABEL: ult_22_v2i64:
24300 ; BITALG_NOVLX: # %bb.0:
24301 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
24302 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
24303 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
24304 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24305 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22]
24306 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24307 ; BITALG_NOVLX-NEXT: vzeroupper
24308 ; BITALG_NOVLX-NEXT: retq
24310 ; BITALG-LABEL: ult_22_v2i64:
24312 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
24313 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
24314 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24315 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
24316 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
24317 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
24318 ; BITALG-NEXT: retq
24319 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
24320 %3 = icmp ult <2 x i64> %2, <i64 22, i64 22>
24321 %4 = sext <2 x i1> %3 to <2 x i64>
24325 define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) {
24326 ; SSE2-LABEL: ugt_22_v2i64:
24328 ; SSE2-NEXT: movdqa %xmm0, %xmm1
24329 ; SSE2-NEXT: psrlw $1, %xmm1
24330 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24331 ; SSE2-NEXT: psubb %xmm1, %xmm0
24332 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
24333 ; SSE2-NEXT: movdqa %xmm0, %xmm2
24334 ; SSE2-NEXT: pand %xmm1, %xmm2
24335 ; SSE2-NEXT: psrlw $2, %xmm0
24336 ; SSE2-NEXT: pand %xmm1, %xmm0
24337 ; SSE2-NEXT: paddb %xmm2, %xmm0
24338 ; SSE2-NEXT: movdqa %xmm0, %xmm1
24339 ; SSE2-NEXT: psrlw $4, %xmm1
24340 ; SSE2-NEXT: paddb %xmm0, %xmm1
24341 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24342 ; SSE2-NEXT: pxor %xmm0, %xmm0
24343 ; SSE2-NEXT: psadbw %xmm0, %xmm1
24344 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24345 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
24346 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
24347 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24348 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
24349 ; SSE2-NEXT: pand %xmm2, %xmm3
24350 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
24351 ; SSE2-NEXT: por %xmm3, %xmm0
24354 ; SSE3-LABEL: ugt_22_v2i64:
24356 ; SSE3-NEXT: movdqa %xmm0, %xmm1
24357 ; SSE3-NEXT: psrlw $1, %xmm1
24358 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24359 ; SSE3-NEXT: psubb %xmm1, %xmm0
24360 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
24361 ; SSE3-NEXT: movdqa %xmm0, %xmm2
24362 ; SSE3-NEXT: pand %xmm1, %xmm2
24363 ; SSE3-NEXT: psrlw $2, %xmm0
24364 ; SSE3-NEXT: pand %xmm1, %xmm0
24365 ; SSE3-NEXT: paddb %xmm2, %xmm0
24366 ; SSE3-NEXT: movdqa %xmm0, %xmm1
24367 ; SSE3-NEXT: psrlw $4, %xmm1
24368 ; SSE3-NEXT: paddb %xmm0, %xmm1
24369 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24370 ; SSE3-NEXT: pxor %xmm0, %xmm0
24371 ; SSE3-NEXT: psadbw %xmm0, %xmm1
24372 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24373 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
24374 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
24375 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24376 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
24377 ; SSE3-NEXT: pand %xmm2, %xmm3
24378 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
24379 ; SSE3-NEXT: por %xmm3, %xmm0
24382 ; SSSE3-LABEL: ugt_22_v2i64:
24384 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24385 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
24386 ; SSSE3-NEXT: pand %xmm1, %xmm2
24387 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24388 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
24389 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
24390 ; SSSE3-NEXT: psrlw $4, %xmm0
24391 ; SSSE3-NEXT: pand %xmm1, %xmm0
24392 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
24393 ; SSSE3-NEXT: paddb %xmm4, %xmm3
24394 ; SSSE3-NEXT: pxor %xmm0, %xmm0
24395 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
24396 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
24397 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
24398 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
24399 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
24400 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
24401 ; SSSE3-NEXT: pand %xmm1, %xmm2
24402 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
24403 ; SSSE3-NEXT: por %xmm2, %xmm0
24406 ; SSE41-LABEL: ugt_22_v2i64:
24408 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24409 ; SSE41-NEXT: movdqa %xmm0, %xmm2
24410 ; SSE41-NEXT: pand %xmm1, %xmm2
24411 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24412 ; SSE41-NEXT: movdqa %xmm3, %xmm4
24413 ; SSE41-NEXT: pshufb %xmm2, %xmm4
24414 ; SSE41-NEXT: psrlw $4, %xmm0
24415 ; SSE41-NEXT: pand %xmm1, %xmm0
24416 ; SSE41-NEXT: pshufb %xmm0, %xmm3
24417 ; SSE41-NEXT: paddb %xmm4, %xmm3
24418 ; SSE41-NEXT: pxor %xmm0, %xmm0
24419 ; SSE41-NEXT: psadbw %xmm0, %xmm3
24420 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
24421 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
24422 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
24423 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
24424 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
24425 ; SSE41-NEXT: pand %xmm1, %xmm2
24426 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
24427 ; SSE41-NEXT: por %xmm2, %xmm0
24430 ; AVX1-LABEL: ugt_22_v2i64:
24432 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24433 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
24434 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24435 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
24436 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
24437 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
24438 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
24439 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
24440 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
24441 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24442 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24445 ; AVX2-LABEL: ugt_22_v2i64:
24447 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24448 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
24449 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24450 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
24451 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
24452 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
24453 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
24454 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
24455 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
24456 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24457 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24460 ; AVX512VPOPCNTDQ-LABEL: ugt_22_v2i64:
24461 ; AVX512VPOPCNTDQ: # %bb.0:
24462 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
24463 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
24464 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24465 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
24466 ; AVX512VPOPCNTDQ-NEXT: retq
24468 ; AVX512VPOPCNTDQVL-LABEL: ugt_22_v2i64:
24469 ; AVX512VPOPCNTDQVL: # %bb.0:
24470 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
24471 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
24472 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
24473 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
24474 ; AVX512VPOPCNTDQVL-NEXT: retq
24476 ; BITALG_NOVLX-LABEL: ugt_22_v2i64:
24477 ; BITALG_NOVLX: # %bb.0:
24478 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
24479 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
24480 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
24481 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24482 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24483 ; BITALG_NOVLX-NEXT: vzeroupper
24484 ; BITALG_NOVLX-NEXT: retq
24486 ; BITALG-LABEL: ugt_22_v2i64:
24488 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
24489 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
24490 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24491 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
24492 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
24493 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
24494 ; BITALG-NEXT: retq
24495 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
24496 %3 = icmp ugt <2 x i64> %2, <i64 22, i64 22>
24497 %4 = sext <2 x i1> %3 to <2 x i64>
24501 define <2 x i64> @ult_23_v2i64(<2 x i64> %0) {
24502 ; SSE2-LABEL: ult_23_v2i64:
24504 ; SSE2-NEXT: movdqa %xmm0, %xmm1
24505 ; SSE2-NEXT: psrlw $1, %xmm1
24506 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24507 ; SSE2-NEXT: psubb %xmm1, %xmm0
24508 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
24509 ; SSE2-NEXT: movdqa %xmm0, %xmm2
24510 ; SSE2-NEXT: pand %xmm1, %xmm2
24511 ; SSE2-NEXT: psrlw $2, %xmm0
24512 ; SSE2-NEXT: pand %xmm1, %xmm0
24513 ; SSE2-NEXT: paddb %xmm2, %xmm0
24514 ; SSE2-NEXT: movdqa %xmm0, %xmm1
24515 ; SSE2-NEXT: psrlw $4, %xmm1
24516 ; SSE2-NEXT: paddb %xmm0, %xmm1
24517 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24518 ; SSE2-NEXT: pxor %xmm0, %xmm0
24519 ; SSE2-NEXT: psadbw %xmm0, %xmm1
24520 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24521 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
24522 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
24523 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483671,2147483671]
24524 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
24525 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
24526 ; SSE2-NEXT: pand %xmm2, %xmm1
24527 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
24528 ; SSE2-NEXT: por %xmm1, %xmm0
24531 ; SSE3-LABEL: ult_23_v2i64:
24533 ; SSE3-NEXT: movdqa %xmm0, %xmm1
24534 ; SSE3-NEXT: psrlw $1, %xmm1
24535 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24536 ; SSE3-NEXT: psubb %xmm1, %xmm0
24537 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
24538 ; SSE3-NEXT: movdqa %xmm0, %xmm2
24539 ; SSE3-NEXT: pand %xmm1, %xmm2
24540 ; SSE3-NEXT: psrlw $2, %xmm0
24541 ; SSE3-NEXT: pand %xmm1, %xmm0
24542 ; SSE3-NEXT: paddb %xmm2, %xmm0
24543 ; SSE3-NEXT: movdqa %xmm0, %xmm1
24544 ; SSE3-NEXT: psrlw $4, %xmm1
24545 ; SSE3-NEXT: paddb %xmm0, %xmm1
24546 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24547 ; SSE3-NEXT: pxor %xmm0, %xmm0
24548 ; SSE3-NEXT: psadbw %xmm0, %xmm1
24549 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24550 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
24551 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
24552 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483671,2147483671]
24553 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
24554 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
24555 ; SSE3-NEXT: pand %xmm2, %xmm1
24556 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
24557 ; SSE3-NEXT: por %xmm1, %xmm0
24560 ; SSSE3-LABEL: ult_23_v2i64:
24562 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24563 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
24564 ; SSSE3-NEXT: pand %xmm1, %xmm2
24565 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24566 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
24567 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
24568 ; SSSE3-NEXT: psrlw $4, %xmm0
24569 ; SSSE3-NEXT: pand %xmm1, %xmm0
24570 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
24571 ; SSSE3-NEXT: paddb %xmm4, %xmm3
24572 ; SSSE3-NEXT: pxor %xmm0, %xmm0
24573 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
24574 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
24575 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
24576 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
24577 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483671,2147483671]
24578 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
24579 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
24580 ; SSSE3-NEXT: pand %xmm1, %xmm2
24581 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
24582 ; SSSE3-NEXT: por %xmm2, %xmm0
24585 ; SSE41-LABEL: ult_23_v2i64:
24587 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24588 ; SSE41-NEXT: movdqa %xmm0, %xmm2
24589 ; SSE41-NEXT: pand %xmm1, %xmm2
24590 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24591 ; SSE41-NEXT: movdqa %xmm3, %xmm4
24592 ; SSE41-NEXT: pshufb %xmm2, %xmm4
24593 ; SSE41-NEXT: psrlw $4, %xmm0
24594 ; SSE41-NEXT: pand %xmm1, %xmm0
24595 ; SSE41-NEXT: pshufb %xmm0, %xmm3
24596 ; SSE41-NEXT: paddb %xmm4, %xmm3
24597 ; SSE41-NEXT: pxor %xmm0, %xmm0
24598 ; SSE41-NEXT: psadbw %xmm0, %xmm3
24599 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
24600 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
24601 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
24602 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483671,2147483671]
24603 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
24604 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
24605 ; SSE41-NEXT: pand %xmm1, %xmm2
24606 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
24607 ; SSE41-NEXT: por %xmm2, %xmm0
24610 ; AVX1-LABEL: ult_23_v2i64:
24612 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24613 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
24614 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24615 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
24616 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
24617 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
24618 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
24619 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
24620 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
24621 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24622 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23]
24623 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24626 ; AVX2-LABEL: ult_23_v2i64:
24628 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24629 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
24630 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24631 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
24632 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
24633 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
24634 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
24635 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
24636 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
24637 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24638 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23]
24639 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24642 ; AVX512VPOPCNTDQ-LABEL: ult_23_v2i64:
24643 ; AVX512VPOPCNTDQ: # %bb.0:
24644 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
24645 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
24646 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23]
24647 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24648 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
24649 ; AVX512VPOPCNTDQ-NEXT: retq
24651 ; AVX512VPOPCNTDQVL-LABEL: ult_23_v2i64:
24652 ; AVX512VPOPCNTDQVL: # %bb.0:
24653 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
24654 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
24655 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
24656 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
24657 ; AVX512VPOPCNTDQVL-NEXT: retq
24659 ; BITALG_NOVLX-LABEL: ult_23_v2i64:
24660 ; BITALG_NOVLX: # %bb.0:
24661 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
24662 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
24663 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
24664 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24665 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23]
24666 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24667 ; BITALG_NOVLX-NEXT: vzeroupper
24668 ; BITALG_NOVLX-NEXT: retq
24670 ; BITALG-LABEL: ult_23_v2i64:
24672 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
24673 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
24674 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24675 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
24676 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
24677 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
24678 ; BITALG-NEXT: retq
24679 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
24680 %3 = icmp ult <2 x i64> %2, <i64 23, i64 23>
24681 %4 = sext <2 x i1> %3 to <2 x i64>
24685 define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) {
24686 ; SSE2-LABEL: ugt_23_v2i64:
24688 ; SSE2-NEXT: movdqa %xmm0, %xmm1
24689 ; SSE2-NEXT: psrlw $1, %xmm1
24690 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24691 ; SSE2-NEXT: psubb %xmm1, %xmm0
24692 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
24693 ; SSE2-NEXT: movdqa %xmm0, %xmm2
24694 ; SSE2-NEXT: pand %xmm1, %xmm2
24695 ; SSE2-NEXT: psrlw $2, %xmm0
24696 ; SSE2-NEXT: pand %xmm1, %xmm0
24697 ; SSE2-NEXT: paddb %xmm2, %xmm0
24698 ; SSE2-NEXT: movdqa %xmm0, %xmm1
24699 ; SSE2-NEXT: psrlw $4, %xmm1
24700 ; SSE2-NEXT: paddb %xmm0, %xmm1
24701 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24702 ; SSE2-NEXT: pxor %xmm0, %xmm0
24703 ; SSE2-NEXT: psadbw %xmm0, %xmm1
24704 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24705 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
24706 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
24707 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24708 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
24709 ; SSE2-NEXT: pand %xmm2, %xmm3
24710 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
24711 ; SSE2-NEXT: por %xmm3, %xmm0
24714 ; SSE3-LABEL: ugt_23_v2i64:
24716 ; SSE3-NEXT: movdqa %xmm0, %xmm1
24717 ; SSE3-NEXT: psrlw $1, %xmm1
24718 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24719 ; SSE3-NEXT: psubb %xmm1, %xmm0
24720 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
24721 ; SSE3-NEXT: movdqa %xmm0, %xmm2
24722 ; SSE3-NEXT: pand %xmm1, %xmm2
24723 ; SSE3-NEXT: psrlw $2, %xmm0
24724 ; SSE3-NEXT: pand %xmm1, %xmm0
24725 ; SSE3-NEXT: paddb %xmm2, %xmm0
24726 ; SSE3-NEXT: movdqa %xmm0, %xmm1
24727 ; SSE3-NEXT: psrlw $4, %xmm1
24728 ; SSE3-NEXT: paddb %xmm0, %xmm1
24729 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24730 ; SSE3-NEXT: pxor %xmm0, %xmm0
24731 ; SSE3-NEXT: psadbw %xmm0, %xmm1
24732 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24733 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
24734 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
24735 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24736 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
24737 ; SSE3-NEXT: pand %xmm2, %xmm3
24738 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
24739 ; SSE3-NEXT: por %xmm3, %xmm0
24742 ; SSSE3-LABEL: ugt_23_v2i64:
24744 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24745 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
24746 ; SSSE3-NEXT: pand %xmm1, %xmm2
24747 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24748 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
24749 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
24750 ; SSSE3-NEXT: psrlw $4, %xmm0
24751 ; SSSE3-NEXT: pand %xmm1, %xmm0
24752 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
24753 ; SSSE3-NEXT: paddb %xmm4, %xmm3
24754 ; SSSE3-NEXT: pxor %xmm0, %xmm0
24755 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
24756 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
24757 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
24758 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
24759 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
24760 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
24761 ; SSSE3-NEXT: pand %xmm1, %xmm2
24762 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
24763 ; SSSE3-NEXT: por %xmm2, %xmm0
24766 ; SSE41-LABEL: ugt_23_v2i64:
24768 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24769 ; SSE41-NEXT: movdqa %xmm0, %xmm2
24770 ; SSE41-NEXT: pand %xmm1, %xmm2
24771 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24772 ; SSE41-NEXT: movdqa %xmm3, %xmm4
24773 ; SSE41-NEXT: pshufb %xmm2, %xmm4
24774 ; SSE41-NEXT: psrlw $4, %xmm0
24775 ; SSE41-NEXT: pand %xmm1, %xmm0
24776 ; SSE41-NEXT: pshufb %xmm0, %xmm3
24777 ; SSE41-NEXT: paddb %xmm4, %xmm3
24778 ; SSE41-NEXT: pxor %xmm0, %xmm0
24779 ; SSE41-NEXT: psadbw %xmm0, %xmm3
24780 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
24781 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
24782 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
24783 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
24784 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
24785 ; SSE41-NEXT: pand %xmm1, %xmm2
24786 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
24787 ; SSE41-NEXT: por %xmm2, %xmm0
24790 ; AVX1-LABEL: ugt_23_v2i64:
24792 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24793 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
24794 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24795 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
24796 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
24797 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
24798 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
24799 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
24800 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
24801 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24802 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24805 ; AVX2-LABEL: ugt_23_v2i64:
24807 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24808 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
24809 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24810 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
24811 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
24812 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
24813 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
24814 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
24815 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
24816 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24817 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24820 ; AVX512VPOPCNTDQ-LABEL: ugt_23_v2i64:
24821 ; AVX512VPOPCNTDQ: # %bb.0:
24822 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
24823 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
24824 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24825 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
24826 ; AVX512VPOPCNTDQ-NEXT: retq
24828 ; AVX512VPOPCNTDQVL-LABEL: ugt_23_v2i64:
24829 ; AVX512VPOPCNTDQVL: # %bb.0:
24830 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
24831 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
24832 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
24833 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
24834 ; AVX512VPOPCNTDQVL-NEXT: retq
24836 ; BITALG_NOVLX-LABEL: ugt_23_v2i64:
24837 ; BITALG_NOVLX: # %bb.0:
24838 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
24839 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
24840 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
24841 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24842 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24843 ; BITALG_NOVLX-NEXT: vzeroupper
24844 ; BITALG_NOVLX-NEXT: retq
24846 ; BITALG-LABEL: ugt_23_v2i64:
24848 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
24849 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
24850 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24851 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
24852 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
24853 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
24854 ; BITALG-NEXT: retq
24855 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
24856 %3 = icmp ugt <2 x i64> %2, <i64 23, i64 23>
24857 %4 = sext <2 x i1> %3 to <2 x i64>
24861 define <2 x i64> @ult_24_v2i64(<2 x i64> %0) {
24862 ; SSE2-LABEL: ult_24_v2i64:
24864 ; SSE2-NEXT: movdqa %xmm0, %xmm1
24865 ; SSE2-NEXT: psrlw $1, %xmm1
24866 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24867 ; SSE2-NEXT: psubb %xmm1, %xmm0
24868 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
24869 ; SSE2-NEXT: movdqa %xmm0, %xmm2
24870 ; SSE2-NEXT: pand %xmm1, %xmm2
24871 ; SSE2-NEXT: psrlw $2, %xmm0
24872 ; SSE2-NEXT: pand %xmm1, %xmm0
24873 ; SSE2-NEXT: paddb %xmm2, %xmm0
24874 ; SSE2-NEXT: movdqa %xmm0, %xmm1
24875 ; SSE2-NEXT: psrlw $4, %xmm1
24876 ; SSE2-NEXT: paddb %xmm0, %xmm1
24877 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24878 ; SSE2-NEXT: pxor %xmm0, %xmm0
24879 ; SSE2-NEXT: psadbw %xmm0, %xmm1
24880 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24881 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
24882 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
24883 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483672,2147483672]
24884 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
24885 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
24886 ; SSE2-NEXT: pand %xmm2, %xmm1
24887 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
24888 ; SSE2-NEXT: por %xmm1, %xmm0
24891 ; SSE3-LABEL: ult_24_v2i64:
24893 ; SSE3-NEXT: movdqa %xmm0, %xmm1
24894 ; SSE3-NEXT: psrlw $1, %xmm1
24895 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24896 ; SSE3-NEXT: psubb %xmm1, %xmm0
24897 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
24898 ; SSE3-NEXT: movdqa %xmm0, %xmm2
24899 ; SSE3-NEXT: pand %xmm1, %xmm2
24900 ; SSE3-NEXT: psrlw $2, %xmm0
24901 ; SSE3-NEXT: pand %xmm1, %xmm0
24902 ; SSE3-NEXT: paddb %xmm2, %xmm0
24903 ; SSE3-NEXT: movdqa %xmm0, %xmm1
24904 ; SSE3-NEXT: psrlw $4, %xmm1
24905 ; SSE3-NEXT: paddb %xmm0, %xmm1
24906 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24907 ; SSE3-NEXT: pxor %xmm0, %xmm0
24908 ; SSE3-NEXT: psadbw %xmm0, %xmm1
24909 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24910 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
24911 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
24912 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483672,2147483672]
24913 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
24914 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
24915 ; SSE3-NEXT: pand %xmm2, %xmm1
24916 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
24917 ; SSE3-NEXT: por %xmm1, %xmm0
24920 ; SSSE3-LABEL: ult_24_v2i64:
24922 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24923 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
24924 ; SSSE3-NEXT: pand %xmm1, %xmm2
24925 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24926 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
24927 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
24928 ; SSSE3-NEXT: psrlw $4, %xmm0
24929 ; SSSE3-NEXT: pand %xmm1, %xmm0
24930 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
24931 ; SSSE3-NEXT: paddb %xmm4, %xmm3
24932 ; SSSE3-NEXT: pxor %xmm0, %xmm0
24933 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
24934 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
24935 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
24936 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
24937 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483672,2147483672]
24938 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
24939 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
24940 ; SSSE3-NEXT: pand %xmm1, %xmm2
24941 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
24942 ; SSSE3-NEXT: por %xmm2, %xmm0
24945 ; SSE41-LABEL: ult_24_v2i64:
24947 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24948 ; SSE41-NEXT: movdqa %xmm0, %xmm2
24949 ; SSE41-NEXT: pand %xmm1, %xmm2
24950 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24951 ; SSE41-NEXT: movdqa %xmm3, %xmm4
24952 ; SSE41-NEXT: pshufb %xmm2, %xmm4
24953 ; SSE41-NEXT: psrlw $4, %xmm0
24954 ; SSE41-NEXT: pand %xmm1, %xmm0
24955 ; SSE41-NEXT: pshufb %xmm0, %xmm3
24956 ; SSE41-NEXT: paddb %xmm4, %xmm3
24957 ; SSE41-NEXT: pxor %xmm0, %xmm0
24958 ; SSE41-NEXT: psadbw %xmm0, %xmm3
24959 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
24960 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
24961 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
24962 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483672,2147483672]
24963 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
24964 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
24965 ; SSE41-NEXT: pand %xmm1, %xmm2
24966 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
24967 ; SSE41-NEXT: por %xmm2, %xmm0
24970 ; AVX1-LABEL: ult_24_v2i64:
24972 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24973 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
24974 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24975 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
24976 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
24977 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
24978 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
24979 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
24980 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
24981 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24982 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24]
24983 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24986 ; AVX2-LABEL: ult_24_v2i64:
24988 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24989 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
24990 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24991 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
24992 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
24993 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
24994 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
24995 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
24996 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
24997 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24998 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24]
24999 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25002 ; AVX512VPOPCNTDQ-LABEL: ult_24_v2i64:
25003 ; AVX512VPOPCNTDQ: # %bb.0:
25004 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
25005 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
25006 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24]
25007 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25008 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
25009 ; AVX512VPOPCNTDQ-NEXT: retq
25011 ; AVX512VPOPCNTDQVL-LABEL: ult_24_v2i64:
25012 ; AVX512VPOPCNTDQVL: # %bb.0:
25013 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
25014 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
25015 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
25016 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
25017 ; AVX512VPOPCNTDQVL-NEXT: retq
25019 ; BITALG_NOVLX-LABEL: ult_24_v2i64:
25020 ; BITALG_NOVLX: # %bb.0:
25021 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
25022 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
25023 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
25024 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25025 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24]
25026 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25027 ; BITALG_NOVLX-NEXT: vzeroupper
25028 ; BITALG_NOVLX-NEXT: retq
25030 ; BITALG-LABEL: ult_24_v2i64:
25032 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
25033 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
25034 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25035 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
25036 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
25037 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
25038 ; BITALG-NEXT: retq
25039 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
25040 %3 = icmp ult <2 x i64> %2, <i64 24, i64 24>
25041 %4 = sext <2 x i1> %3 to <2 x i64>
25045 define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) {
25046 ; SSE2-LABEL: ugt_24_v2i64:
25048 ; SSE2-NEXT: movdqa %xmm0, %xmm1
25049 ; SSE2-NEXT: psrlw $1, %xmm1
25050 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25051 ; SSE2-NEXT: psubb %xmm1, %xmm0
25052 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25053 ; SSE2-NEXT: movdqa %xmm0, %xmm2
25054 ; SSE2-NEXT: pand %xmm1, %xmm2
25055 ; SSE2-NEXT: psrlw $2, %xmm0
25056 ; SSE2-NEXT: pand %xmm1, %xmm0
25057 ; SSE2-NEXT: paddb %xmm2, %xmm0
25058 ; SSE2-NEXT: movdqa %xmm0, %xmm1
25059 ; SSE2-NEXT: psrlw $4, %xmm1
25060 ; SSE2-NEXT: paddb %xmm0, %xmm1
25061 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25062 ; SSE2-NEXT: pxor %xmm0, %xmm0
25063 ; SSE2-NEXT: psadbw %xmm0, %xmm1
25064 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25065 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
25066 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
25067 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25068 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
25069 ; SSE2-NEXT: pand %xmm2, %xmm3
25070 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
25071 ; SSE2-NEXT: por %xmm3, %xmm0
25074 ; SSE3-LABEL: ugt_24_v2i64:
25076 ; SSE3-NEXT: movdqa %xmm0, %xmm1
25077 ; SSE3-NEXT: psrlw $1, %xmm1
25078 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25079 ; SSE3-NEXT: psubb %xmm1, %xmm0
25080 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25081 ; SSE3-NEXT: movdqa %xmm0, %xmm2
25082 ; SSE3-NEXT: pand %xmm1, %xmm2
25083 ; SSE3-NEXT: psrlw $2, %xmm0
25084 ; SSE3-NEXT: pand %xmm1, %xmm0
25085 ; SSE3-NEXT: paddb %xmm2, %xmm0
25086 ; SSE3-NEXT: movdqa %xmm0, %xmm1
25087 ; SSE3-NEXT: psrlw $4, %xmm1
25088 ; SSE3-NEXT: paddb %xmm0, %xmm1
25089 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25090 ; SSE3-NEXT: pxor %xmm0, %xmm0
25091 ; SSE3-NEXT: psadbw %xmm0, %xmm1
25092 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25093 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
25094 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
25095 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25096 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
25097 ; SSE3-NEXT: pand %xmm2, %xmm3
25098 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
25099 ; SSE3-NEXT: por %xmm3, %xmm0
25102 ; SSSE3-LABEL: ugt_24_v2i64:
25104 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25105 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
25106 ; SSSE3-NEXT: pand %xmm1, %xmm2
25107 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25108 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
25109 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
25110 ; SSSE3-NEXT: psrlw $4, %xmm0
25111 ; SSSE3-NEXT: pand %xmm1, %xmm0
25112 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
25113 ; SSSE3-NEXT: paddb %xmm4, %xmm3
25114 ; SSSE3-NEXT: pxor %xmm0, %xmm0
25115 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
25116 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
25117 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
25118 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
25119 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
25120 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
25121 ; SSSE3-NEXT: pand %xmm1, %xmm2
25122 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
25123 ; SSSE3-NEXT: por %xmm2, %xmm0
25126 ; SSE41-LABEL: ugt_24_v2i64:
25128 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25129 ; SSE41-NEXT: movdqa %xmm0, %xmm2
25130 ; SSE41-NEXT: pand %xmm1, %xmm2
25131 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25132 ; SSE41-NEXT: movdqa %xmm3, %xmm4
25133 ; SSE41-NEXT: pshufb %xmm2, %xmm4
25134 ; SSE41-NEXT: psrlw $4, %xmm0
25135 ; SSE41-NEXT: pand %xmm1, %xmm0
25136 ; SSE41-NEXT: pshufb %xmm0, %xmm3
25137 ; SSE41-NEXT: paddb %xmm4, %xmm3
25138 ; SSE41-NEXT: pxor %xmm0, %xmm0
25139 ; SSE41-NEXT: psadbw %xmm0, %xmm3
25140 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
25141 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
25142 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
25143 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
25144 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
25145 ; SSE41-NEXT: pand %xmm1, %xmm2
25146 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
25147 ; SSE41-NEXT: por %xmm2, %xmm0
25150 ; AVX1-LABEL: ugt_24_v2i64:
25152 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25153 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
25154 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25155 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
25156 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
25157 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
25158 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
25159 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
25160 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
25161 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25162 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25165 ; AVX2-LABEL: ugt_24_v2i64:
25167 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25168 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
25169 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25170 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
25171 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
25172 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
25173 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
25174 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
25175 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
25176 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25177 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25180 ; AVX512VPOPCNTDQ-LABEL: ugt_24_v2i64:
25181 ; AVX512VPOPCNTDQ: # %bb.0:
25182 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
25183 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
25184 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25185 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
25186 ; AVX512VPOPCNTDQ-NEXT: retq
25188 ; AVX512VPOPCNTDQVL-LABEL: ugt_24_v2i64:
25189 ; AVX512VPOPCNTDQVL: # %bb.0:
25190 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
25191 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
25192 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
25193 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
25194 ; AVX512VPOPCNTDQVL-NEXT: retq
25196 ; BITALG_NOVLX-LABEL: ugt_24_v2i64:
25197 ; BITALG_NOVLX: # %bb.0:
25198 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
25199 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
25200 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
25201 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25202 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25203 ; BITALG_NOVLX-NEXT: vzeroupper
25204 ; BITALG_NOVLX-NEXT: retq
25206 ; BITALG-LABEL: ugt_24_v2i64:
25208 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
25209 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
25210 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25211 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
25212 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
25213 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
25214 ; BITALG-NEXT: retq
25215 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
25216 %3 = icmp ugt <2 x i64> %2, <i64 24, i64 24>
25217 %4 = sext <2 x i1> %3 to <2 x i64>
25221 define <2 x i64> @ult_25_v2i64(<2 x i64> %0) {
25222 ; SSE2-LABEL: ult_25_v2i64:
25224 ; SSE2-NEXT: movdqa %xmm0, %xmm1
25225 ; SSE2-NEXT: psrlw $1, %xmm1
25226 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25227 ; SSE2-NEXT: psubb %xmm1, %xmm0
25228 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25229 ; SSE2-NEXT: movdqa %xmm0, %xmm2
25230 ; SSE2-NEXT: pand %xmm1, %xmm2
25231 ; SSE2-NEXT: psrlw $2, %xmm0
25232 ; SSE2-NEXT: pand %xmm1, %xmm0
25233 ; SSE2-NEXT: paddb %xmm2, %xmm0
25234 ; SSE2-NEXT: movdqa %xmm0, %xmm1
25235 ; SSE2-NEXT: psrlw $4, %xmm1
25236 ; SSE2-NEXT: paddb %xmm0, %xmm1
25237 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25238 ; SSE2-NEXT: pxor %xmm0, %xmm0
25239 ; SSE2-NEXT: psadbw %xmm0, %xmm1
25240 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25241 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
25242 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
25243 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483673,2147483673]
25244 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
25245 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
25246 ; SSE2-NEXT: pand %xmm2, %xmm1
25247 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
25248 ; SSE2-NEXT: por %xmm1, %xmm0
25251 ; SSE3-LABEL: ult_25_v2i64:
25253 ; SSE3-NEXT: movdqa %xmm0, %xmm1
25254 ; SSE3-NEXT: psrlw $1, %xmm1
25255 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25256 ; SSE3-NEXT: psubb %xmm1, %xmm0
25257 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25258 ; SSE3-NEXT: movdqa %xmm0, %xmm2
25259 ; SSE3-NEXT: pand %xmm1, %xmm2
25260 ; SSE3-NEXT: psrlw $2, %xmm0
25261 ; SSE3-NEXT: pand %xmm1, %xmm0
25262 ; SSE3-NEXT: paddb %xmm2, %xmm0
25263 ; SSE3-NEXT: movdqa %xmm0, %xmm1
25264 ; SSE3-NEXT: psrlw $4, %xmm1
25265 ; SSE3-NEXT: paddb %xmm0, %xmm1
25266 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25267 ; SSE3-NEXT: pxor %xmm0, %xmm0
25268 ; SSE3-NEXT: psadbw %xmm0, %xmm1
25269 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25270 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
25271 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
25272 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483673,2147483673]
25273 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
25274 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
25275 ; SSE3-NEXT: pand %xmm2, %xmm1
25276 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
25277 ; SSE3-NEXT: por %xmm1, %xmm0
25280 ; SSSE3-LABEL: ult_25_v2i64:
25282 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25283 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
25284 ; SSSE3-NEXT: pand %xmm1, %xmm2
25285 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25286 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
25287 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
25288 ; SSSE3-NEXT: psrlw $4, %xmm0
25289 ; SSSE3-NEXT: pand %xmm1, %xmm0
25290 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
25291 ; SSSE3-NEXT: paddb %xmm4, %xmm3
25292 ; SSSE3-NEXT: pxor %xmm0, %xmm0
25293 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
25294 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
25295 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
25296 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
25297 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483673,2147483673]
25298 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
25299 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
25300 ; SSSE3-NEXT: pand %xmm1, %xmm2
25301 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
25302 ; SSSE3-NEXT: por %xmm2, %xmm0
25305 ; SSE41-LABEL: ult_25_v2i64:
25307 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25308 ; SSE41-NEXT: movdqa %xmm0, %xmm2
25309 ; SSE41-NEXT: pand %xmm1, %xmm2
25310 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25311 ; SSE41-NEXT: movdqa %xmm3, %xmm4
25312 ; SSE41-NEXT: pshufb %xmm2, %xmm4
25313 ; SSE41-NEXT: psrlw $4, %xmm0
25314 ; SSE41-NEXT: pand %xmm1, %xmm0
25315 ; SSE41-NEXT: pshufb %xmm0, %xmm3
25316 ; SSE41-NEXT: paddb %xmm4, %xmm3
25317 ; SSE41-NEXT: pxor %xmm0, %xmm0
25318 ; SSE41-NEXT: psadbw %xmm0, %xmm3
25319 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
25320 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
25321 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
25322 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483673,2147483673]
25323 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
25324 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
25325 ; SSE41-NEXT: pand %xmm1, %xmm2
25326 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
25327 ; SSE41-NEXT: por %xmm2, %xmm0
25330 ; AVX1-LABEL: ult_25_v2i64:
25332 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25333 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
25334 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25335 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
25336 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
25337 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
25338 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
25339 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
25340 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
25341 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25342 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25]
25343 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25346 ; AVX2-LABEL: ult_25_v2i64:
25348 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25349 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
25350 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25351 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
25352 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
25353 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
25354 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
25355 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
25356 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
25357 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25358 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25]
25359 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25362 ; AVX512VPOPCNTDQ-LABEL: ult_25_v2i64:
25363 ; AVX512VPOPCNTDQ: # %bb.0:
25364 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
25365 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
25366 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25]
25367 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25368 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
25369 ; AVX512VPOPCNTDQ-NEXT: retq
25371 ; AVX512VPOPCNTDQVL-LABEL: ult_25_v2i64:
25372 ; AVX512VPOPCNTDQVL: # %bb.0:
25373 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
25374 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
25375 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
25376 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
25377 ; AVX512VPOPCNTDQVL-NEXT: retq
25379 ; BITALG_NOVLX-LABEL: ult_25_v2i64:
25380 ; BITALG_NOVLX: # %bb.0:
25381 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
25382 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
25383 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
25384 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25385 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25]
25386 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25387 ; BITALG_NOVLX-NEXT: vzeroupper
25388 ; BITALG_NOVLX-NEXT: retq
25390 ; BITALG-LABEL: ult_25_v2i64:
25392 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
25393 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
25394 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25395 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
25396 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
25397 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
25398 ; BITALG-NEXT: retq
25399 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
25400 %3 = icmp ult <2 x i64> %2, <i64 25, i64 25>
25401 %4 = sext <2 x i1> %3 to <2 x i64>
25405 define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) {
25406 ; SSE2-LABEL: ugt_25_v2i64:
25408 ; SSE2-NEXT: movdqa %xmm0, %xmm1
25409 ; SSE2-NEXT: psrlw $1, %xmm1
25410 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25411 ; SSE2-NEXT: psubb %xmm1, %xmm0
25412 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25413 ; SSE2-NEXT: movdqa %xmm0, %xmm2
25414 ; SSE2-NEXT: pand %xmm1, %xmm2
25415 ; SSE2-NEXT: psrlw $2, %xmm0
25416 ; SSE2-NEXT: pand %xmm1, %xmm0
25417 ; SSE2-NEXT: paddb %xmm2, %xmm0
25418 ; SSE2-NEXT: movdqa %xmm0, %xmm1
25419 ; SSE2-NEXT: psrlw $4, %xmm1
25420 ; SSE2-NEXT: paddb %xmm0, %xmm1
25421 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25422 ; SSE2-NEXT: pxor %xmm0, %xmm0
25423 ; SSE2-NEXT: psadbw %xmm0, %xmm1
25424 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25425 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
25426 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
25427 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25428 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
25429 ; SSE2-NEXT: pand %xmm2, %xmm3
25430 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
25431 ; SSE2-NEXT: por %xmm3, %xmm0
25434 ; SSE3-LABEL: ugt_25_v2i64:
25436 ; SSE3-NEXT: movdqa %xmm0, %xmm1
25437 ; SSE3-NEXT: psrlw $1, %xmm1
25438 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25439 ; SSE3-NEXT: psubb %xmm1, %xmm0
25440 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25441 ; SSE3-NEXT: movdqa %xmm0, %xmm2
25442 ; SSE3-NEXT: pand %xmm1, %xmm2
25443 ; SSE3-NEXT: psrlw $2, %xmm0
25444 ; SSE3-NEXT: pand %xmm1, %xmm0
25445 ; SSE3-NEXT: paddb %xmm2, %xmm0
25446 ; SSE3-NEXT: movdqa %xmm0, %xmm1
25447 ; SSE3-NEXT: psrlw $4, %xmm1
25448 ; SSE3-NEXT: paddb %xmm0, %xmm1
25449 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25450 ; SSE3-NEXT: pxor %xmm0, %xmm0
25451 ; SSE3-NEXT: psadbw %xmm0, %xmm1
25452 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25453 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
25454 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
25455 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25456 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
25457 ; SSE3-NEXT: pand %xmm2, %xmm3
25458 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
25459 ; SSE3-NEXT: por %xmm3, %xmm0
25462 ; SSSE3-LABEL: ugt_25_v2i64:
25464 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25465 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
25466 ; SSSE3-NEXT: pand %xmm1, %xmm2
25467 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25468 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
25469 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
25470 ; SSSE3-NEXT: psrlw $4, %xmm0
25471 ; SSSE3-NEXT: pand %xmm1, %xmm0
25472 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
25473 ; SSSE3-NEXT: paddb %xmm4, %xmm3
25474 ; SSSE3-NEXT: pxor %xmm0, %xmm0
25475 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
25476 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
25477 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
25478 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
25479 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
25480 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
25481 ; SSSE3-NEXT: pand %xmm1, %xmm2
25482 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
25483 ; SSSE3-NEXT: por %xmm2, %xmm0
25486 ; SSE41-LABEL: ugt_25_v2i64:
25488 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25489 ; SSE41-NEXT: movdqa %xmm0, %xmm2
25490 ; SSE41-NEXT: pand %xmm1, %xmm2
25491 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25492 ; SSE41-NEXT: movdqa %xmm3, %xmm4
25493 ; SSE41-NEXT: pshufb %xmm2, %xmm4
25494 ; SSE41-NEXT: psrlw $4, %xmm0
25495 ; SSE41-NEXT: pand %xmm1, %xmm0
25496 ; SSE41-NEXT: pshufb %xmm0, %xmm3
25497 ; SSE41-NEXT: paddb %xmm4, %xmm3
25498 ; SSE41-NEXT: pxor %xmm0, %xmm0
25499 ; SSE41-NEXT: psadbw %xmm0, %xmm3
25500 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
25501 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
25502 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
25503 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
25504 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
25505 ; SSE41-NEXT: pand %xmm1, %xmm2
25506 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
25507 ; SSE41-NEXT: por %xmm2, %xmm0
25510 ; AVX1-LABEL: ugt_25_v2i64:
25512 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25513 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
25514 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25515 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
25516 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
25517 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
25518 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
25519 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
25520 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
25521 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25522 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25525 ; AVX2-LABEL: ugt_25_v2i64:
25527 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25528 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
25529 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25530 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
25531 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
25532 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
25533 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
25534 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
25535 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
25536 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25537 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25540 ; AVX512VPOPCNTDQ-LABEL: ugt_25_v2i64:
25541 ; AVX512VPOPCNTDQ: # %bb.0:
25542 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
25543 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
25544 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25545 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
25546 ; AVX512VPOPCNTDQ-NEXT: retq
25548 ; AVX512VPOPCNTDQVL-LABEL: ugt_25_v2i64:
25549 ; AVX512VPOPCNTDQVL: # %bb.0:
25550 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
25551 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
25552 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
25553 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
25554 ; AVX512VPOPCNTDQVL-NEXT: retq
25556 ; BITALG_NOVLX-LABEL: ugt_25_v2i64:
25557 ; BITALG_NOVLX: # %bb.0:
25558 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
25559 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
25560 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
25561 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25562 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25563 ; BITALG_NOVLX-NEXT: vzeroupper
25564 ; BITALG_NOVLX-NEXT: retq
25566 ; BITALG-LABEL: ugt_25_v2i64:
25568 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
25569 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
25570 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25571 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
25572 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
25573 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
25574 ; BITALG-NEXT: retq
25575 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
25576 %3 = icmp ugt <2 x i64> %2, <i64 25, i64 25>
25577 %4 = sext <2 x i1> %3 to <2 x i64>
25581 define <2 x i64> @ult_26_v2i64(<2 x i64> %0) {
25582 ; SSE2-LABEL: ult_26_v2i64:
25584 ; SSE2-NEXT: movdqa %xmm0, %xmm1
25585 ; SSE2-NEXT: psrlw $1, %xmm1
25586 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25587 ; SSE2-NEXT: psubb %xmm1, %xmm0
25588 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25589 ; SSE2-NEXT: movdqa %xmm0, %xmm2
25590 ; SSE2-NEXT: pand %xmm1, %xmm2
25591 ; SSE2-NEXT: psrlw $2, %xmm0
25592 ; SSE2-NEXT: pand %xmm1, %xmm0
25593 ; SSE2-NEXT: paddb %xmm2, %xmm0
25594 ; SSE2-NEXT: movdqa %xmm0, %xmm1
25595 ; SSE2-NEXT: psrlw $4, %xmm1
25596 ; SSE2-NEXT: paddb %xmm0, %xmm1
25597 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25598 ; SSE2-NEXT: pxor %xmm0, %xmm0
25599 ; SSE2-NEXT: psadbw %xmm0, %xmm1
25600 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25601 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
25602 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
25603 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483674,2147483674]
25604 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
25605 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
25606 ; SSE2-NEXT: pand %xmm2, %xmm1
25607 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
25608 ; SSE2-NEXT: por %xmm1, %xmm0
25611 ; SSE3-LABEL: ult_26_v2i64:
25613 ; SSE3-NEXT: movdqa %xmm0, %xmm1
25614 ; SSE3-NEXT: psrlw $1, %xmm1
25615 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25616 ; SSE3-NEXT: psubb %xmm1, %xmm0
25617 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25618 ; SSE3-NEXT: movdqa %xmm0, %xmm2
25619 ; SSE3-NEXT: pand %xmm1, %xmm2
25620 ; SSE3-NEXT: psrlw $2, %xmm0
25621 ; SSE3-NEXT: pand %xmm1, %xmm0
25622 ; SSE3-NEXT: paddb %xmm2, %xmm0
25623 ; SSE3-NEXT: movdqa %xmm0, %xmm1
25624 ; SSE3-NEXT: psrlw $4, %xmm1
25625 ; SSE3-NEXT: paddb %xmm0, %xmm1
25626 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25627 ; SSE3-NEXT: pxor %xmm0, %xmm0
25628 ; SSE3-NEXT: psadbw %xmm0, %xmm1
25629 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25630 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
25631 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
25632 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483674,2147483674]
25633 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
25634 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
25635 ; SSE3-NEXT: pand %xmm2, %xmm1
25636 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
25637 ; SSE3-NEXT: por %xmm1, %xmm0
25640 ; SSSE3-LABEL: ult_26_v2i64:
25642 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25643 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
25644 ; SSSE3-NEXT: pand %xmm1, %xmm2
25645 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25646 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
25647 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
25648 ; SSSE3-NEXT: psrlw $4, %xmm0
25649 ; SSSE3-NEXT: pand %xmm1, %xmm0
25650 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
25651 ; SSSE3-NEXT: paddb %xmm4, %xmm3
25652 ; SSSE3-NEXT: pxor %xmm0, %xmm0
25653 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
25654 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
25655 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
25656 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
25657 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483674,2147483674]
25658 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
25659 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
25660 ; SSSE3-NEXT: pand %xmm1, %xmm2
25661 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
25662 ; SSSE3-NEXT: por %xmm2, %xmm0
25665 ; SSE41-LABEL: ult_26_v2i64:
25667 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25668 ; SSE41-NEXT: movdqa %xmm0, %xmm2
25669 ; SSE41-NEXT: pand %xmm1, %xmm2
25670 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25671 ; SSE41-NEXT: movdqa %xmm3, %xmm4
25672 ; SSE41-NEXT: pshufb %xmm2, %xmm4
25673 ; SSE41-NEXT: psrlw $4, %xmm0
25674 ; SSE41-NEXT: pand %xmm1, %xmm0
25675 ; SSE41-NEXT: pshufb %xmm0, %xmm3
25676 ; SSE41-NEXT: paddb %xmm4, %xmm3
25677 ; SSE41-NEXT: pxor %xmm0, %xmm0
25678 ; SSE41-NEXT: psadbw %xmm0, %xmm3
25679 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
25680 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
25681 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
25682 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483674,2147483674]
25683 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
25684 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
25685 ; SSE41-NEXT: pand %xmm1, %xmm2
25686 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
25687 ; SSE41-NEXT: por %xmm2, %xmm0
25690 ; AVX1-LABEL: ult_26_v2i64:
25692 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25693 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
25694 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25695 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
25696 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
25697 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
25698 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
25699 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
25700 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
25701 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25702 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26]
25703 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25706 ; AVX2-LABEL: ult_26_v2i64:
25708 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25709 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
25710 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25711 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
25712 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
25713 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
25714 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
25715 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
25716 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
25717 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25718 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26]
25719 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25722 ; AVX512VPOPCNTDQ-LABEL: ult_26_v2i64:
25723 ; AVX512VPOPCNTDQ: # %bb.0:
25724 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
25725 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
25726 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26]
25727 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25728 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
25729 ; AVX512VPOPCNTDQ-NEXT: retq
25731 ; AVX512VPOPCNTDQVL-LABEL: ult_26_v2i64:
25732 ; AVX512VPOPCNTDQVL: # %bb.0:
25733 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
25734 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
25735 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
25736 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
25737 ; AVX512VPOPCNTDQVL-NEXT: retq
25739 ; BITALG_NOVLX-LABEL: ult_26_v2i64:
25740 ; BITALG_NOVLX: # %bb.0:
25741 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
25742 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
25743 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
25744 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25745 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26]
25746 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25747 ; BITALG_NOVLX-NEXT: vzeroupper
25748 ; BITALG_NOVLX-NEXT: retq
25750 ; BITALG-LABEL: ult_26_v2i64:
25752 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
25753 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
25754 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25755 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
25756 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
25757 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
25758 ; BITALG-NEXT: retq
25759 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
25760 %3 = icmp ult <2 x i64> %2, <i64 26, i64 26>
25761 %4 = sext <2 x i1> %3 to <2 x i64>
25765 define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) {
25766 ; SSE2-LABEL: ugt_26_v2i64:
25768 ; SSE2-NEXT: movdqa %xmm0, %xmm1
25769 ; SSE2-NEXT: psrlw $1, %xmm1
25770 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25771 ; SSE2-NEXT: psubb %xmm1, %xmm0
25772 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25773 ; SSE2-NEXT: movdqa %xmm0, %xmm2
25774 ; SSE2-NEXT: pand %xmm1, %xmm2
25775 ; SSE2-NEXT: psrlw $2, %xmm0
25776 ; SSE2-NEXT: pand %xmm1, %xmm0
25777 ; SSE2-NEXT: paddb %xmm2, %xmm0
25778 ; SSE2-NEXT: movdqa %xmm0, %xmm1
25779 ; SSE2-NEXT: psrlw $4, %xmm1
25780 ; SSE2-NEXT: paddb %xmm0, %xmm1
25781 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25782 ; SSE2-NEXT: pxor %xmm0, %xmm0
25783 ; SSE2-NEXT: psadbw %xmm0, %xmm1
25784 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25785 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
25786 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
25787 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25788 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
25789 ; SSE2-NEXT: pand %xmm2, %xmm3
25790 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
25791 ; SSE2-NEXT: por %xmm3, %xmm0
25794 ; SSE3-LABEL: ugt_26_v2i64:
25796 ; SSE3-NEXT: movdqa %xmm0, %xmm1
25797 ; SSE3-NEXT: psrlw $1, %xmm1
25798 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25799 ; SSE3-NEXT: psubb %xmm1, %xmm0
25800 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25801 ; SSE3-NEXT: movdqa %xmm0, %xmm2
25802 ; SSE3-NEXT: pand %xmm1, %xmm2
25803 ; SSE3-NEXT: psrlw $2, %xmm0
25804 ; SSE3-NEXT: pand %xmm1, %xmm0
25805 ; SSE3-NEXT: paddb %xmm2, %xmm0
25806 ; SSE3-NEXT: movdqa %xmm0, %xmm1
25807 ; SSE3-NEXT: psrlw $4, %xmm1
25808 ; SSE3-NEXT: paddb %xmm0, %xmm1
25809 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25810 ; SSE3-NEXT: pxor %xmm0, %xmm0
25811 ; SSE3-NEXT: psadbw %xmm0, %xmm1
25812 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25813 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
25814 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
25815 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25816 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
25817 ; SSE3-NEXT: pand %xmm2, %xmm3
25818 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
25819 ; SSE3-NEXT: por %xmm3, %xmm0
25822 ; SSSE3-LABEL: ugt_26_v2i64:
25824 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25825 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
25826 ; SSSE3-NEXT: pand %xmm1, %xmm2
25827 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25828 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
25829 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
25830 ; SSSE3-NEXT: psrlw $4, %xmm0
25831 ; SSSE3-NEXT: pand %xmm1, %xmm0
25832 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
25833 ; SSSE3-NEXT: paddb %xmm4, %xmm3
25834 ; SSSE3-NEXT: pxor %xmm0, %xmm0
25835 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
25836 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
25837 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
25838 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
25839 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
25840 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
25841 ; SSSE3-NEXT: pand %xmm1, %xmm2
25842 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
25843 ; SSSE3-NEXT: por %xmm2, %xmm0
25846 ; SSE41-LABEL: ugt_26_v2i64:
25848 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25849 ; SSE41-NEXT: movdqa %xmm0, %xmm2
25850 ; SSE41-NEXT: pand %xmm1, %xmm2
25851 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25852 ; SSE41-NEXT: movdqa %xmm3, %xmm4
25853 ; SSE41-NEXT: pshufb %xmm2, %xmm4
25854 ; SSE41-NEXT: psrlw $4, %xmm0
25855 ; SSE41-NEXT: pand %xmm1, %xmm0
25856 ; SSE41-NEXT: pshufb %xmm0, %xmm3
25857 ; SSE41-NEXT: paddb %xmm4, %xmm3
25858 ; SSE41-NEXT: pxor %xmm0, %xmm0
25859 ; SSE41-NEXT: psadbw %xmm0, %xmm3
25860 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
25861 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
25862 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
25863 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
25864 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
25865 ; SSE41-NEXT: pand %xmm1, %xmm2
25866 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
25867 ; SSE41-NEXT: por %xmm2, %xmm0
25870 ; AVX1-LABEL: ugt_26_v2i64:
25872 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25873 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
25874 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25875 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
25876 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
25877 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
25878 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
25879 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
25880 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
25881 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25882 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25885 ; AVX2-LABEL: ugt_26_v2i64:
25887 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25888 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
25889 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25890 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
25891 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
25892 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
25893 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
25894 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
25895 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
25896 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25897 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25900 ; AVX512VPOPCNTDQ-LABEL: ugt_26_v2i64:
25901 ; AVX512VPOPCNTDQ: # %bb.0:
25902 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
25903 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
25904 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25905 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
25906 ; AVX512VPOPCNTDQ-NEXT: retq
25908 ; AVX512VPOPCNTDQVL-LABEL: ugt_26_v2i64:
25909 ; AVX512VPOPCNTDQVL: # %bb.0:
25910 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
25911 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
25912 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
25913 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
25914 ; AVX512VPOPCNTDQVL-NEXT: retq
25916 ; BITALG_NOVLX-LABEL: ugt_26_v2i64:
25917 ; BITALG_NOVLX: # %bb.0:
25918 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
25919 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
25920 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
25921 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25922 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25923 ; BITALG_NOVLX-NEXT: vzeroupper
25924 ; BITALG_NOVLX-NEXT: retq
25926 ; BITALG-LABEL: ugt_26_v2i64:
25928 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
25929 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
25930 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25931 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
25932 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
25933 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
25934 ; BITALG-NEXT: retq
25935 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
25936 %3 = icmp ugt <2 x i64> %2, <i64 26, i64 26>
25937 %4 = sext <2 x i1> %3 to <2 x i64>
25941 define <2 x i64> @ult_27_v2i64(<2 x i64> %0) {
25942 ; SSE2-LABEL: ult_27_v2i64:
25944 ; SSE2-NEXT: movdqa %xmm0, %xmm1
25945 ; SSE2-NEXT: psrlw $1, %xmm1
25946 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25947 ; SSE2-NEXT: psubb %xmm1, %xmm0
25948 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25949 ; SSE2-NEXT: movdqa %xmm0, %xmm2
25950 ; SSE2-NEXT: pand %xmm1, %xmm2
25951 ; SSE2-NEXT: psrlw $2, %xmm0
25952 ; SSE2-NEXT: pand %xmm1, %xmm0
25953 ; SSE2-NEXT: paddb %xmm2, %xmm0
25954 ; SSE2-NEXT: movdqa %xmm0, %xmm1
25955 ; SSE2-NEXT: psrlw $4, %xmm1
25956 ; SSE2-NEXT: paddb %xmm0, %xmm1
25957 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25958 ; SSE2-NEXT: pxor %xmm0, %xmm0
25959 ; SSE2-NEXT: psadbw %xmm0, %xmm1
25960 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25961 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
25962 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
25963 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483675,2147483675]
25964 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
25965 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
25966 ; SSE2-NEXT: pand %xmm2, %xmm1
25967 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
25968 ; SSE2-NEXT: por %xmm1, %xmm0
25971 ; SSE3-LABEL: ult_27_v2i64:
25973 ; SSE3-NEXT: movdqa %xmm0, %xmm1
25974 ; SSE3-NEXT: psrlw $1, %xmm1
25975 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25976 ; SSE3-NEXT: psubb %xmm1, %xmm0
25977 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25978 ; SSE3-NEXT: movdqa %xmm0, %xmm2
25979 ; SSE3-NEXT: pand %xmm1, %xmm2
25980 ; SSE3-NEXT: psrlw $2, %xmm0
25981 ; SSE3-NEXT: pand %xmm1, %xmm0
25982 ; SSE3-NEXT: paddb %xmm2, %xmm0
25983 ; SSE3-NEXT: movdqa %xmm0, %xmm1
25984 ; SSE3-NEXT: psrlw $4, %xmm1
25985 ; SSE3-NEXT: paddb %xmm0, %xmm1
25986 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25987 ; SSE3-NEXT: pxor %xmm0, %xmm0
25988 ; SSE3-NEXT: psadbw %xmm0, %xmm1
25989 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25990 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
25991 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
25992 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483675,2147483675]
25993 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
25994 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
25995 ; SSE3-NEXT: pand %xmm2, %xmm1
25996 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
25997 ; SSE3-NEXT: por %xmm1, %xmm0
26000 ; SSSE3-LABEL: ult_27_v2i64:
26002 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26003 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
26004 ; SSSE3-NEXT: pand %xmm1, %xmm2
26005 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26006 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
26007 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
26008 ; SSSE3-NEXT: psrlw $4, %xmm0
26009 ; SSSE3-NEXT: pand %xmm1, %xmm0
26010 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
26011 ; SSSE3-NEXT: paddb %xmm4, %xmm3
26012 ; SSSE3-NEXT: pxor %xmm0, %xmm0
26013 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
26014 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
26015 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
26016 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
26017 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483675,2147483675]
26018 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
26019 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
26020 ; SSSE3-NEXT: pand %xmm1, %xmm2
26021 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
26022 ; SSSE3-NEXT: por %xmm2, %xmm0
26025 ; SSE41-LABEL: ult_27_v2i64:
26027 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26028 ; SSE41-NEXT: movdqa %xmm0, %xmm2
26029 ; SSE41-NEXT: pand %xmm1, %xmm2
26030 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26031 ; SSE41-NEXT: movdqa %xmm3, %xmm4
26032 ; SSE41-NEXT: pshufb %xmm2, %xmm4
26033 ; SSE41-NEXT: psrlw $4, %xmm0
26034 ; SSE41-NEXT: pand %xmm1, %xmm0
26035 ; SSE41-NEXT: pshufb %xmm0, %xmm3
26036 ; SSE41-NEXT: paddb %xmm4, %xmm3
26037 ; SSE41-NEXT: pxor %xmm0, %xmm0
26038 ; SSE41-NEXT: psadbw %xmm0, %xmm3
26039 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
26040 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
26041 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
26042 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483675,2147483675]
26043 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
26044 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
26045 ; SSE41-NEXT: pand %xmm1, %xmm2
26046 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
26047 ; SSE41-NEXT: por %xmm2, %xmm0
26050 ; AVX1-LABEL: ult_27_v2i64:
26052 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26053 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
26054 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26055 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
26056 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
26057 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
26058 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
26059 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
26060 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
26061 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26062 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27]
26063 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26066 ; AVX2-LABEL: ult_27_v2i64:
26068 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26069 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
26070 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26071 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
26072 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
26073 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
26074 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
26075 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
26076 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
26077 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26078 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27]
26079 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26082 ; AVX512VPOPCNTDQ-LABEL: ult_27_v2i64:
26083 ; AVX512VPOPCNTDQ: # %bb.0:
26084 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26085 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
26086 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27]
26087 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26088 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
26089 ; AVX512VPOPCNTDQ-NEXT: retq
26091 ; AVX512VPOPCNTDQVL-LABEL: ult_27_v2i64:
26092 ; AVX512VPOPCNTDQVL: # %bb.0:
26093 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
26094 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
26095 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
26096 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
26097 ; AVX512VPOPCNTDQVL-NEXT: retq
26099 ; BITALG_NOVLX-LABEL: ult_27_v2i64:
26100 ; BITALG_NOVLX: # %bb.0:
26101 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26102 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
26103 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
26104 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26105 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27]
26106 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26107 ; BITALG_NOVLX-NEXT: vzeroupper
26108 ; BITALG_NOVLX-NEXT: retq
26110 ; BITALG-LABEL: ult_27_v2i64:
26112 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
26113 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
26114 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26115 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
26116 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
26117 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
26118 ; BITALG-NEXT: retq
26119 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
26120 %3 = icmp ult <2 x i64> %2, <i64 27, i64 27>
26121 %4 = sext <2 x i1> %3 to <2 x i64>
26125 define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) {
26126 ; SSE2-LABEL: ugt_27_v2i64:
26128 ; SSE2-NEXT: movdqa %xmm0, %xmm1
26129 ; SSE2-NEXT: psrlw $1, %xmm1
26130 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26131 ; SSE2-NEXT: psubb %xmm1, %xmm0
26132 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26133 ; SSE2-NEXT: movdqa %xmm0, %xmm2
26134 ; SSE2-NEXT: pand %xmm1, %xmm2
26135 ; SSE2-NEXT: psrlw $2, %xmm0
26136 ; SSE2-NEXT: pand %xmm1, %xmm0
26137 ; SSE2-NEXT: paddb %xmm2, %xmm0
26138 ; SSE2-NEXT: movdqa %xmm0, %xmm1
26139 ; SSE2-NEXT: psrlw $4, %xmm1
26140 ; SSE2-NEXT: paddb %xmm0, %xmm1
26141 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26142 ; SSE2-NEXT: pxor %xmm0, %xmm0
26143 ; SSE2-NEXT: psadbw %xmm0, %xmm1
26144 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26145 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
26146 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
26147 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26148 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
26149 ; SSE2-NEXT: pand %xmm2, %xmm3
26150 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
26151 ; SSE2-NEXT: por %xmm3, %xmm0
26154 ; SSE3-LABEL: ugt_27_v2i64:
26156 ; SSE3-NEXT: movdqa %xmm0, %xmm1
26157 ; SSE3-NEXT: psrlw $1, %xmm1
26158 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26159 ; SSE3-NEXT: psubb %xmm1, %xmm0
26160 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26161 ; SSE3-NEXT: movdqa %xmm0, %xmm2
26162 ; SSE3-NEXT: pand %xmm1, %xmm2
26163 ; SSE3-NEXT: psrlw $2, %xmm0
26164 ; SSE3-NEXT: pand %xmm1, %xmm0
26165 ; SSE3-NEXT: paddb %xmm2, %xmm0
26166 ; SSE3-NEXT: movdqa %xmm0, %xmm1
26167 ; SSE3-NEXT: psrlw $4, %xmm1
26168 ; SSE3-NEXT: paddb %xmm0, %xmm1
26169 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26170 ; SSE3-NEXT: pxor %xmm0, %xmm0
26171 ; SSE3-NEXT: psadbw %xmm0, %xmm1
26172 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26173 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
26174 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
26175 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26176 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
26177 ; SSE3-NEXT: pand %xmm2, %xmm3
26178 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
26179 ; SSE3-NEXT: por %xmm3, %xmm0
26182 ; SSSE3-LABEL: ugt_27_v2i64:
26184 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26185 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
26186 ; SSSE3-NEXT: pand %xmm1, %xmm2
26187 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26188 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
26189 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
26190 ; SSSE3-NEXT: psrlw $4, %xmm0
26191 ; SSSE3-NEXT: pand %xmm1, %xmm0
26192 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
26193 ; SSSE3-NEXT: paddb %xmm4, %xmm3
26194 ; SSSE3-NEXT: pxor %xmm0, %xmm0
26195 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
26196 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
26197 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
26198 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
26199 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
26200 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
26201 ; SSSE3-NEXT: pand %xmm1, %xmm2
26202 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
26203 ; SSSE3-NEXT: por %xmm2, %xmm0
26206 ; SSE41-LABEL: ugt_27_v2i64:
26208 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26209 ; SSE41-NEXT: movdqa %xmm0, %xmm2
26210 ; SSE41-NEXT: pand %xmm1, %xmm2
26211 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26212 ; SSE41-NEXT: movdqa %xmm3, %xmm4
26213 ; SSE41-NEXT: pshufb %xmm2, %xmm4
26214 ; SSE41-NEXT: psrlw $4, %xmm0
26215 ; SSE41-NEXT: pand %xmm1, %xmm0
26216 ; SSE41-NEXT: pshufb %xmm0, %xmm3
26217 ; SSE41-NEXT: paddb %xmm4, %xmm3
26218 ; SSE41-NEXT: pxor %xmm0, %xmm0
26219 ; SSE41-NEXT: psadbw %xmm0, %xmm3
26220 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
26221 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
26222 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
26223 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
26224 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
26225 ; SSE41-NEXT: pand %xmm1, %xmm2
26226 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
26227 ; SSE41-NEXT: por %xmm2, %xmm0
26230 ; AVX1-LABEL: ugt_27_v2i64:
26232 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26233 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
26234 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26235 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
26236 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
26237 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
26238 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
26239 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
26240 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
26241 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26242 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
26245 ; AVX2-LABEL: ugt_27_v2i64:
26247 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26248 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
26249 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26250 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
26251 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
26252 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
26253 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
26254 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
26255 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
26256 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26257 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
26260 ; AVX512VPOPCNTDQ-LABEL: ugt_27_v2i64:
26261 ; AVX512VPOPCNTDQ: # %bb.0:
26262 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26263 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
26264 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
26265 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
26266 ; AVX512VPOPCNTDQ-NEXT: retq
26268 ; AVX512VPOPCNTDQVL-LABEL: ugt_27_v2i64:
26269 ; AVX512VPOPCNTDQVL: # %bb.0:
26270 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
26271 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
26272 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
26273 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
26274 ; AVX512VPOPCNTDQVL-NEXT: retq
26276 ; BITALG_NOVLX-LABEL: ugt_27_v2i64:
26277 ; BITALG_NOVLX: # %bb.0:
26278 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26279 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
26280 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
26281 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26282 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
26283 ; BITALG_NOVLX-NEXT: vzeroupper
26284 ; BITALG_NOVLX-NEXT: retq
26286 ; BITALG-LABEL: ugt_27_v2i64:
26288 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
26289 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
26290 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26291 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
26292 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
26293 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
26294 ; BITALG-NEXT: retq
26295 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
26296 %3 = icmp ugt <2 x i64> %2, <i64 27, i64 27>
26297 %4 = sext <2 x i1> %3 to <2 x i64>
26301 define <2 x i64> @ult_28_v2i64(<2 x i64> %0) {
26302 ; SSE2-LABEL: ult_28_v2i64:
26304 ; SSE2-NEXT: movdqa %xmm0, %xmm1
26305 ; SSE2-NEXT: psrlw $1, %xmm1
26306 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26307 ; SSE2-NEXT: psubb %xmm1, %xmm0
26308 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26309 ; SSE2-NEXT: movdqa %xmm0, %xmm2
26310 ; SSE2-NEXT: pand %xmm1, %xmm2
26311 ; SSE2-NEXT: psrlw $2, %xmm0
26312 ; SSE2-NEXT: pand %xmm1, %xmm0
26313 ; SSE2-NEXT: paddb %xmm2, %xmm0
26314 ; SSE2-NEXT: movdqa %xmm0, %xmm1
26315 ; SSE2-NEXT: psrlw $4, %xmm1
26316 ; SSE2-NEXT: paddb %xmm0, %xmm1
26317 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26318 ; SSE2-NEXT: pxor %xmm0, %xmm0
26319 ; SSE2-NEXT: psadbw %xmm0, %xmm1
26320 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26321 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
26322 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
26323 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483676,2147483676]
26324 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
26325 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
26326 ; SSE2-NEXT: pand %xmm2, %xmm1
26327 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
26328 ; SSE2-NEXT: por %xmm1, %xmm0
26331 ; SSE3-LABEL: ult_28_v2i64:
26333 ; SSE3-NEXT: movdqa %xmm0, %xmm1
26334 ; SSE3-NEXT: psrlw $1, %xmm1
26335 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26336 ; SSE3-NEXT: psubb %xmm1, %xmm0
26337 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26338 ; SSE3-NEXT: movdqa %xmm0, %xmm2
26339 ; SSE3-NEXT: pand %xmm1, %xmm2
26340 ; SSE3-NEXT: psrlw $2, %xmm0
26341 ; SSE3-NEXT: pand %xmm1, %xmm0
26342 ; SSE3-NEXT: paddb %xmm2, %xmm0
26343 ; SSE3-NEXT: movdqa %xmm0, %xmm1
26344 ; SSE3-NEXT: psrlw $4, %xmm1
26345 ; SSE3-NEXT: paddb %xmm0, %xmm1
26346 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26347 ; SSE3-NEXT: pxor %xmm0, %xmm0
26348 ; SSE3-NEXT: psadbw %xmm0, %xmm1
26349 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26350 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
26351 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
26352 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483676,2147483676]
26353 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
26354 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
26355 ; SSE3-NEXT: pand %xmm2, %xmm1
26356 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
26357 ; SSE3-NEXT: por %xmm1, %xmm0
26360 ; SSSE3-LABEL: ult_28_v2i64:
26362 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26363 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
26364 ; SSSE3-NEXT: pand %xmm1, %xmm2
26365 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26366 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
26367 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
26368 ; SSSE3-NEXT: psrlw $4, %xmm0
26369 ; SSSE3-NEXT: pand %xmm1, %xmm0
26370 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
26371 ; SSSE3-NEXT: paddb %xmm4, %xmm3
26372 ; SSSE3-NEXT: pxor %xmm0, %xmm0
26373 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
26374 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
26375 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
26376 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
26377 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483676,2147483676]
26378 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
26379 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
26380 ; SSSE3-NEXT: pand %xmm1, %xmm2
26381 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
26382 ; SSSE3-NEXT: por %xmm2, %xmm0
26385 ; SSE41-LABEL: ult_28_v2i64:
26387 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26388 ; SSE41-NEXT: movdqa %xmm0, %xmm2
26389 ; SSE41-NEXT: pand %xmm1, %xmm2
26390 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26391 ; SSE41-NEXT: movdqa %xmm3, %xmm4
26392 ; SSE41-NEXT: pshufb %xmm2, %xmm4
26393 ; SSE41-NEXT: psrlw $4, %xmm0
26394 ; SSE41-NEXT: pand %xmm1, %xmm0
26395 ; SSE41-NEXT: pshufb %xmm0, %xmm3
26396 ; SSE41-NEXT: paddb %xmm4, %xmm3
26397 ; SSE41-NEXT: pxor %xmm0, %xmm0
26398 ; SSE41-NEXT: psadbw %xmm0, %xmm3
26399 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
26400 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
26401 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
26402 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483676,2147483676]
26403 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
26404 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
26405 ; SSE41-NEXT: pand %xmm1, %xmm2
26406 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
26407 ; SSE41-NEXT: por %xmm2, %xmm0
26410 ; AVX1-LABEL: ult_28_v2i64:
26412 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26413 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
26414 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26415 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
26416 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
26417 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
26418 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
26419 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
26420 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
26421 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26422 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28]
26423 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26426 ; AVX2-LABEL: ult_28_v2i64:
26428 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26429 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
26430 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26431 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
26432 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
26433 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
26434 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
26435 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
26436 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
26437 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26438 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28]
26439 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26442 ; AVX512VPOPCNTDQ-LABEL: ult_28_v2i64:
26443 ; AVX512VPOPCNTDQ: # %bb.0:
26444 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26445 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
26446 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28]
26447 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26448 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
26449 ; AVX512VPOPCNTDQ-NEXT: retq
26451 ; AVX512VPOPCNTDQVL-LABEL: ult_28_v2i64:
26452 ; AVX512VPOPCNTDQVL: # %bb.0:
26453 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
26454 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
26455 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
26456 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
26457 ; AVX512VPOPCNTDQVL-NEXT: retq
26459 ; BITALG_NOVLX-LABEL: ult_28_v2i64:
26460 ; BITALG_NOVLX: # %bb.0:
26461 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26462 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
26463 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
26464 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26465 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28]
26466 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26467 ; BITALG_NOVLX-NEXT: vzeroupper
26468 ; BITALG_NOVLX-NEXT: retq
26470 ; BITALG-LABEL: ult_28_v2i64:
26472 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
26473 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
26474 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26475 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
26476 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
26477 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
26478 ; BITALG-NEXT: retq
26479 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
26480 %3 = icmp ult <2 x i64> %2, <i64 28, i64 28>
26481 %4 = sext <2 x i1> %3 to <2 x i64>
26485 define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) {
26486 ; SSE2-LABEL: ugt_28_v2i64:
26488 ; SSE2-NEXT: movdqa %xmm0, %xmm1
26489 ; SSE2-NEXT: psrlw $1, %xmm1
26490 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26491 ; SSE2-NEXT: psubb %xmm1, %xmm0
26492 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26493 ; SSE2-NEXT: movdqa %xmm0, %xmm2
26494 ; SSE2-NEXT: pand %xmm1, %xmm2
26495 ; SSE2-NEXT: psrlw $2, %xmm0
26496 ; SSE2-NEXT: pand %xmm1, %xmm0
26497 ; SSE2-NEXT: paddb %xmm2, %xmm0
26498 ; SSE2-NEXT: movdqa %xmm0, %xmm1
26499 ; SSE2-NEXT: psrlw $4, %xmm1
26500 ; SSE2-NEXT: paddb %xmm0, %xmm1
26501 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26502 ; SSE2-NEXT: pxor %xmm0, %xmm0
26503 ; SSE2-NEXT: psadbw %xmm0, %xmm1
26504 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26505 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
26506 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
26507 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26508 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
26509 ; SSE2-NEXT: pand %xmm2, %xmm3
26510 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
26511 ; SSE2-NEXT: por %xmm3, %xmm0
26514 ; SSE3-LABEL: ugt_28_v2i64:
26516 ; SSE3-NEXT: movdqa %xmm0, %xmm1
26517 ; SSE3-NEXT: psrlw $1, %xmm1
26518 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26519 ; SSE3-NEXT: psubb %xmm1, %xmm0
26520 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26521 ; SSE3-NEXT: movdqa %xmm0, %xmm2
26522 ; SSE3-NEXT: pand %xmm1, %xmm2
26523 ; SSE3-NEXT: psrlw $2, %xmm0
26524 ; SSE3-NEXT: pand %xmm1, %xmm0
26525 ; SSE3-NEXT: paddb %xmm2, %xmm0
26526 ; SSE3-NEXT: movdqa %xmm0, %xmm1
26527 ; SSE3-NEXT: psrlw $4, %xmm1
26528 ; SSE3-NEXT: paddb %xmm0, %xmm1
26529 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26530 ; SSE3-NEXT: pxor %xmm0, %xmm0
26531 ; SSE3-NEXT: psadbw %xmm0, %xmm1
26532 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26533 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
26534 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
26535 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26536 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
26537 ; SSE3-NEXT: pand %xmm2, %xmm3
26538 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
26539 ; SSE3-NEXT: por %xmm3, %xmm0
26542 ; SSSE3-LABEL: ugt_28_v2i64:
26544 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26545 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
26546 ; SSSE3-NEXT: pand %xmm1, %xmm2
26547 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26548 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
26549 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
26550 ; SSSE3-NEXT: psrlw $4, %xmm0
26551 ; SSSE3-NEXT: pand %xmm1, %xmm0
26552 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
26553 ; SSSE3-NEXT: paddb %xmm4, %xmm3
26554 ; SSSE3-NEXT: pxor %xmm0, %xmm0
26555 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
26556 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
26557 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
26558 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
26559 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
26560 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
26561 ; SSSE3-NEXT: pand %xmm1, %xmm2
26562 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
26563 ; SSSE3-NEXT: por %xmm2, %xmm0
26566 ; SSE41-LABEL: ugt_28_v2i64:
26568 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26569 ; SSE41-NEXT: movdqa %xmm0, %xmm2
26570 ; SSE41-NEXT: pand %xmm1, %xmm2
26571 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26572 ; SSE41-NEXT: movdqa %xmm3, %xmm4
26573 ; SSE41-NEXT: pshufb %xmm2, %xmm4
26574 ; SSE41-NEXT: psrlw $4, %xmm0
26575 ; SSE41-NEXT: pand %xmm1, %xmm0
26576 ; SSE41-NEXT: pshufb %xmm0, %xmm3
26577 ; SSE41-NEXT: paddb %xmm4, %xmm3
26578 ; SSE41-NEXT: pxor %xmm0, %xmm0
26579 ; SSE41-NEXT: psadbw %xmm0, %xmm3
26580 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
26581 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
26582 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
26583 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
26584 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
26585 ; SSE41-NEXT: pand %xmm1, %xmm2
26586 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
26587 ; SSE41-NEXT: por %xmm2, %xmm0
26590 ; AVX1-LABEL: ugt_28_v2i64:
26592 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26593 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
26594 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26595 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
26596 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
26597 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
26598 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
26599 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
26600 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
26601 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26602 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
26605 ; AVX2-LABEL: ugt_28_v2i64:
26607 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26608 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
26609 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26610 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
26611 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
26612 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
26613 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
26614 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
26615 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
26616 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26617 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
26620 ; AVX512VPOPCNTDQ-LABEL: ugt_28_v2i64:
26621 ; AVX512VPOPCNTDQ: # %bb.0:
26622 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26623 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
26624 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
26625 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
26626 ; AVX512VPOPCNTDQ-NEXT: retq
26628 ; AVX512VPOPCNTDQVL-LABEL: ugt_28_v2i64:
26629 ; AVX512VPOPCNTDQVL: # %bb.0:
26630 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
26631 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
26632 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
26633 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
26634 ; AVX512VPOPCNTDQVL-NEXT: retq
26636 ; BITALG_NOVLX-LABEL: ugt_28_v2i64:
26637 ; BITALG_NOVLX: # %bb.0:
26638 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26639 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
26640 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
26641 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26642 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
26643 ; BITALG_NOVLX-NEXT: vzeroupper
26644 ; BITALG_NOVLX-NEXT: retq
26646 ; BITALG-LABEL: ugt_28_v2i64:
26648 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
26649 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
26650 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26651 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
26652 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
26653 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
26654 ; BITALG-NEXT: retq
26655 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
26656 %3 = icmp ugt <2 x i64> %2, <i64 28, i64 28>
26657 %4 = sext <2 x i1> %3 to <2 x i64>
26661 define <2 x i64> @ult_29_v2i64(<2 x i64> %0) {
26662 ; SSE2-LABEL: ult_29_v2i64:
26664 ; SSE2-NEXT: movdqa %xmm0, %xmm1
26665 ; SSE2-NEXT: psrlw $1, %xmm1
26666 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26667 ; SSE2-NEXT: psubb %xmm1, %xmm0
26668 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26669 ; SSE2-NEXT: movdqa %xmm0, %xmm2
26670 ; SSE2-NEXT: pand %xmm1, %xmm2
26671 ; SSE2-NEXT: psrlw $2, %xmm0
26672 ; SSE2-NEXT: pand %xmm1, %xmm0
26673 ; SSE2-NEXT: paddb %xmm2, %xmm0
26674 ; SSE2-NEXT: movdqa %xmm0, %xmm1
26675 ; SSE2-NEXT: psrlw $4, %xmm1
26676 ; SSE2-NEXT: paddb %xmm0, %xmm1
26677 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26678 ; SSE2-NEXT: pxor %xmm0, %xmm0
26679 ; SSE2-NEXT: psadbw %xmm0, %xmm1
26680 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26681 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
26682 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
26683 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483677,2147483677]
26684 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
26685 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
26686 ; SSE2-NEXT: pand %xmm2, %xmm1
26687 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
26688 ; SSE2-NEXT: por %xmm1, %xmm0
26691 ; SSE3-LABEL: ult_29_v2i64:
26693 ; SSE3-NEXT: movdqa %xmm0, %xmm1
26694 ; SSE3-NEXT: psrlw $1, %xmm1
26695 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26696 ; SSE3-NEXT: psubb %xmm1, %xmm0
26697 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26698 ; SSE3-NEXT: movdqa %xmm0, %xmm2
26699 ; SSE3-NEXT: pand %xmm1, %xmm2
26700 ; SSE3-NEXT: psrlw $2, %xmm0
26701 ; SSE3-NEXT: pand %xmm1, %xmm0
26702 ; SSE3-NEXT: paddb %xmm2, %xmm0
26703 ; SSE3-NEXT: movdqa %xmm0, %xmm1
26704 ; SSE3-NEXT: psrlw $4, %xmm1
26705 ; SSE3-NEXT: paddb %xmm0, %xmm1
26706 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26707 ; SSE3-NEXT: pxor %xmm0, %xmm0
26708 ; SSE3-NEXT: psadbw %xmm0, %xmm1
26709 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26710 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
26711 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
26712 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483677,2147483677]
26713 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
26714 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
26715 ; SSE3-NEXT: pand %xmm2, %xmm1
26716 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
26717 ; SSE3-NEXT: por %xmm1, %xmm0
26720 ; SSSE3-LABEL: ult_29_v2i64:
26722 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26723 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
26724 ; SSSE3-NEXT: pand %xmm1, %xmm2
26725 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26726 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
26727 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
26728 ; SSSE3-NEXT: psrlw $4, %xmm0
26729 ; SSSE3-NEXT: pand %xmm1, %xmm0
26730 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
26731 ; SSSE3-NEXT: paddb %xmm4, %xmm3
26732 ; SSSE3-NEXT: pxor %xmm0, %xmm0
26733 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
26734 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
26735 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
26736 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
26737 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483677,2147483677]
26738 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
26739 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
26740 ; SSSE3-NEXT: pand %xmm1, %xmm2
26741 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
26742 ; SSSE3-NEXT: por %xmm2, %xmm0
26745 ; SSE41-LABEL: ult_29_v2i64:
26747 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26748 ; SSE41-NEXT: movdqa %xmm0, %xmm2
26749 ; SSE41-NEXT: pand %xmm1, %xmm2
26750 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26751 ; SSE41-NEXT: movdqa %xmm3, %xmm4
26752 ; SSE41-NEXT: pshufb %xmm2, %xmm4
26753 ; SSE41-NEXT: psrlw $4, %xmm0
26754 ; SSE41-NEXT: pand %xmm1, %xmm0
26755 ; SSE41-NEXT: pshufb %xmm0, %xmm3
26756 ; SSE41-NEXT: paddb %xmm4, %xmm3
26757 ; SSE41-NEXT: pxor %xmm0, %xmm0
26758 ; SSE41-NEXT: psadbw %xmm0, %xmm3
26759 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
26760 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
26761 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
26762 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483677,2147483677]
26763 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
26764 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
26765 ; SSE41-NEXT: pand %xmm1, %xmm2
26766 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
26767 ; SSE41-NEXT: por %xmm2, %xmm0
26770 ; AVX1-LABEL: ult_29_v2i64:
26772 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26773 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
26774 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26775 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
26776 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
26777 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
26778 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
26779 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
26780 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
26781 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26782 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29]
26783 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26786 ; AVX2-LABEL: ult_29_v2i64:
26788 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26789 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
26790 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26791 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
26792 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
26793 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
26794 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
26795 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
26796 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
26797 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26798 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29]
26799 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26802 ; AVX512VPOPCNTDQ-LABEL: ult_29_v2i64:
26803 ; AVX512VPOPCNTDQ: # %bb.0:
26804 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26805 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
26806 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29]
26807 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26808 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
26809 ; AVX512VPOPCNTDQ-NEXT: retq
26811 ; AVX512VPOPCNTDQVL-LABEL: ult_29_v2i64:
26812 ; AVX512VPOPCNTDQVL: # %bb.0:
26813 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
26814 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
26815 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
26816 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
26817 ; AVX512VPOPCNTDQVL-NEXT: retq
26819 ; BITALG_NOVLX-LABEL: ult_29_v2i64:
26820 ; BITALG_NOVLX: # %bb.0:
26821 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26822 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
26823 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
26824 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26825 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29]
26826 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26827 ; BITALG_NOVLX-NEXT: vzeroupper
26828 ; BITALG_NOVLX-NEXT: retq
26830 ; BITALG-LABEL: ult_29_v2i64:
26832 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
26833 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
26834 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26835 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
26836 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
26837 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
26838 ; BITALG-NEXT: retq
26839 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
26840 %3 = icmp ult <2 x i64> %2, <i64 29, i64 29>
26841 %4 = sext <2 x i1> %3 to <2 x i64>
26845 define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) {
26846 ; SSE2-LABEL: ugt_29_v2i64:
26848 ; SSE2-NEXT: movdqa %xmm0, %xmm1
26849 ; SSE2-NEXT: psrlw $1, %xmm1
26850 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26851 ; SSE2-NEXT: psubb %xmm1, %xmm0
26852 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26853 ; SSE2-NEXT: movdqa %xmm0, %xmm2
26854 ; SSE2-NEXT: pand %xmm1, %xmm2
26855 ; SSE2-NEXT: psrlw $2, %xmm0
26856 ; SSE2-NEXT: pand %xmm1, %xmm0
26857 ; SSE2-NEXT: paddb %xmm2, %xmm0
26858 ; SSE2-NEXT: movdqa %xmm0, %xmm1
26859 ; SSE2-NEXT: psrlw $4, %xmm1
26860 ; SSE2-NEXT: paddb %xmm0, %xmm1
26861 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26862 ; SSE2-NEXT: pxor %xmm0, %xmm0
26863 ; SSE2-NEXT: psadbw %xmm0, %xmm1
26864 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26865 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
26866 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
26867 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26868 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
26869 ; SSE2-NEXT: pand %xmm2, %xmm3
26870 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
26871 ; SSE2-NEXT: por %xmm3, %xmm0
26874 ; SSE3-LABEL: ugt_29_v2i64:
26876 ; SSE3-NEXT: movdqa %xmm0, %xmm1
26877 ; SSE3-NEXT: psrlw $1, %xmm1
26878 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26879 ; SSE3-NEXT: psubb %xmm1, %xmm0
26880 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26881 ; SSE3-NEXT: movdqa %xmm0, %xmm2
26882 ; SSE3-NEXT: pand %xmm1, %xmm2
26883 ; SSE3-NEXT: psrlw $2, %xmm0
26884 ; SSE3-NEXT: pand %xmm1, %xmm0
26885 ; SSE3-NEXT: paddb %xmm2, %xmm0
26886 ; SSE3-NEXT: movdqa %xmm0, %xmm1
26887 ; SSE3-NEXT: psrlw $4, %xmm1
26888 ; SSE3-NEXT: paddb %xmm0, %xmm1
26889 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26890 ; SSE3-NEXT: pxor %xmm0, %xmm0
26891 ; SSE3-NEXT: psadbw %xmm0, %xmm1
26892 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26893 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
26894 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
26895 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26896 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
26897 ; SSE3-NEXT: pand %xmm2, %xmm3
26898 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
26899 ; SSE3-NEXT: por %xmm3, %xmm0
26902 ; SSSE3-LABEL: ugt_29_v2i64:
26904 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26905 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
26906 ; SSSE3-NEXT: pand %xmm1, %xmm2
26907 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26908 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
26909 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
26910 ; SSSE3-NEXT: psrlw $4, %xmm0
26911 ; SSSE3-NEXT: pand %xmm1, %xmm0
26912 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
26913 ; SSSE3-NEXT: paddb %xmm4, %xmm3
26914 ; SSSE3-NEXT: pxor %xmm0, %xmm0
26915 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
26916 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
26917 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
26918 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
26919 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
26920 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
26921 ; SSSE3-NEXT: pand %xmm1, %xmm2
26922 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
26923 ; SSSE3-NEXT: por %xmm2, %xmm0
26926 ; SSE41-LABEL: ugt_29_v2i64:
26928 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26929 ; SSE41-NEXT: movdqa %xmm0, %xmm2
26930 ; SSE41-NEXT: pand %xmm1, %xmm2
26931 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26932 ; SSE41-NEXT: movdqa %xmm3, %xmm4
26933 ; SSE41-NEXT: pshufb %xmm2, %xmm4
26934 ; SSE41-NEXT: psrlw $4, %xmm0
26935 ; SSE41-NEXT: pand %xmm1, %xmm0
26936 ; SSE41-NEXT: pshufb %xmm0, %xmm3
26937 ; SSE41-NEXT: paddb %xmm4, %xmm3
26938 ; SSE41-NEXT: pxor %xmm0, %xmm0
26939 ; SSE41-NEXT: psadbw %xmm0, %xmm3
26940 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
26941 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
26942 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
26943 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
26944 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
26945 ; SSE41-NEXT: pand %xmm1, %xmm2
26946 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
26947 ; SSE41-NEXT: por %xmm2, %xmm0
26950 ; AVX1-LABEL: ugt_29_v2i64:
26952 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26953 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
26954 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26955 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
26956 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
26957 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
26958 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
26959 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
26960 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
26961 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26962 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
26965 ; AVX2-LABEL: ugt_29_v2i64:
26967 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26968 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
26969 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26970 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
26971 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
26972 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
26973 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
26974 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
26975 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
26976 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26977 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
26980 ; AVX512VPOPCNTDQ-LABEL: ugt_29_v2i64:
26981 ; AVX512VPOPCNTDQ: # %bb.0:
26982 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26983 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
26984 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
26985 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
26986 ; AVX512VPOPCNTDQ-NEXT: retq
26988 ; AVX512VPOPCNTDQVL-LABEL: ugt_29_v2i64:
26989 ; AVX512VPOPCNTDQVL: # %bb.0:
26990 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
26991 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
26992 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
26993 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
26994 ; AVX512VPOPCNTDQVL-NEXT: retq
26996 ; BITALG_NOVLX-LABEL: ugt_29_v2i64:
26997 ; BITALG_NOVLX: # %bb.0:
26998 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26999 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
27000 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
27001 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27002 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
27003 ; BITALG_NOVLX-NEXT: vzeroupper
27004 ; BITALG_NOVLX-NEXT: retq
27006 ; BITALG-LABEL: ugt_29_v2i64:
27008 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
27009 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
27010 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27011 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
27012 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
27013 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
27014 ; BITALG-NEXT: retq
27015 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
27016 %3 = icmp ugt <2 x i64> %2, <i64 29, i64 29>
27017 %4 = sext <2 x i1> %3 to <2 x i64>
27021 define <2 x i64> @ult_30_v2i64(<2 x i64> %0) {
27022 ; SSE2-LABEL: ult_30_v2i64:
27024 ; SSE2-NEXT: movdqa %xmm0, %xmm1
27025 ; SSE2-NEXT: psrlw $1, %xmm1
27026 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27027 ; SSE2-NEXT: psubb %xmm1, %xmm0
27028 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27029 ; SSE2-NEXT: movdqa %xmm0, %xmm2
27030 ; SSE2-NEXT: pand %xmm1, %xmm2
27031 ; SSE2-NEXT: psrlw $2, %xmm0
27032 ; SSE2-NEXT: pand %xmm1, %xmm0
27033 ; SSE2-NEXT: paddb %xmm2, %xmm0
27034 ; SSE2-NEXT: movdqa %xmm0, %xmm1
27035 ; SSE2-NEXT: psrlw $4, %xmm1
27036 ; SSE2-NEXT: paddb %xmm0, %xmm1
27037 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27038 ; SSE2-NEXT: pxor %xmm0, %xmm0
27039 ; SSE2-NEXT: psadbw %xmm0, %xmm1
27040 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27041 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
27042 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
27043 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483678,2147483678]
27044 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
27045 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
27046 ; SSE2-NEXT: pand %xmm2, %xmm1
27047 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
27048 ; SSE2-NEXT: por %xmm1, %xmm0
27051 ; SSE3-LABEL: ult_30_v2i64:
27053 ; SSE3-NEXT: movdqa %xmm0, %xmm1
27054 ; SSE3-NEXT: psrlw $1, %xmm1
27055 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27056 ; SSE3-NEXT: psubb %xmm1, %xmm0
27057 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27058 ; SSE3-NEXT: movdqa %xmm0, %xmm2
27059 ; SSE3-NEXT: pand %xmm1, %xmm2
27060 ; SSE3-NEXT: psrlw $2, %xmm0
27061 ; SSE3-NEXT: pand %xmm1, %xmm0
27062 ; SSE3-NEXT: paddb %xmm2, %xmm0
27063 ; SSE3-NEXT: movdqa %xmm0, %xmm1
27064 ; SSE3-NEXT: psrlw $4, %xmm1
27065 ; SSE3-NEXT: paddb %xmm0, %xmm1
27066 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27067 ; SSE3-NEXT: pxor %xmm0, %xmm0
27068 ; SSE3-NEXT: psadbw %xmm0, %xmm1
27069 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27070 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
27071 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
27072 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483678,2147483678]
27073 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
27074 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
27075 ; SSE3-NEXT: pand %xmm2, %xmm1
27076 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
27077 ; SSE3-NEXT: por %xmm1, %xmm0
27080 ; SSSE3-LABEL: ult_30_v2i64:
27082 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27083 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
27084 ; SSSE3-NEXT: pand %xmm1, %xmm2
27085 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27086 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
27087 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
27088 ; SSSE3-NEXT: psrlw $4, %xmm0
27089 ; SSSE3-NEXT: pand %xmm1, %xmm0
27090 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
27091 ; SSSE3-NEXT: paddb %xmm4, %xmm3
27092 ; SSSE3-NEXT: pxor %xmm0, %xmm0
27093 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
27094 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
27095 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
27096 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
27097 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483678,2147483678]
27098 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
27099 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
27100 ; SSSE3-NEXT: pand %xmm1, %xmm2
27101 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
27102 ; SSSE3-NEXT: por %xmm2, %xmm0
27105 ; SSE41-LABEL: ult_30_v2i64:
27107 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27108 ; SSE41-NEXT: movdqa %xmm0, %xmm2
27109 ; SSE41-NEXT: pand %xmm1, %xmm2
27110 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27111 ; SSE41-NEXT: movdqa %xmm3, %xmm4
27112 ; SSE41-NEXT: pshufb %xmm2, %xmm4
27113 ; SSE41-NEXT: psrlw $4, %xmm0
27114 ; SSE41-NEXT: pand %xmm1, %xmm0
27115 ; SSE41-NEXT: pshufb %xmm0, %xmm3
27116 ; SSE41-NEXT: paddb %xmm4, %xmm3
27117 ; SSE41-NEXT: pxor %xmm0, %xmm0
27118 ; SSE41-NEXT: psadbw %xmm0, %xmm3
27119 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
27120 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
27121 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
27122 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483678,2147483678]
27123 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
27124 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
27125 ; SSE41-NEXT: pand %xmm1, %xmm2
27126 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
27127 ; SSE41-NEXT: por %xmm2, %xmm0
27130 ; AVX1-LABEL: ult_30_v2i64:
27132 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27133 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
27134 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27135 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
27136 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
27137 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
27138 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
27139 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
27140 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
27141 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27142 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30]
27143 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27146 ; AVX2-LABEL: ult_30_v2i64:
27148 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27149 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
27150 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27151 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
27152 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
27153 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
27154 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
27155 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
27156 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
27157 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27158 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30]
27159 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27162 ; AVX512VPOPCNTDQ-LABEL: ult_30_v2i64:
27163 ; AVX512VPOPCNTDQ: # %bb.0:
27164 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
27165 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
27166 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30]
27167 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27168 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
27169 ; AVX512VPOPCNTDQ-NEXT: retq
27171 ; AVX512VPOPCNTDQVL-LABEL: ult_30_v2i64:
27172 ; AVX512VPOPCNTDQVL: # %bb.0:
27173 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
27174 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
27175 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
27176 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
27177 ; AVX512VPOPCNTDQVL-NEXT: retq
27179 ; BITALG_NOVLX-LABEL: ult_30_v2i64:
27180 ; BITALG_NOVLX: # %bb.0:
27181 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
27182 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
27183 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
27184 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27185 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30]
27186 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27187 ; BITALG_NOVLX-NEXT: vzeroupper
27188 ; BITALG_NOVLX-NEXT: retq
27190 ; BITALG-LABEL: ult_30_v2i64:
27192 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
27193 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
27194 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27195 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
27196 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
27197 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
27198 ; BITALG-NEXT: retq
27199 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
27200 %3 = icmp ult <2 x i64> %2, <i64 30, i64 30>
27201 %4 = sext <2 x i1> %3 to <2 x i64>
27205 define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) {
27206 ; SSE2-LABEL: ugt_30_v2i64:
27208 ; SSE2-NEXT: movdqa %xmm0, %xmm1
27209 ; SSE2-NEXT: psrlw $1, %xmm1
27210 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27211 ; SSE2-NEXT: psubb %xmm1, %xmm0
27212 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27213 ; SSE2-NEXT: movdqa %xmm0, %xmm2
27214 ; SSE2-NEXT: pand %xmm1, %xmm2
27215 ; SSE2-NEXT: psrlw $2, %xmm0
27216 ; SSE2-NEXT: pand %xmm1, %xmm0
27217 ; SSE2-NEXT: paddb %xmm2, %xmm0
27218 ; SSE2-NEXT: movdqa %xmm0, %xmm1
27219 ; SSE2-NEXT: psrlw $4, %xmm1
27220 ; SSE2-NEXT: paddb %xmm0, %xmm1
27221 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27222 ; SSE2-NEXT: pxor %xmm0, %xmm0
27223 ; SSE2-NEXT: psadbw %xmm0, %xmm1
27224 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27225 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
27226 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
27227 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27228 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
27229 ; SSE2-NEXT: pand %xmm2, %xmm3
27230 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
27231 ; SSE2-NEXT: por %xmm3, %xmm0
27234 ; SSE3-LABEL: ugt_30_v2i64:
27236 ; SSE3-NEXT: movdqa %xmm0, %xmm1
27237 ; SSE3-NEXT: psrlw $1, %xmm1
27238 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27239 ; SSE3-NEXT: psubb %xmm1, %xmm0
27240 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27241 ; SSE3-NEXT: movdqa %xmm0, %xmm2
27242 ; SSE3-NEXT: pand %xmm1, %xmm2
27243 ; SSE3-NEXT: psrlw $2, %xmm0
27244 ; SSE3-NEXT: pand %xmm1, %xmm0
27245 ; SSE3-NEXT: paddb %xmm2, %xmm0
27246 ; SSE3-NEXT: movdqa %xmm0, %xmm1
27247 ; SSE3-NEXT: psrlw $4, %xmm1
27248 ; SSE3-NEXT: paddb %xmm0, %xmm1
27249 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27250 ; SSE3-NEXT: pxor %xmm0, %xmm0
27251 ; SSE3-NEXT: psadbw %xmm0, %xmm1
27252 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27253 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
27254 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
27255 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27256 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
27257 ; SSE3-NEXT: pand %xmm2, %xmm3
27258 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
27259 ; SSE3-NEXT: por %xmm3, %xmm0
27262 ; SSSE3-LABEL: ugt_30_v2i64:
27264 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27265 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
27266 ; SSSE3-NEXT: pand %xmm1, %xmm2
27267 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27268 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
27269 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
27270 ; SSSE3-NEXT: psrlw $4, %xmm0
27271 ; SSSE3-NEXT: pand %xmm1, %xmm0
27272 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
27273 ; SSSE3-NEXT: paddb %xmm4, %xmm3
27274 ; SSSE3-NEXT: pxor %xmm0, %xmm0
27275 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
27276 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
27277 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
27278 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
27279 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
27280 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
27281 ; SSSE3-NEXT: pand %xmm1, %xmm2
27282 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
27283 ; SSSE3-NEXT: por %xmm2, %xmm0
27286 ; SSE41-LABEL: ugt_30_v2i64:
27288 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27289 ; SSE41-NEXT: movdqa %xmm0, %xmm2
27290 ; SSE41-NEXT: pand %xmm1, %xmm2
27291 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27292 ; SSE41-NEXT: movdqa %xmm3, %xmm4
27293 ; SSE41-NEXT: pshufb %xmm2, %xmm4
27294 ; SSE41-NEXT: psrlw $4, %xmm0
27295 ; SSE41-NEXT: pand %xmm1, %xmm0
27296 ; SSE41-NEXT: pshufb %xmm0, %xmm3
27297 ; SSE41-NEXT: paddb %xmm4, %xmm3
27298 ; SSE41-NEXT: pxor %xmm0, %xmm0
27299 ; SSE41-NEXT: psadbw %xmm0, %xmm3
27300 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
27301 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
27302 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
27303 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
27304 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
27305 ; SSE41-NEXT: pand %xmm1, %xmm2
27306 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
27307 ; SSE41-NEXT: por %xmm2, %xmm0
27310 ; AVX1-LABEL: ugt_30_v2i64:
27312 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27313 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
27314 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27315 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
27316 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
27317 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
27318 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
27319 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
27320 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
27321 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27322 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
27325 ; AVX2-LABEL: ugt_30_v2i64:
27327 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27328 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
27329 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27330 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
27331 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
27332 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
27333 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
27334 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
27335 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
27336 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27337 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
27340 ; AVX512VPOPCNTDQ-LABEL: ugt_30_v2i64:
27341 ; AVX512VPOPCNTDQ: # %bb.0:
27342 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
27343 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
27344 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
27345 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
27346 ; AVX512VPOPCNTDQ-NEXT: retq
27348 ; AVX512VPOPCNTDQVL-LABEL: ugt_30_v2i64:
27349 ; AVX512VPOPCNTDQVL: # %bb.0:
27350 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
27351 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
27352 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
27353 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
27354 ; AVX512VPOPCNTDQVL-NEXT: retq
27356 ; BITALG_NOVLX-LABEL: ugt_30_v2i64:
27357 ; BITALG_NOVLX: # %bb.0:
27358 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
27359 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
27360 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
27361 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27362 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
27363 ; BITALG_NOVLX-NEXT: vzeroupper
27364 ; BITALG_NOVLX-NEXT: retq
27366 ; BITALG-LABEL: ugt_30_v2i64:
27368 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
27369 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
27370 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27371 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
27372 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
27373 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
27374 ; BITALG-NEXT: retq
27375 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
27376 %3 = icmp ugt <2 x i64> %2, <i64 30, i64 30>
27377 %4 = sext <2 x i1> %3 to <2 x i64>
27381 define <2 x i64> @ult_31_v2i64(<2 x i64> %0) {
27382 ; SSE2-LABEL: ult_31_v2i64:
27384 ; SSE2-NEXT: movdqa %xmm0, %xmm1
27385 ; SSE2-NEXT: psrlw $1, %xmm1
27386 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27387 ; SSE2-NEXT: psubb %xmm1, %xmm0
27388 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27389 ; SSE2-NEXT: movdqa %xmm0, %xmm2
27390 ; SSE2-NEXT: pand %xmm1, %xmm2
27391 ; SSE2-NEXT: psrlw $2, %xmm0
27392 ; SSE2-NEXT: pand %xmm1, %xmm0
27393 ; SSE2-NEXT: paddb %xmm2, %xmm0
27394 ; SSE2-NEXT: movdqa %xmm0, %xmm1
27395 ; SSE2-NEXT: psrlw $4, %xmm1
27396 ; SSE2-NEXT: paddb %xmm0, %xmm1
27397 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27398 ; SSE2-NEXT: pxor %xmm0, %xmm0
27399 ; SSE2-NEXT: psadbw %xmm0, %xmm1
27400 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27401 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
27402 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
27403 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483679,2147483679]
27404 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
27405 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
27406 ; SSE2-NEXT: pand %xmm2, %xmm1
27407 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
27408 ; SSE2-NEXT: por %xmm1, %xmm0
27411 ; SSE3-LABEL: ult_31_v2i64:
27413 ; SSE3-NEXT: movdqa %xmm0, %xmm1
27414 ; SSE3-NEXT: psrlw $1, %xmm1
27415 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27416 ; SSE3-NEXT: psubb %xmm1, %xmm0
27417 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27418 ; SSE3-NEXT: movdqa %xmm0, %xmm2
27419 ; SSE3-NEXT: pand %xmm1, %xmm2
27420 ; SSE3-NEXT: psrlw $2, %xmm0
27421 ; SSE3-NEXT: pand %xmm1, %xmm0
27422 ; SSE3-NEXT: paddb %xmm2, %xmm0
27423 ; SSE3-NEXT: movdqa %xmm0, %xmm1
27424 ; SSE3-NEXT: psrlw $4, %xmm1
27425 ; SSE3-NEXT: paddb %xmm0, %xmm1
27426 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27427 ; SSE3-NEXT: pxor %xmm0, %xmm0
27428 ; SSE3-NEXT: psadbw %xmm0, %xmm1
27429 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27430 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
27431 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
27432 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483679,2147483679]
27433 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
27434 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
27435 ; SSE3-NEXT: pand %xmm2, %xmm1
27436 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
27437 ; SSE3-NEXT: por %xmm1, %xmm0
27440 ; SSSE3-LABEL: ult_31_v2i64:
27442 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27443 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
27444 ; SSSE3-NEXT: pand %xmm1, %xmm2
27445 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27446 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
27447 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
27448 ; SSSE3-NEXT: psrlw $4, %xmm0
27449 ; SSSE3-NEXT: pand %xmm1, %xmm0
27450 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
27451 ; SSSE3-NEXT: paddb %xmm4, %xmm3
27452 ; SSSE3-NEXT: pxor %xmm0, %xmm0
27453 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
27454 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
27455 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
27456 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
27457 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483679,2147483679]
27458 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
27459 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
27460 ; SSSE3-NEXT: pand %xmm1, %xmm2
27461 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
27462 ; SSSE3-NEXT: por %xmm2, %xmm0
27465 ; SSE41-LABEL: ult_31_v2i64:
27467 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27468 ; SSE41-NEXT: movdqa %xmm0, %xmm2
27469 ; SSE41-NEXT: pand %xmm1, %xmm2
27470 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27471 ; SSE41-NEXT: movdqa %xmm3, %xmm4
27472 ; SSE41-NEXT: pshufb %xmm2, %xmm4
27473 ; SSE41-NEXT: psrlw $4, %xmm0
27474 ; SSE41-NEXT: pand %xmm1, %xmm0
27475 ; SSE41-NEXT: pshufb %xmm0, %xmm3
27476 ; SSE41-NEXT: paddb %xmm4, %xmm3
27477 ; SSE41-NEXT: pxor %xmm0, %xmm0
27478 ; SSE41-NEXT: psadbw %xmm0, %xmm3
27479 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
27480 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
27481 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
27482 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483679,2147483679]
27483 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
27484 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
27485 ; SSE41-NEXT: pand %xmm1, %xmm2
27486 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
27487 ; SSE41-NEXT: por %xmm2, %xmm0
27490 ; AVX1-LABEL: ult_31_v2i64:
27492 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27493 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
27494 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27495 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
27496 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
27497 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
27498 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
27499 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
27500 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
27501 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27502 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31]
27503 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27506 ; AVX2-LABEL: ult_31_v2i64:
27508 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27509 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
27510 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27511 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
27512 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
27513 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
27514 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
27515 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
27516 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
27517 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27518 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31]
27519 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27522 ; AVX512VPOPCNTDQ-LABEL: ult_31_v2i64:
27523 ; AVX512VPOPCNTDQ: # %bb.0:
27524 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
27525 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
27526 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31]
27527 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27528 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
27529 ; AVX512VPOPCNTDQ-NEXT: retq
27531 ; AVX512VPOPCNTDQVL-LABEL: ult_31_v2i64:
27532 ; AVX512VPOPCNTDQVL: # %bb.0:
27533 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
27534 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
27535 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
27536 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
27537 ; AVX512VPOPCNTDQVL-NEXT: retq
27539 ; BITALG_NOVLX-LABEL: ult_31_v2i64:
27540 ; BITALG_NOVLX: # %bb.0:
27541 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
27542 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
27543 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
27544 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27545 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31]
27546 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27547 ; BITALG_NOVLX-NEXT: vzeroupper
27548 ; BITALG_NOVLX-NEXT: retq
27550 ; BITALG-LABEL: ult_31_v2i64:
27552 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
27553 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
27554 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27555 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
27556 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
27557 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
27558 ; BITALG-NEXT: retq
27559 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
27560 %3 = icmp ult <2 x i64> %2, <i64 31, i64 31>
27561 %4 = sext <2 x i1> %3 to <2 x i64>
27565 define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) {
27566 ; SSE2-LABEL: ugt_31_v2i64:
27568 ; SSE2-NEXT: movdqa %xmm0, %xmm1
27569 ; SSE2-NEXT: psrlw $1, %xmm1
27570 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27571 ; SSE2-NEXT: psubb %xmm1, %xmm0
27572 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27573 ; SSE2-NEXT: movdqa %xmm0, %xmm2
27574 ; SSE2-NEXT: pand %xmm1, %xmm2
27575 ; SSE2-NEXT: psrlw $2, %xmm0
27576 ; SSE2-NEXT: pand %xmm1, %xmm0
27577 ; SSE2-NEXT: paddb %xmm2, %xmm0
27578 ; SSE2-NEXT: movdqa %xmm0, %xmm1
27579 ; SSE2-NEXT: psrlw $4, %xmm1
27580 ; SSE2-NEXT: paddb %xmm0, %xmm1
27581 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27582 ; SSE2-NEXT: pxor %xmm0, %xmm0
27583 ; SSE2-NEXT: psadbw %xmm0, %xmm1
27584 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27585 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
27586 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
27587 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27588 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
27589 ; SSE2-NEXT: pand %xmm2, %xmm3
27590 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
27591 ; SSE2-NEXT: por %xmm3, %xmm0
27594 ; SSE3-LABEL: ugt_31_v2i64:
27596 ; SSE3-NEXT: movdqa %xmm0, %xmm1
27597 ; SSE3-NEXT: psrlw $1, %xmm1
27598 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27599 ; SSE3-NEXT: psubb %xmm1, %xmm0
27600 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27601 ; SSE3-NEXT: movdqa %xmm0, %xmm2
27602 ; SSE3-NEXT: pand %xmm1, %xmm2
27603 ; SSE3-NEXT: psrlw $2, %xmm0
27604 ; SSE3-NEXT: pand %xmm1, %xmm0
27605 ; SSE3-NEXT: paddb %xmm2, %xmm0
27606 ; SSE3-NEXT: movdqa %xmm0, %xmm1
27607 ; SSE3-NEXT: psrlw $4, %xmm1
27608 ; SSE3-NEXT: paddb %xmm0, %xmm1
27609 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27610 ; SSE3-NEXT: pxor %xmm0, %xmm0
27611 ; SSE3-NEXT: psadbw %xmm0, %xmm1
27612 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27613 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
27614 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
27615 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27616 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
27617 ; SSE3-NEXT: pand %xmm2, %xmm3
27618 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
27619 ; SSE3-NEXT: por %xmm3, %xmm0
27622 ; SSSE3-LABEL: ugt_31_v2i64:
27624 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27625 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
27626 ; SSSE3-NEXT: pand %xmm1, %xmm2
27627 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27628 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
27629 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
27630 ; SSSE3-NEXT: psrlw $4, %xmm0
27631 ; SSSE3-NEXT: pand %xmm1, %xmm0
27632 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
27633 ; SSSE3-NEXT: paddb %xmm4, %xmm3
27634 ; SSSE3-NEXT: pxor %xmm0, %xmm0
27635 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
27636 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
27637 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
27638 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
27639 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
27640 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
27641 ; SSSE3-NEXT: pand %xmm1, %xmm2
27642 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
27643 ; SSSE3-NEXT: por %xmm2, %xmm0
27646 ; SSE41-LABEL: ugt_31_v2i64:
27648 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27649 ; SSE41-NEXT: movdqa %xmm0, %xmm2
27650 ; SSE41-NEXT: pand %xmm1, %xmm2
27651 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27652 ; SSE41-NEXT: movdqa %xmm3, %xmm4
27653 ; SSE41-NEXT: pshufb %xmm2, %xmm4
27654 ; SSE41-NEXT: psrlw $4, %xmm0
27655 ; SSE41-NEXT: pand %xmm1, %xmm0
27656 ; SSE41-NEXT: pshufb %xmm0, %xmm3
27657 ; SSE41-NEXT: paddb %xmm4, %xmm3
27658 ; SSE41-NEXT: pxor %xmm0, %xmm0
27659 ; SSE41-NEXT: psadbw %xmm0, %xmm3
27660 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
27661 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
27662 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
27663 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
27664 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
27665 ; SSE41-NEXT: pand %xmm1, %xmm2
27666 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
27667 ; SSE41-NEXT: por %xmm2, %xmm0
27670 ; AVX1-LABEL: ugt_31_v2i64:
27672 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27673 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
27674 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27675 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
27676 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
27677 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
27678 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
27679 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
27680 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
27681 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27682 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
27685 ; AVX2-LABEL: ugt_31_v2i64:
27687 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27688 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
27689 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27690 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
27691 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
27692 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
27693 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
27694 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
27695 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
27696 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27697 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
27700 ; AVX512VPOPCNTDQ-LABEL: ugt_31_v2i64:
27701 ; AVX512VPOPCNTDQ: # %bb.0:
27702 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
27703 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
27704 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
27705 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
27706 ; AVX512VPOPCNTDQ-NEXT: retq
27708 ; AVX512VPOPCNTDQVL-LABEL: ugt_31_v2i64:
27709 ; AVX512VPOPCNTDQVL: # %bb.0:
27710 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
27711 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
27712 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
27713 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
27714 ; AVX512VPOPCNTDQVL-NEXT: retq
27716 ; BITALG_NOVLX-LABEL: ugt_31_v2i64:
27717 ; BITALG_NOVLX: # %bb.0:
27718 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
27719 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
27720 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
27721 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27722 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
27723 ; BITALG_NOVLX-NEXT: vzeroupper
27724 ; BITALG_NOVLX-NEXT: retq
27726 ; BITALG-LABEL: ugt_31_v2i64:
27728 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
27729 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
27730 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27731 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
27732 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
27733 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
27734 ; BITALG-NEXT: retq
27735 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
27736 %3 = icmp ugt <2 x i64> %2, <i64 31, i64 31>
27737 %4 = sext <2 x i1> %3 to <2 x i64>
27741 define <2 x i64> @ult_32_v2i64(<2 x i64> %0) {
27742 ; SSE2-LABEL: ult_32_v2i64:
27744 ; SSE2-NEXT: movdqa %xmm0, %xmm1
27745 ; SSE2-NEXT: psrlw $1, %xmm1
27746 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27747 ; SSE2-NEXT: psubb %xmm1, %xmm0
27748 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27749 ; SSE2-NEXT: movdqa %xmm0, %xmm2
27750 ; SSE2-NEXT: pand %xmm1, %xmm2
27751 ; SSE2-NEXT: psrlw $2, %xmm0
27752 ; SSE2-NEXT: pand %xmm1, %xmm0
27753 ; SSE2-NEXT: paddb %xmm2, %xmm0
27754 ; SSE2-NEXT: movdqa %xmm0, %xmm1
27755 ; SSE2-NEXT: psrlw $4, %xmm1
27756 ; SSE2-NEXT: paddb %xmm0, %xmm1
27757 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27758 ; SSE2-NEXT: pxor %xmm0, %xmm0
27759 ; SSE2-NEXT: psadbw %xmm0, %xmm1
27760 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27761 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
27762 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
27763 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483680,2147483680]
27764 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
27765 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
27766 ; SSE2-NEXT: pand %xmm2, %xmm1
27767 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
27768 ; SSE2-NEXT: por %xmm1, %xmm0
27771 ; SSE3-LABEL: ult_32_v2i64:
27773 ; SSE3-NEXT: movdqa %xmm0, %xmm1
27774 ; SSE3-NEXT: psrlw $1, %xmm1
27775 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27776 ; SSE3-NEXT: psubb %xmm1, %xmm0
27777 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27778 ; SSE3-NEXT: movdqa %xmm0, %xmm2
27779 ; SSE3-NEXT: pand %xmm1, %xmm2
27780 ; SSE3-NEXT: psrlw $2, %xmm0
27781 ; SSE3-NEXT: pand %xmm1, %xmm0
27782 ; SSE3-NEXT: paddb %xmm2, %xmm0
27783 ; SSE3-NEXT: movdqa %xmm0, %xmm1
27784 ; SSE3-NEXT: psrlw $4, %xmm1
27785 ; SSE3-NEXT: paddb %xmm0, %xmm1
27786 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27787 ; SSE3-NEXT: pxor %xmm0, %xmm0
27788 ; SSE3-NEXT: psadbw %xmm0, %xmm1
27789 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27790 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
27791 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
27792 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483680,2147483680]
27793 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
27794 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
27795 ; SSE3-NEXT: pand %xmm2, %xmm1
27796 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
27797 ; SSE3-NEXT: por %xmm1, %xmm0
27800 ; SSSE3-LABEL: ult_32_v2i64:
27802 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27803 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
27804 ; SSSE3-NEXT: pand %xmm1, %xmm2
27805 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27806 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
27807 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
27808 ; SSSE3-NEXT: psrlw $4, %xmm0
27809 ; SSSE3-NEXT: pand %xmm1, %xmm0
27810 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
27811 ; SSSE3-NEXT: paddb %xmm4, %xmm3
27812 ; SSSE3-NEXT: pxor %xmm0, %xmm0
27813 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
27814 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
27815 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
27816 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
27817 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483680,2147483680]
27818 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
27819 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
27820 ; SSSE3-NEXT: pand %xmm1, %xmm2
27821 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
27822 ; SSSE3-NEXT: por %xmm2, %xmm0
27825 ; SSE41-LABEL: ult_32_v2i64:
27827 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27828 ; SSE41-NEXT: movdqa %xmm0, %xmm2
27829 ; SSE41-NEXT: pand %xmm1, %xmm2
27830 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27831 ; SSE41-NEXT: movdqa %xmm3, %xmm4
27832 ; SSE41-NEXT: pshufb %xmm2, %xmm4
27833 ; SSE41-NEXT: psrlw $4, %xmm0
27834 ; SSE41-NEXT: pand %xmm1, %xmm0
27835 ; SSE41-NEXT: pshufb %xmm0, %xmm3
27836 ; SSE41-NEXT: paddb %xmm4, %xmm3
27837 ; SSE41-NEXT: pxor %xmm0, %xmm0
27838 ; SSE41-NEXT: psadbw %xmm0, %xmm3
27839 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
27840 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
27841 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
27842 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483680,2147483680]
27843 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
27844 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
27845 ; SSE41-NEXT: pand %xmm1, %xmm2
27846 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
27847 ; SSE41-NEXT: por %xmm2, %xmm0
27850 ; AVX1-LABEL: ult_32_v2i64:
27852 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27853 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
27854 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27855 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
27856 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
27857 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
27858 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
27859 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
27860 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
27861 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27862 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32]
27863 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27866 ; AVX2-LABEL: ult_32_v2i64:
27868 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27869 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
27870 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27871 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
27872 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
27873 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
27874 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
27875 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
27876 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
27877 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27878 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32]
27879 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27882 ; AVX512VPOPCNTDQ-LABEL: ult_32_v2i64:
27883 ; AVX512VPOPCNTDQ: # %bb.0:
27884 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
27885 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
27886 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32]
27887 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27888 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
27889 ; AVX512VPOPCNTDQ-NEXT: retq
27891 ; AVX512VPOPCNTDQVL-LABEL: ult_32_v2i64:
27892 ; AVX512VPOPCNTDQVL: # %bb.0:
27893 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
27894 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
27895 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
27896 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
27897 ; AVX512VPOPCNTDQVL-NEXT: retq
27899 ; BITALG_NOVLX-LABEL: ult_32_v2i64:
27900 ; BITALG_NOVLX: # %bb.0:
27901 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
27902 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
27903 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
27904 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27905 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32]
27906 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27907 ; BITALG_NOVLX-NEXT: vzeroupper
27908 ; BITALG_NOVLX-NEXT: retq
27910 ; BITALG-LABEL: ult_32_v2i64:
27912 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
27913 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
27914 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27915 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
27916 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
27917 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
27918 ; BITALG-NEXT: retq
27919 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
27920 %3 = icmp ult <2 x i64> %2, <i64 32, i64 32>
27921 %4 = sext <2 x i1> %3 to <2 x i64>
27925 define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) {
27926 ; SSE2-LABEL: ugt_32_v2i64:
27928 ; SSE2-NEXT: movdqa %xmm0, %xmm1
27929 ; SSE2-NEXT: psrlw $1, %xmm1
27930 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27931 ; SSE2-NEXT: psubb %xmm1, %xmm0
27932 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27933 ; SSE2-NEXT: movdqa %xmm0, %xmm2
27934 ; SSE2-NEXT: pand %xmm1, %xmm2
27935 ; SSE2-NEXT: psrlw $2, %xmm0
27936 ; SSE2-NEXT: pand %xmm1, %xmm0
27937 ; SSE2-NEXT: paddb %xmm2, %xmm0
27938 ; SSE2-NEXT: movdqa %xmm0, %xmm1
27939 ; SSE2-NEXT: psrlw $4, %xmm1
27940 ; SSE2-NEXT: paddb %xmm0, %xmm1
27941 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27942 ; SSE2-NEXT: pxor %xmm0, %xmm0
27943 ; SSE2-NEXT: psadbw %xmm0, %xmm1
27944 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27945 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
27946 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
27947 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27948 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
27949 ; SSE2-NEXT: pand %xmm2, %xmm3
27950 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
27951 ; SSE2-NEXT: por %xmm3, %xmm0
27954 ; SSE3-LABEL: ugt_32_v2i64:
27956 ; SSE3-NEXT: movdqa %xmm0, %xmm1
27957 ; SSE3-NEXT: psrlw $1, %xmm1
27958 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27959 ; SSE3-NEXT: psubb %xmm1, %xmm0
27960 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27961 ; SSE3-NEXT: movdqa %xmm0, %xmm2
27962 ; SSE3-NEXT: pand %xmm1, %xmm2
27963 ; SSE3-NEXT: psrlw $2, %xmm0
27964 ; SSE3-NEXT: pand %xmm1, %xmm0
27965 ; SSE3-NEXT: paddb %xmm2, %xmm0
27966 ; SSE3-NEXT: movdqa %xmm0, %xmm1
27967 ; SSE3-NEXT: psrlw $4, %xmm1
27968 ; SSE3-NEXT: paddb %xmm0, %xmm1
27969 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27970 ; SSE3-NEXT: pxor %xmm0, %xmm0
27971 ; SSE3-NEXT: psadbw %xmm0, %xmm1
27972 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27973 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
27974 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
27975 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27976 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
27977 ; SSE3-NEXT: pand %xmm2, %xmm3
27978 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
27979 ; SSE3-NEXT: por %xmm3, %xmm0
27982 ; SSSE3-LABEL: ugt_32_v2i64:
27984 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27985 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
27986 ; SSSE3-NEXT: pand %xmm1, %xmm2
27987 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27988 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
27989 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
27990 ; SSSE3-NEXT: psrlw $4, %xmm0
27991 ; SSSE3-NEXT: pand %xmm1, %xmm0
27992 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
27993 ; SSSE3-NEXT: paddb %xmm4, %xmm3
27994 ; SSSE3-NEXT: pxor %xmm0, %xmm0
27995 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
27996 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
27997 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
27998 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
27999 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
28000 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
28001 ; SSSE3-NEXT: pand %xmm1, %xmm2
28002 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
28003 ; SSSE3-NEXT: por %xmm2, %xmm0
28006 ; SSE41-LABEL: ugt_32_v2i64:
28008 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28009 ; SSE41-NEXT: movdqa %xmm0, %xmm2
28010 ; SSE41-NEXT: pand %xmm1, %xmm2
28011 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28012 ; SSE41-NEXT: movdqa %xmm3, %xmm4
28013 ; SSE41-NEXT: pshufb %xmm2, %xmm4
28014 ; SSE41-NEXT: psrlw $4, %xmm0
28015 ; SSE41-NEXT: pand %xmm1, %xmm0
28016 ; SSE41-NEXT: pshufb %xmm0, %xmm3
28017 ; SSE41-NEXT: paddb %xmm4, %xmm3
28018 ; SSE41-NEXT: pxor %xmm0, %xmm0
28019 ; SSE41-NEXT: psadbw %xmm0, %xmm3
28020 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
28021 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
28022 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
28023 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
28024 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
28025 ; SSE41-NEXT: pand %xmm1, %xmm2
28026 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
28027 ; SSE41-NEXT: por %xmm2, %xmm0
28030 ; AVX1-LABEL: ugt_32_v2i64:
28032 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28033 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
28034 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28035 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
28036 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
28037 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
28038 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
28039 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
28040 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
28041 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28042 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28045 ; AVX2-LABEL: ugt_32_v2i64:
28047 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28048 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
28049 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28050 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
28051 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
28052 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
28053 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
28054 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
28055 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
28056 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28057 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28060 ; AVX512VPOPCNTDQ-LABEL: ugt_32_v2i64:
28061 ; AVX512VPOPCNTDQ: # %bb.0:
28062 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28063 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
28064 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28065 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
28066 ; AVX512VPOPCNTDQ-NEXT: retq
28068 ; AVX512VPOPCNTDQVL-LABEL: ugt_32_v2i64:
28069 ; AVX512VPOPCNTDQVL: # %bb.0:
28070 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
28071 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
28072 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
28073 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
28074 ; AVX512VPOPCNTDQVL-NEXT: retq
28076 ; BITALG_NOVLX-LABEL: ugt_32_v2i64:
28077 ; BITALG_NOVLX: # %bb.0:
28078 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28079 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
28080 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
28081 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28082 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28083 ; BITALG_NOVLX-NEXT: vzeroupper
28084 ; BITALG_NOVLX-NEXT: retq
28086 ; BITALG-LABEL: ugt_32_v2i64:
28088 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
28089 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
28090 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28091 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
28092 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
28093 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
28094 ; BITALG-NEXT: retq
28095 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
28096 %3 = icmp ugt <2 x i64> %2, <i64 32, i64 32>
28097 %4 = sext <2 x i1> %3 to <2 x i64>
; Codegen check: ctpop(<2 x i64>) `ult 33`, sign-extended to a <2 x i64> mask.
; Pre-AVX512 targets emit an inline popcount (SSE2/SSE3: shift/mask bit-twiddling;
; SSSE3/SSE4.1/AVX: pshufb nibble-LUT) summed per 64-bit lane with psadbw, then an
; unsigned 64-bit compare. SSE has no 64-bit unsigned compare, so it is expanded
; via `por` of a sign-bit constant and pcmpgtd against 2147483681 (0x80000021 =
; 2^31 + 33) plus pcmpeqd/pshufd glue; AVX uses vpcmpgtq against [33,33].
; AVX512VPOPCNTDQ uses native vpopcntq; BITALG uses vpopcntb + vpsadbw; the VL
; variants compare through a mask register (vpcmpltuq -> %k1 -> zero-masked move).
; NOTE(review): CHECK lines are autogenerated by update_llc_test_checks.py --
; regenerate instead of hand-editing.
28101 define <2 x i64> @ult_33_v2i64(<2 x i64> %0) {
28102 ; SSE2-LABEL: ult_33_v2i64:
28104 ; SSE2-NEXT: movdqa %xmm0, %xmm1
28105 ; SSE2-NEXT: psrlw $1, %xmm1
28106 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28107 ; SSE2-NEXT: psubb %xmm1, %xmm0
28108 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28109 ; SSE2-NEXT: movdqa %xmm0, %xmm2
28110 ; SSE2-NEXT: pand %xmm1, %xmm2
28111 ; SSE2-NEXT: psrlw $2, %xmm0
28112 ; SSE2-NEXT: pand %xmm1, %xmm0
28113 ; SSE2-NEXT: paddb %xmm2, %xmm0
28114 ; SSE2-NEXT: movdqa %xmm0, %xmm1
28115 ; SSE2-NEXT: psrlw $4, %xmm1
28116 ; SSE2-NEXT: paddb %xmm0, %xmm1
28117 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28118 ; SSE2-NEXT: pxor %xmm0, %xmm0
28119 ; SSE2-NEXT: psadbw %xmm0, %xmm1
28120 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28121 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
28122 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
28123 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483681,2147483681]
28124 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
28125 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
28126 ; SSE2-NEXT: pand %xmm2, %xmm1
28127 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
28128 ; SSE2-NEXT: por %xmm1, %xmm0
28131 ; SSE3-LABEL: ult_33_v2i64:
28133 ; SSE3-NEXT: movdqa %xmm0, %xmm1
28134 ; SSE3-NEXT: psrlw $1, %xmm1
28135 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28136 ; SSE3-NEXT: psubb %xmm1, %xmm0
28137 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28138 ; SSE3-NEXT: movdqa %xmm0, %xmm2
28139 ; SSE3-NEXT: pand %xmm1, %xmm2
28140 ; SSE3-NEXT: psrlw $2, %xmm0
28141 ; SSE3-NEXT: pand %xmm1, %xmm0
28142 ; SSE3-NEXT: paddb %xmm2, %xmm0
28143 ; SSE3-NEXT: movdqa %xmm0, %xmm1
28144 ; SSE3-NEXT: psrlw $4, %xmm1
28145 ; SSE3-NEXT: paddb %xmm0, %xmm1
28146 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28147 ; SSE3-NEXT: pxor %xmm0, %xmm0
28148 ; SSE3-NEXT: psadbw %xmm0, %xmm1
28149 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28150 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
28151 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
28152 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483681,2147483681]
28153 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
28154 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
28155 ; SSE3-NEXT: pand %xmm2, %xmm1
28156 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
28157 ; SSE3-NEXT: por %xmm1, %xmm0
28160 ; SSSE3-LABEL: ult_33_v2i64:
28162 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28163 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
28164 ; SSSE3-NEXT: pand %xmm1, %xmm2
28165 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28166 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
28167 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
28168 ; SSSE3-NEXT: psrlw $4, %xmm0
28169 ; SSSE3-NEXT: pand %xmm1, %xmm0
28170 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
28171 ; SSSE3-NEXT: paddb %xmm4, %xmm3
28172 ; SSSE3-NEXT: pxor %xmm0, %xmm0
28173 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
28174 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
28175 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
28176 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
28177 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483681,2147483681]
28178 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
28179 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
28180 ; SSSE3-NEXT: pand %xmm1, %xmm2
28181 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
28182 ; SSSE3-NEXT: por %xmm2, %xmm0
28185 ; SSE41-LABEL: ult_33_v2i64:
28187 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28188 ; SSE41-NEXT: movdqa %xmm0, %xmm2
28189 ; SSE41-NEXT: pand %xmm1, %xmm2
28190 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28191 ; SSE41-NEXT: movdqa %xmm3, %xmm4
28192 ; SSE41-NEXT: pshufb %xmm2, %xmm4
28193 ; SSE41-NEXT: psrlw $4, %xmm0
28194 ; SSE41-NEXT: pand %xmm1, %xmm0
28195 ; SSE41-NEXT: pshufb %xmm0, %xmm3
28196 ; SSE41-NEXT: paddb %xmm4, %xmm3
28197 ; SSE41-NEXT: pxor %xmm0, %xmm0
28198 ; SSE41-NEXT: psadbw %xmm0, %xmm3
28199 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
28200 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
28201 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
28202 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483681,2147483681]
28203 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
28204 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
28205 ; SSE41-NEXT: pand %xmm1, %xmm2
28206 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
28207 ; SSE41-NEXT: por %xmm2, %xmm0
28210 ; AVX1-LABEL: ult_33_v2i64:
28212 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28213 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
28214 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28215 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
28216 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
28217 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
28218 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
28219 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
28220 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
28221 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28222 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33]
28223 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28226 ; AVX2-LABEL: ult_33_v2i64:
28228 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28229 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
28230 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28231 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
28232 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
28233 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
28234 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
28235 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
28236 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
28237 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28238 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33]
28239 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28242 ; AVX512VPOPCNTDQ-LABEL: ult_33_v2i64:
28243 ; AVX512VPOPCNTDQ: # %bb.0:
28244 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28245 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
28246 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33]
28247 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28248 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
28249 ; AVX512VPOPCNTDQ-NEXT: retq
28251 ; AVX512VPOPCNTDQVL-LABEL: ult_33_v2i64:
28252 ; AVX512VPOPCNTDQVL: # %bb.0:
28253 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
28254 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
28255 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
28256 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
28257 ; AVX512VPOPCNTDQVL-NEXT: retq
28259 ; BITALG_NOVLX-LABEL: ult_33_v2i64:
28260 ; BITALG_NOVLX: # %bb.0:
28261 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28262 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
28263 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
28264 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28265 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33]
28266 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28267 ; BITALG_NOVLX-NEXT: vzeroupper
28268 ; BITALG_NOVLX-NEXT: retq
28270 ; BITALG-LABEL: ult_33_v2i64:
28272 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
28273 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
28274 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28275 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
28276 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
28277 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
28278 ; BITALG-NEXT: retq
; IR under test: per-lane popcount, unsigned-less-than 33, sext i1 -> i64 mask.
28279 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
28280 %3 = icmp ult <2 x i64> %2, <i64 33, i64 33>
28281 %4 = sext <2 x i1> %3 to <2 x i64>
; Codegen check: ctpop(<2 x i64>) `ugt 33`, sign-extended to a <2 x i64> mask.
; Same lowering pattern as the sibling ult tests: inline popcount + psadbw on
; pre-AVX512 targets, then the SSE 64-bit unsigned-compare expansion (por of a
; sign-bit constant, pcmpgtd against a constant pool value, pcmpeqd/pshufd glue);
; AVX uses vpcmpgtq; AVX512VPOPCNTDQ uses vpopcntq; BITALG uses vpopcntb+vpsadbw;
; the VL variants compare via vpcmpnleuq into %k1 with a zero-masked move.
; NOTE(review): CHECK lines are autogenerated by update_llc_test_checks.py --
; regenerate instead of hand-editing.
28285 define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) {
28286 ; SSE2-LABEL: ugt_33_v2i64:
28288 ; SSE2-NEXT: movdqa %xmm0, %xmm1
28289 ; SSE2-NEXT: psrlw $1, %xmm1
28290 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28291 ; SSE2-NEXT: psubb %xmm1, %xmm0
28292 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28293 ; SSE2-NEXT: movdqa %xmm0, %xmm2
28294 ; SSE2-NEXT: pand %xmm1, %xmm2
28295 ; SSE2-NEXT: psrlw $2, %xmm0
28296 ; SSE2-NEXT: pand %xmm1, %xmm0
28297 ; SSE2-NEXT: paddb %xmm2, %xmm0
28298 ; SSE2-NEXT: movdqa %xmm0, %xmm1
28299 ; SSE2-NEXT: psrlw $4, %xmm1
28300 ; SSE2-NEXT: paddb %xmm0, %xmm1
28301 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28302 ; SSE2-NEXT: pxor %xmm0, %xmm0
28303 ; SSE2-NEXT: psadbw %xmm0, %xmm1
28304 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28305 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
28306 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
28307 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28308 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
28309 ; SSE2-NEXT: pand %xmm2, %xmm3
28310 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
28311 ; SSE2-NEXT: por %xmm3, %xmm0
28314 ; SSE3-LABEL: ugt_33_v2i64:
28316 ; SSE3-NEXT: movdqa %xmm0, %xmm1
28317 ; SSE3-NEXT: psrlw $1, %xmm1
28318 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28319 ; SSE3-NEXT: psubb %xmm1, %xmm0
28320 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28321 ; SSE3-NEXT: movdqa %xmm0, %xmm2
28322 ; SSE3-NEXT: pand %xmm1, %xmm2
28323 ; SSE3-NEXT: psrlw $2, %xmm0
28324 ; SSE3-NEXT: pand %xmm1, %xmm0
28325 ; SSE3-NEXT: paddb %xmm2, %xmm0
28326 ; SSE3-NEXT: movdqa %xmm0, %xmm1
28327 ; SSE3-NEXT: psrlw $4, %xmm1
28328 ; SSE3-NEXT: paddb %xmm0, %xmm1
28329 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28330 ; SSE3-NEXT: pxor %xmm0, %xmm0
28331 ; SSE3-NEXT: psadbw %xmm0, %xmm1
28332 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28333 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
28334 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
28335 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28336 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
28337 ; SSE3-NEXT: pand %xmm2, %xmm3
28338 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
28339 ; SSE3-NEXT: por %xmm3, %xmm0
28342 ; SSSE3-LABEL: ugt_33_v2i64:
28344 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28345 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
28346 ; SSSE3-NEXT: pand %xmm1, %xmm2
28347 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28348 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
28349 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
28350 ; SSSE3-NEXT: psrlw $4, %xmm0
28351 ; SSSE3-NEXT: pand %xmm1, %xmm0
28352 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
28353 ; SSSE3-NEXT: paddb %xmm4, %xmm3
28354 ; SSSE3-NEXT: pxor %xmm0, %xmm0
28355 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
28356 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
28357 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
28358 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
28359 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
28360 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
28361 ; SSSE3-NEXT: pand %xmm1, %xmm2
28362 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
28363 ; SSSE3-NEXT: por %xmm2, %xmm0
28366 ; SSE41-LABEL: ugt_33_v2i64:
28368 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28369 ; SSE41-NEXT: movdqa %xmm0, %xmm2
28370 ; SSE41-NEXT: pand %xmm1, %xmm2
28371 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28372 ; SSE41-NEXT: movdqa %xmm3, %xmm4
28373 ; SSE41-NEXT: pshufb %xmm2, %xmm4
28374 ; SSE41-NEXT: psrlw $4, %xmm0
28375 ; SSE41-NEXT: pand %xmm1, %xmm0
28376 ; SSE41-NEXT: pshufb %xmm0, %xmm3
28377 ; SSE41-NEXT: paddb %xmm4, %xmm3
28378 ; SSE41-NEXT: pxor %xmm0, %xmm0
28379 ; SSE41-NEXT: psadbw %xmm0, %xmm3
28380 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
28381 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
28382 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
28383 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
28384 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
28385 ; SSE41-NEXT: pand %xmm1, %xmm2
28386 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
28387 ; SSE41-NEXT: por %xmm2, %xmm0
28390 ; AVX1-LABEL: ugt_33_v2i64:
28392 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28393 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
28394 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28395 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
28396 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
28397 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
28398 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
28399 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
28400 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
28401 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28402 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28405 ; AVX2-LABEL: ugt_33_v2i64:
28407 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28408 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
28409 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28410 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
28411 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
28412 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
28413 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
28414 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
28415 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
28416 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28417 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28420 ; AVX512VPOPCNTDQ-LABEL: ugt_33_v2i64:
28421 ; AVX512VPOPCNTDQ: # %bb.0:
28422 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28423 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
28424 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28425 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
28426 ; AVX512VPOPCNTDQ-NEXT: retq
28428 ; AVX512VPOPCNTDQVL-LABEL: ugt_33_v2i64:
28429 ; AVX512VPOPCNTDQVL: # %bb.0:
28430 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
28431 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
28432 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
28433 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
28434 ; AVX512VPOPCNTDQVL-NEXT: retq
28436 ; BITALG_NOVLX-LABEL: ugt_33_v2i64:
28437 ; BITALG_NOVLX: # %bb.0:
28438 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28439 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
28440 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
28441 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28442 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28443 ; BITALG_NOVLX-NEXT: vzeroupper
28444 ; BITALG_NOVLX-NEXT: retq
28446 ; BITALG-LABEL: ugt_33_v2i64:
28448 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
28449 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
28450 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28451 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
28452 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
28453 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
28454 ; BITALG-NEXT: retq
; IR under test: per-lane popcount, unsigned-greater-than 33, sext i1 -> i64 mask.
28455 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
28456 %3 = icmp ugt <2 x i64> %2, <i64 33, i64 33>
28457 %4 = sext <2 x i1> %3 to <2 x i64>
; Codegen check: ctpop(<2 x i64>) `ult 34`, sign-extended to a <2 x i64> mask.
; Mirrors ult_33_v2i64 with the threshold bumped to 34: the SSE expansion
; compares against 2147483682 (0x80000022 = 2^31 + 34) after the sign-bit `por`,
; and the AVX paths use vpcmpgtq against [34,34]. AVX512VPOPCNTDQ uses native
; vpopcntq; BITALG uses vpopcntb + vpsadbw; the VL variants go through a mask
; register (vpcmpltuq -> %k1 -> zero-masked move).
; NOTE(review): CHECK lines are autogenerated by update_llc_test_checks.py --
; regenerate instead of hand-editing.
28461 define <2 x i64> @ult_34_v2i64(<2 x i64> %0) {
28462 ; SSE2-LABEL: ult_34_v2i64:
28464 ; SSE2-NEXT: movdqa %xmm0, %xmm1
28465 ; SSE2-NEXT: psrlw $1, %xmm1
28466 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28467 ; SSE2-NEXT: psubb %xmm1, %xmm0
28468 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28469 ; SSE2-NEXT: movdqa %xmm0, %xmm2
28470 ; SSE2-NEXT: pand %xmm1, %xmm2
28471 ; SSE2-NEXT: psrlw $2, %xmm0
28472 ; SSE2-NEXT: pand %xmm1, %xmm0
28473 ; SSE2-NEXT: paddb %xmm2, %xmm0
28474 ; SSE2-NEXT: movdqa %xmm0, %xmm1
28475 ; SSE2-NEXT: psrlw $4, %xmm1
28476 ; SSE2-NEXT: paddb %xmm0, %xmm1
28477 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28478 ; SSE2-NEXT: pxor %xmm0, %xmm0
28479 ; SSE2-NEXT: psadbw %xmm0, %xmm1
28480 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28481 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
28482 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
28483 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483682,2147483682]
28484 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
28485 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
28486 ; SSE2-NEXT: pand %xmm2, %xmm1
28487 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
28488 ; SSE2-NEXT: por %xmm1, %xmm0
28491 ; SSE3-LABEL: ult_34_v2i64:
28493 ; SSE3-NEXT: movdqa %xmm0, %xmm1
28494 ; SSE3-NEXT: psrlw $1, %xmm1
28495 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28496 ; SSE3-NEXT: psubb %xmm1, %xmm0
28497 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28498 ; SSE3-NEXT: movdqa %xmm0, %xmm2
28499 ; SSE3-NEXT: pand %xmm1, %xmm2
28500 ; SSE3-NEXT: psrlw $2, %xmm0
28501 ; SSE3-NEXT: pand %xmm1, %xmm0
28502 ; SSE3-NEXT: paddb %xmm2, %xmm0
28503 ; SSE3-NEXT: movdqa %xmm0, %xmm1
28504 ; SSE3-NEXT: psrlw $4, %xmm1
28505 ; SSE3-NEXT: paddb %xmm0, %xmm1
28506 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28507 ; SSE3-NEXT: pxor %xmm0, %xmm0
28508 ; SSE3-NEXT: psadbw %xmm0, %xmm1
28509 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28510 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
28511 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
28512 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483682,2147483682]
28513 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
28514 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
28515 ; SSE3-NEXT: pand %xmm2, %xmm1
28516 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
28517 ; SSE3-NEXT: por %xmm1, %xmm0
28520 ; SSSE3-LABEL: ult_34_v2i64:
28522 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28523 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
28524 ; SSSE3-NEXT: pand %xmm1, %xmm2
28525 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28526 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
28527 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
28528 ; SSSE3-NEXT: psrlw $4, %xmm0
28529 ; SSSE3-NEXT: pand %xmm1, %xmm0
28530 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
28531 ; SSSE3-NEXT: paddb %xmm4, %xmm3
28532 ; SSSE3-NEXT: pxor %xmm0, %xmm0
28533 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
28534 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
28535 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
28536 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
28537 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483682,2147483682]
28538 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
28539 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
28540 ; SSSE3-NEXT: pand %xmm1, %xmm2
28541 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
28542 ; SSSE3-NEXT: por %xmm2, %xmm0
28545 ; SSE41-LABEL: ult_34_v2i64:
28547 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28548 ; SSE41-NEXT: movdqa %xmm0, %xmm2
28549 ; SSE41-NEXT: pand %xmm1, %xmm2
28550 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28551 ; SSE41-NEXT: movdqa %xmm3, %xmm4
28552 ; SSE41-NEXT: pshufb %xmm2, %xmm4
28553 ; SSE41-NEXT: psrlw $4, %xmm0
28554 ; SSE41-NEXT: pand %xmm1, %xmm0
28555 ; SSE41-NEXT: pshufb %xmm0, %xmm3
28556 ; SSE41-NEXT: paddb %xmm4, %xmm3
28557 ; SSE41-NEXT: pxor %xmm0, %xmm0
28558 ; SSE41-NEXT: psadbw %xmm0, %xmm3
28559 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
28560 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
28561 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
28562 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483682,2147483682]
28563 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
28564 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
28565 ; SSE41-NEXT: pand %xmm1, %xmm2
28566 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
28567 ; SSE41-NEXT: por %xmm2, %xmm0
28570 ; AVX1-LABEL: ult_34_v2i64:
28572 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28573 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
28574 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28575 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
28576 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
28577 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
28578 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
28579 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
28580 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
28581 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28582 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34]
28583 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28586 ; AVX2-LABEL: ult_34_v2i64:
28588 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28589 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
28590 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28591 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
28592 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
28593 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
28594 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
28595 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
28596 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
28597 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28598 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34]
28599 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28602 ; AVX512VPOPCNTDQ-LABEL: ult_34_v2i64:
28603 ; AVX512VPOPCNTDQ: # %bb.0:
28604 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28605 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
28606 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34]
28607 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28608 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
28609 ; AVX512VPOPCNTDQ-NEXT: retq
28611 ; AVX512VPOPCNTDQVL-LABEL: ult_34_v2i64:
28612 ; AVX512VPOPCNTDQVL: # %bb.0:
28613 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
28614 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
28615 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
28616 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
28617 ; AVX512VPOPCNTDQVL-NEXT: retq
28619 ; BITALG_NOVLX-LABEL: ult_34_v2i64:
28620 ; BITALG_NOVLX: # %bb.0:
28621 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28622 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
28623 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
28624 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28625 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34]
28626 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28627 ; BITALG_NOVLX-NEXT: vzeroupper
28628 ; BITALG_NOVLX-NEXT: retq
28630 ; BITALG-LABEL: ult_34_v2i64:
28632 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
28633 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
28634 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28635 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
28636 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
28637 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
28638 ; BITALG-NEXT: retq
; IR under test: per-lane popcount, unsigned-less-than 34, sext i1 -> i64 mask.
28639 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
28640 %3 = icmp ult <2 x i64> %2, <i64 34, i64 34>
28641 %4 = sext <2 x i1> %3 to <2 x i64>
28645 define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) {
28646 ; SSE2-LABEL: ugt_34_v2i64:
28648 ; SSE2-NEXT: movdqa %xmm0, %xmm1
28649 ; SSE2-NEXT: psrlw $1, %xmm1
28650 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28651 ; SSE2-NEXT: psubb %xmm1, %xmm0
28652 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28653 ; SSE2-NEXT: movdqa %xmm0, %xmm2
28654 ; SSE2-NEXT: pand %xmm1, %xmm2
28655 ; SSE2-NEXT: psrlw $2, %xmm0
28656 ; SSE2-NEXT: pand %xmm1, %xmm0
28657 ; SSE2-NEXT: paddb %xmm2, %xmm0
28658 ; SSE2-NEXT: movdqa %xmm0, %xmm1
28659 ; SSE2-NEXT: psrlw $4, %xmm1
28660 ; SSE2-NEXT: paddb %xmm0, %xmm1
28661 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28662 ; SSE2-NEXT: pxor %xmm0, %xmm0
28663 ; SSE2-NEXT: psadbw %xmm0, %xmm1
28664 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28665 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
28666 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
28667 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28668 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
28669 ; SSE2-NEXT: pand %xmm2, %xmm3
28670 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
28671 ; SSE2-NEXT: por %xmm3, %xmm0
28674 ; SSE3-LABEL: ugt_34_v2i64:
28676 ; SSE3-NEXT: movdqa %xmm0, %xmm1
28677 ; SSE3-NEXT: psrlw $1, %xmm1
28678 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28679 ; SSE3-NEXT: psubb %xmm1, %xmm0
28680 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28681 ; SSE3-NEXT: movdqa %xmm0, %xmm2
28682 ; SSE3-NEXT: pand %xmm1, %xmm2
28683 ; SSE3-NEXT: psrlw $2, %xmm0
28684 ; SSE3-NEXT: pand %xmm1, %xmm0
28685 ; SSE3-NEXT: paddb %xmm2, %xmm0
28686 ; SSE3-NEXT: movdqa %xmm0, %xmm1
28687 ; SSE3-NEXT: psrlw $4, %xmm1
28688 ; SSE3-NEXT: paddb %xmm0, %xmm1
28689 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28690 ; SSE3-NEXT: pxor %xmm0, %xmm0
28691 ; SSE3-NEXT: psadbw %xmm0, %xmm1
28692 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28693 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
28694 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
28695 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28696 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
28697 ; SSE3-NEXT: pand %xmm2, %xmm3
28698 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
28699 ; SSE3-NEXT: por %xmm3, %xmm0
28702 ; SSSE3-LABEL: ugt_34_v2i64:
28704 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28705 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
28706 ; SSSE3-NEXT: pand %xmm1, %xmm2
28707 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28708 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
28709 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
28710 ; SSSE3-NEXT: psrlw $4, %xmm0
28711 ; SSSE3-NEXT: pand %xmm1, %xmm0
28712 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
28713 ; SSSE3-NEXT: paddb %xmm4, %xmm3
28714 ; SSSE3-NEXT: pxor %xmm0, %xmm0
28715 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
28716 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
28717 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
28718 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
28719 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
28720 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
28721 ; SSSE3-NEXT: pand %xmm1, %xmm2
28722 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
28723 ; SSSE3-NEXT: por %xmm2, %xmm0
28726 ; SSE41-LABEL: ugt_34_v2i64:
28728 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28729 ; SSE41-NEXT: movdqa %xmm0, %xmm2
28730 ; SSE41-NEXT: pand %xmm1, %xmm2
28731 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28732 ; SSE41-NEXT: movdqa %xmm3, %xmm4
28733 ; SSE41-NEXT: pshufb %xmm2, %xmm4
28734 ; SSE41-NEXT: psrlw $4, %xmm0
28735 ; SSE41-NEXT: pand %xmm1, %xmm0
28736 ; SSE41-NEXT: pshufb %xmm0, %xmm3
28737 ; SSE41-NEXT: paddb %xmm4, %xmm3
28738 ; SSE41-NEXT: pxor %xmm0, %xmm0
28739 ; SSE41-NEXT: psadbw %xmm0, %xmm3
28740 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
28741 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
28742 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
28743 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
28744 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
28745 ; SSE41-NEXT: pand %xmm1, %xmm2
28746 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
28747 ; SSE41-NEXT: por %xmm2, %xmm0
28750 ; AVX1-LABEL: ugt_34_v2i64:
28752 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28753 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
28754 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28755 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
28756 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
28757 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
28758 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
28759 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
28760 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
28761 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28762 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28765 ; AVX2-LABEL: ugt_34_v2i64:
28767 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28768 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
28769 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28770 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
28771 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
28772 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
28773 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
28774 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
28775 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
28776 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28777 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28780 ; AVX512VPOPCNTDQ-LABEL: ugt_34_v2i64:
28781 ; AVX512VPOPCNTDQ: # %bb.0:
28782 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28783 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
28784 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28785 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
28786 ; AVX512VPOPCNTDQ-NEXT: retq
28788 ; AVX512VPOPCNTDQVL-LABEL: ugt_34_v2i64:
28789 ; AVX512VPOPCNTDQVL: # %bb.0:
28790 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
28791 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
28792 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
28793 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
28794 ; AVX512VPOPCNTDQVL-NEXT: retq
28796 ; BITALG_NOVLX-LABEL: ugt_34_v2i64:
28797 ; BITALG_NOVLX: # %bb.0:
28798 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28799 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
28800 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
28801 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28802 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28803 ; BITALG_NOVLX-NEXT: vzeroupper
28804 ; BITALG_NOVLX-NEXT: retq
28806 ; BITALG-LABEL: ugt_34_v2i64:
28808 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
28809 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
28810 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28811 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
28812 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
28813 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
28814 ; BITALG-NEXT: retq
28815 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
28816 %3 = icmp ugt <2 x i64> %2, <i64 34, i64 34>
28817 %4 = sext <2 x i1> %3 to <2 x i64>
28821 define <2 x i64> @ult_35_v2i64(<2 x i64> %0) {
28822 ; SSE2-LABEL: ult_35_v2i64:
28824 ; SSE2-NEXT: movdqa %xmm0, %xmm1
28825 ; SSE2-NEXT: psrlw $1, %xmm1
28826 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28827 ; SSE2-NEXT: psubb %xmm1, %xmm0
28828 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28829 ; SSE2-NEXT: movdqa %xmm0, %xmm2
28830 ; SSE2-NEXT: pand %xmm1, %xmm2
28831 ; SSE2-NEXT: psrlw $2, %xmm0
28832 ; SSE2-NEXT: pand %xmm1, %xmm0
28833 ; SSE2-NEXT: paddb %xmm2, %xmm0
28834 ; SSE2-NEXT: movdqa %xmm0, %xmm1
28835 ; SSE2-NEXT: psrlw $4, %xmm1
28836 ; SSE2-NEXT: paddb %xmm0, %xmm1
28837 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28838 ; SSE2-NEXT: pxor %xmm0, %xmm0
28839 ; SSE2-NEXT: psadbw %xmm0, %xmm1
28840 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28841 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
28842 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
28843 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483683,2147483683]
28844 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
28845 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
28846 ; SSE2-NEXT: pand %xmm2, %xmm1
28847 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
28848 ; SSE2-NEXT: por %xmm1, %xmm0
28851 ; SSE3-LABEL: ult_35_v2i64:
28853 ; SSE3-NEXT: movdqa %xmm0, %xmm1
28854 ; SSE3-NEXT: psrlw $1, %xmm1
28855 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28856 ; SSE3-NEXT: psubb %xmm1, %xmm0
28857 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28858 ; SSE3-NEXT: movdqa %xmm0, %xmm2
28859 ; SSE3-NEXT: pand %xmm1, %xmm2
28860 ; SSE3-NEXT: psrlw $2, %xmm0
28861 ; SSE3-NEXT: pand %xmm1, %xmm0
28862 ; SSE3-NEXT: paddb %xmm2, %xmm0
28863 ; SSE3-NEXT: movdqa %xmm0, %xmm1
28864 ; SSE3-NEXT: psrlw $4, %xmm1
28865 ; SSE3-NEXT: paddb %xmm0, %xmm1
28866 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28867 ; SSE3-NEXT: pxor %xmm0, %xmm0
28868 ; SSE3-NEXT: psadbw %xmm0, %xmm1
28869 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28870 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
28871 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
28872 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483683,2147483683]
28873 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
28874 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
28875 ; SSE3-NEXT: pand %xmm2, %xmm1
28876 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
28877 ; SSE3-NEXT: por %xmm1, %xmm0
28880 ; SSSE3-LABEL: ult_35_v2i64:
28882 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28883 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
28884 ; SSSE3-NEXT: pand %xmm1, %xmm2
28885 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28886 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
28887 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
28888 ; SSSE3-NEXT: psrlw $4, %xmm0
28889 ; SSSE3-NEXT: pand %xmm1, %xmm0
28890 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
28891 ; SSSE3-NEXT: paddb %xmm4, %xmm3
28892 ; SSSE3-NEXT: pxor %xmm0, %xmm0
28893 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
28894 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
28895 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
28896 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
28897 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483683,2147483683]
28898 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
28899 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
28900 ; SSSE3-NEXT: pand %xmm1, %xmm2
28901 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
28902 ; SSSE3-NEXT: por %xmm2, %xmm0
28905 ; SSE41-LABEL: ult_35_v2i64:
28907 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28908 ; SSE41-NEXT: movdqa %xmm0, %xmm2
28909 ; SSE41-NEXT: pand %xmm1, %xmm2
28910 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28911 ; SSE41-NEXT: movdqa %xmm3, %xmm4
28912 ; SSE41-NEXT: pshufb %xmm2, %xmm4
28913 ; SSE41-NEXT: psrlw $4, %xmm0
28914 ; SSE41-NEXT: pand %xmm1, %xmm0
28915 ; SSE41-NEXT: pshufb %xmm0, %xmm3
28916 ; SSE41-NEXT: paddb %xmm4, %xmm3
28917 ; SSE41-NEXT: pxor %xmm0, %xmm0
28918 ; SSE41-NEXT: psadbw %xmm0, %xmm3
28919 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
28920 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
28921 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
28922 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483683,2147483683]
28923 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
28924 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
28925 ; SSE41-NEXT: pand %xmm1, %xmm2
28926 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
28927 ; SSE41-NEXT: por %xmm2, %xmm0
28930 ; AVX1-LABEL: ult_35_v2i64:
28932 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28933 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
28934 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28935 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
28936 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
28937 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
28938 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
28939 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
28940 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
28941 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28942 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35]
28943 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28946 ; AVX2-LABEL: ult_35_v2i64:
28948 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28949 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
28950 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28951 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
28952 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
28953 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
28954 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
28955 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
28956 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
28957 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28958 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35]
28959 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28962 ; AVX512VPOPCNTDQ-LABEL: ult_35_v2i64:
28963 ; AVX512VPOPCNTDQ: # %bb.0:
28964 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28965 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
28966 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35]
28967 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28968 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
28969 ; AVX512VPOPCNTDQ-NEXT: retq
28971 ; AVX512VPOPCNTDQVL-LABEL: ult_35_v2i64:
28972 ; AVX512VPOPCNTDQVL: # %bb.0:
28973 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
28974 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
28975 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
28976 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
28977 ; AVX512VPOPCNTDQVL-NEXT: retq
28979 ; BITALG_NOVLX-LABEL: ult_35_v2i64:
28980 ; BITALG_NOVLX: # %bb.0:
28981 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28982 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
28983 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
28984 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28985 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35]
28986 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28987 ; BITALG_NOVLX-NEXT: vzeroupper
28988 ; BITALG_NOVLX-NEXT: retq
28990 ; BITALG-LABEL: ult_35_v2i64:
28992 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
28993 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
28994 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28995 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
28996 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
28997 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
28998 ; BITALG-NEXT: retq
28999 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
29000 %3 = icmp ult <2 x i64> %2, <i64 35, i64 35>
29001 %4 = sext <2 x i1> %3 to <2 x i64>
29005 define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) {
29006 ; SSE2-LABEL: ugt_35_v2i64:
29008 ; SSE2-NEXT: movdqa %xmm0, %xmm1
29009 ; SSE2-NEXT: psrlw $1, %xmm1
29010 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29011 ; SSE2-NEXT: psubb %xmm1, %xmm0
29012 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
29013 ; SSE2-NEXT: movdqa %xmm0, %xmm2
29014 ; SSE2-NEXT: pand %xmm1, %xmm2
29015 ; SSE2-NEXT: psrlw $2, %xmm0
29016 ; SSE2-NEXT: pand %xmm1, %xmm0
29017 ; SSE2-NEXT: paddb %xmm2, %xmm0
29018 ; SSE2-NEXT: movdqa %xmm0, %xmm1
29019 ; SSE2-NEXT: psrlw $4, %xmm1
29020 ; SSE2-NEXT: paddb %xmm0, %xmm1
29021 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29022 ; SSE2-NEXT: pxor %xmm0, %xmm0
29023 ; SSE2-NEXT: psadbw %xmm0, %xmm1
29024 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29025 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
29026 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
29027 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29028 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
29029 ; SSE2-NEXT: pand %xmm2, %xmm3
29030 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
29031 ; SSE2-NEXT: por %xmm3, %xmm0
29034 ; SSE3-LABEL: ugt_35_v2i64:
29036 ; SSE3-NEXT: movdqa %xmm0, %xmm1
29037 ; SSE3-NEXT: psrlw $1, %xmm1
29038 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29039 ; SSE3-NEXT: psubb %xmm1, %xmm0
29040 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
29041 ; SSE3-NEXT: movdqa %xmm0, %xmm2
29042 ; SSE3-NEXT: pand %xmm1, %xmm2
29043 ; SSE3-NEXT: psrlw $2, %xmm0
29044 ; SSE3-NEXT: pand %xmm1, %xmm0
29045 ; SSE3-NEXT: paddb %xmm2, %xmm0
29046 ; SSE3-NEXT: movdqa %xmm0, %xmm1
29047 ; SSE3-NEXT: psrlw $4, %xmm1
29048 ; SSE3-NEXT: paddb %xmm0, %xmm1
29049 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29050 ; SSE3-NEXT: pxor %xmm0, %xmm0
29051 ; SSE3-NEXT: psadbw %xmm0, %xmm1
29052 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29053 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
29054 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
29055 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29056 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
29057 ; SSE3-NEXT: pand %xmm2, %xmm3
29058 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
29059 ; SSE3-NEXT: por %xmm3, %xmm0
29062 ; SSSE3-LABEL: ugt_35_v2i64:
29064 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29065 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
29066 ; SSSE3-NEXT: pand %xmm1, %xmm2
29067 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29068 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
29069 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
29070 ; SSSE3-NEXT: psrlw $4, %xmm0
29071 ; SSSE3-NEXT: pand %xmm1, %xmm0
29072 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
29073 ; SSSE3-NEXT: paddb %xmm4, %xmm3
29074 ; SSSE3-NEXT: pxor %xmm0, %xmm0
29075 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
29076 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
29077 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
29078 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
29079 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
29080 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
29081 ; SSSE3-NEXT: pand %xmm1, %xmm2
29082 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
29083 ; SSSE3-NEXT: por %xmm2, %xmm0
29086 ; SSE41-LABEL: ugt_35_v2i64:
29088 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29089 ; SSE41-NEXT: movdqa %xmm0, %xmm2
29090 ; SSE41-NEXT: pand %xmm1, %xmm2
29091 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29092 ; SSE41-NEXT: movdqa %xmm3, %xmm4
29093 ; SSE41-NEXT: pshufb %xmm2, %xmm4
29094 ; SSE41-NEXT: psrlw $4, %xmm0
29095 ; SSE41-NEXT: pand %xmm1, %xmm0
29096 ; SSE41-NEXT: pshufb %xmm0, %xmm3
29097 ; SSE41-NEXT: paddb %xmm4, %xmm3
29098 ; SSE41-NEXT: pxor %xmm0, %xmm0
29099 ; SSE41-NEXT: psadbw %xmm0, %xmm3
29100 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
29101 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
29102 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
29103 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
29104 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
29105 ; SSE41-NEXT: pand %xmm1, %xmm2
29106 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
29107 ; SSE41-NEXT: por %xmm2, %xmm0
29110 ; AVX1-LABEL: ugt_35_v2i64:
29112 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29113 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
29114 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29115 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
29116 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
29117 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
29118 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
29119 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
29120 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
29121 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29122 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29125 ; AVX2-LABEL: ugt_35_v2i64:
29127 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29128 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
29129 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29130 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
29131 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
29132 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
29133 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
29134 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
29135 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
29136 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29137 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29140 ; AVX512VPOPCNTDQ-LABEL: ugt_35_v2i64:
29141 ; AVX512VPOPCNTDQ: # %bb.0:
29142 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
29143 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
29144 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29145 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
29146 ; AVX512VPOPCNTDQ-NEXT: retq
29148 ; AVX512VPOPCNTDQVL-LABEL: ugt_35_v2i64:
29149 ; AVX512VPOPCNTDQVL: # %bb.0:
29150 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
29151 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
29152 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
29153 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
29154 ; AVX512VPOPCNTDQVL-NEXT: retq
29156 ; BITALG_NOVLX-LABEL: ugt_35_v2i64:
29157 ; BITALG_NOVLX: # %bb.0:
29158 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
29159 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
29160 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
29161 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29162 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29163 ; BITALG_NOVLX-NEXT: vzeroupper
29164 ; BITALG_NOVLX-NEXT: retq
29166 ; BITALG-LABEL: ugt_35_v2i64:
29168 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
29169 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
29170 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29171 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
29172 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
29173 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
29174 ; BITALG-NEXT: retq
29175 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
29176 %3 = icmp ugt <2 x i64> %2, <i64 35, i64 35>
29177 %4 = sext <2 x i1> %3 to <2 x i64>
29181 define <2 x i64> @ult_36_v2i64(<2 x i64> %0) {
29182 ; SSE2-LABEL: ult_36_v2i64:
29184 ; SSE2-NEXT: movdqa %xmm0, %xmm1
29185 ; SSE2-NEXT: psrlw $1, %xmm1
29186 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29187 ; SSE2-NEXT: psubb %xmm1, %xmm0
29188 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
29189 ; SSE2-NEXT: movdqa %xmm0, %xmm2
29190 ; SSE2-NEXT: pand %xmm1, %xmm2
29191 ; SSE2-NEXT: psrlw $2, %xmm0
29192 ; SSE2-NEXT: pand %xmm1, %xmm0
29193 ; SSE2-NEXT: paddb %xmm2, %xmm0
29194 ; SSE2-NEXT: movdqa %xmm0, %xmm1
29195 ; SSE2-NEXT: psrlw $4, %xmm1
29196 ; SSE2-NEXT: paddb %xmm0, %xmm1
29197 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29198 ; SSE2-NEXT: pxor %xmm0, %xmm0
29199 ; SSE2-NEXT: psadbw %xmm0, %xmm1
29200 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29201 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
29202 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
29203 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483684,2147483684]
29204 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
29205 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
29206 ; SSE2-NEXT: pand %xmm2, %xmm1
29207 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
29208 ; SSE2-NEXT: por %xmm1, %xmm0
29211 ; SSE3-LABEL: ult_36_v2i64:
29213 ; SSE3-NEXT: movdqa %xmm0, %xmm1
29214 ; SSE3-NEXT: psrlw $1, %xmm1
29215 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29216 ; SSE3-NEXT: psubb %xmm1, %xmm0
29217 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
29218 ; SSE3-NEXT: movdqa %xmm0, %xmm2
29219 ; SSE3-NEXT: pand %xmm1, %xmm2
29220 ; SSE3-NEXT: psrlw $2, %xmm0
29221 ; SSE3-NEXT: pand %xmm1, %xmm0
29222 ; SSE3-NEXT: paddb %xmm2, %xmm0
29223 ; SSE3-NEXT: movdqa %xmm0, %xmm1
29224 ; SSE3-NEXT: psrlw $4, %xmm1
29225 ; SSE3-NEXT: paddb %xmm0, %xmm1
29226 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29227 ; SSE3-NEXT: pxor %xmm0, %xmm0
29228 ; SSE3-NEXT: psadbw %xmm0, %xmm1
29229 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29230 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
29231 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
29232 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483684,2147483684]
29233 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
29234 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
29235 ; SSE3-NEXT: pand %xmm2, %xmm1
29236 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
29237 ; SSE3-NEXT: por %xmm1, %xmm0
29240 ; SSSE3-LABEL: ult_36_v2i64:
29242 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29243 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
29244 ; SSSE3-NEXT: pand %xmm1, %xmm2
29245 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29246 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
29247 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
29248 ; SSSE3-NEXT: psrlw $4, %xmm0
29249 ; SSSE3-NEXT: pand %xmm1, %xmm0
29250 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
29251 ; SSSE3-NEXT: paddb %xmm4, %xmm3
29252 ; SSSE3-NEXT: pxor %xmm0, %xmm0
29253 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
29254 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
29255 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
29256 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
29257 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483684,2147483684]
29258 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
29259 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
29260 ; SSSE3-NEXT: pand %xmm1, %xmm2
29261 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
29262 ; SSSE3-NEXT: por %xmm2, %xmm0
29265 ; SSE41-LABEL: ult_36_v2i64:
29267 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29268 ; SSE41-NEXT: movdqa %xmm0, %xmm2
29269 ; SSE41-NEXT: pand %xmm1, %xmm2
29270 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29271 ; SSE41-NEXT: movdqa %xmm3, %xmm4
29272 ; SSE41-NEXT: pshufb %xmm2, %xmm4
29273 ; SSE41-NEXT: psrlw $4, %xmm0
29274 ; SSE41-NEXT: pand %xmm1, %xmm0
29275 ; SSE41-NEXT: pshufb %xmm0, %xmm3
29276 ; SSE41-NEXT: paddb %xmm4, %xmm3
29277 ; SSE41-NEXT: pxor %xmm0, %xmm0
29278 ; SSE41-NEXT: psadbw %xmm0, %xmm3
29279 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
29280 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
29281 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
29282 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483684,2147483684]
29283 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
29284 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
29285 ; SSE41-NEXT: pand %xmm1, %xmm2
29286 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
29287 ; SSE41-NEXT: por %xmm2, %xmm0
29290 ; AVX1-LABEL: ult_36_v2i64:
29292 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29293 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
29294 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29295 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
29296 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
29297 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
29298 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
29299 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
29300 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
29301 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29302 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36]
29303 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29306 ; AVX2-LABEL: ult_36_v2i64:
29308 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29309 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
29310 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29311 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
29312 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
29313 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
29314 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
29315 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
29316 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
29317 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29318 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36]
29319 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29322 ; AVX512VPOPCNTDQ-LABEL: ult_36_v2i64:
29323 ; AVX512VPOPCNTDQ: # %bb.0:
29324 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
29325 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
29326 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36]
29327 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29328 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
29329 ; AVX512VPOPCNTDQ-NEXT: retq
29331 ; AVX512VPOPCNTDQVL-LABEL: ult_36_v2i64:
29332 ; AVX512VPOPCNTDQVL: # %bb.0:
29333 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
29334 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
29335 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
29336 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
29337 ; AVX512VPOPCNTDQVL-NEXT: retq
29339 ; BITALG_NOVLX-LABEL: ult_36_v2i64:
29340 ; BITALG_NOVLX: # %bb.0:
29341 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
29342 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
29343 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
29344 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29345 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36]
29346 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29347 ; BITALG_NOVLX-NEXT: vzeroupper
29348 ; BITALG_NOVLX-NEXT: retq
29350 ; BITALG-LABEL: ult_36_v2i64:
29352 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
29353 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
29354 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29355 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
29356 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
29357 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
29358 ; BITALG-NEXT: retq
29359 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
29360 %3 = icmp ult <2 x i64> %2, <i64 36, i64 36>
29361 %4 = sext <2 x i1> %3 to <2 x i64>
29365 define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) {
29366 ; SSE2-LABEL: ugt_36_v2i64:
29368 ; SSE2-NEXT: movdqa %xmm0, %xmm1
29369 ; SSE2-NEXT: psrlw $1, %xmm1
29370 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29371 ; SSE2-NEXT: psubb %xmm1, %xmm0
29372 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
29373 ; SSE2-NEXT: movdqa %xmm0, %xmm2
29374 ; SSE2-NEXT: pand %xmm1, %xmm2
29375 ; SSE2-NEXT: psrlw $2, %xmm0
29376 ; SSE2-NEXT: pand %xmm1, %xmm0
29377 ; SSE2-NEXT: paddb %xmm2, %xmm0
29378 ; SSE2-NEXT: movdqa %xmm0, %xmm1
29379 ; SSE2-NEXT: psrlw $4, %xmm1
29380 ; SSE2-NEXT: paddb %xmm0, %xmm1
29381 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29382 ; SSE2-NEXT: pxor %xmm0, %xmm0
29383 ; SSE2-NEXT: psadbw %xmm0, %xmm1
29384 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29385 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
29386 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
29387 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29388 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
29389 ; SSE2-NEXT: pand %xmm2, %xmm3
29390 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
29391 ; SSE2-NEXT: por %xmm3, %xmm0
29394 ; SSE3-LABEL: ugt_36_v2i64:
29396 ; SSE3-NEXT: movdqa %xmm0, %xmm1
29397 ; SSE3-NEXT: psrlw $1, %xmm1
29398 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29399 ; SSE3-NEXT: psubb %xmm1, %xmm0
29400 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
29401 ; SSE3-NEXT: movdqa %xmm0, %xmm2
29402 ; SSE3-NEXT: pand %xmm1, %xmm2
29403 ; SSE3-NEXT: psrlw $2, %xmm0
29404 ; SSE3-NEXT: pand %xmm1, %xmm0
29405 ; SSE3-NEXT: paddb %xmm2, %xmm0
29406 ; SSE3-NEXT: movdqa %xmm0, %xmm1
29407 ; SSE3-NEXT: psrlw $4, %xmm1
29408 ; SSE3-NEXT: paddb %xmm0, %xmm1
29409 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29410 ; SSE3-NEXT: pxor %xmm0, %xmm0
29411 ; SSE3-NEXT: psadbw %xmm0, %xmm1
29412 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29413 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
29414 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
29415 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29416 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
29417 ; SSE3-NEXT: pand %xmm2, %xmm3
29418 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
29419 ; SSE3-NEXT: por %xmm3, %xmm0
29422 ; SSSE3-LABEL: ugt_36_v2i64:
29424 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29425 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
29426 ; SSSE3-NEXT: pand %xmm1, %xmm2
29427 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29428 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
29429 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
29430 ; SSSE3-NEXT: psrlw $4, %xmm0
29431 ; SSSE3-NEXT: pand %xmm1, %xmm0
29432 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
29433 ; SSSE3-NEXT: paddb %xmm4, %xmm3
29434 ; SSSE3-NEXT: pxor %xmm0, %xmm0
29435 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
29436 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
29437 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
29438 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
29439 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
29440 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
29441 ; SSSE3-NEXT: pand %xmm1, %xmm2
29442 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
29443 ; SSSE3-NEXT: por %xmm2, %xmm0
29446 ; SSE41-LABEL: ugt_36_v2i64:
29448 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29449 ; SSE41-NEXT: movdqa %xmm0, %xmm2
29450 ; SSE41-NEXT: pand %xmm1, %xmm2
29451 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29452 ; SSE41-NEXT: movdqa %xmm3, %xmm4
29453 ; SSE41-NEXT: pshufb %xmm2, %xmm4
29454 ; SSE41-NEXT: psrlw $4, %xmm0
29455 ; SSE41-NEXT: pand %xmm1, %xmm0
29456 ; SSE41-NEXT: pshufb %xmm0, %xmm3
29457 ; SSE41-NEXT: paddb %xmm4, %xmm3
29458 ; SSE41-NEXT: pxor %xmm0, %xmm0
29459 ; SSE41-NEXT: psadbw %xmm0, %xmm3
29460 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
29461 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
29462 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
29463 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
29464 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
29465 ; SSE41-NEXT: pand %xmm1, %xmm2
29466 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
29467 ; SSE41-NEXT: por %xmm2, %xmm0
29470 ; AVX1-LABEL: ugt_36_v2i64:
29472 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29473 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
29474 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29475 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
29476 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
29477 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
29478 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
29479 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
29480 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
29481 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29482 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29485 ; AVX2-LABEL: ugt_36_v2i64:
29487 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29488 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
29489 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29490 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
29491 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
29492 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
29493 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
29494 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
29495 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
29496 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29497 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29500 ; AVX512VPOPCNTDQ-LABEL: ugt_36_v2i64:
29501 ; AVX512VPOPCNTDQ: # %bb.0:
29502 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
29503 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
29504 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29505 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
29506 ; AVX512VPOPCNTDQ-NEXT: retq
29508 ; AVX512VPOPCNTDQVL-LABEL: ugt_36_v2i64:
29509 ; AVX512VPOPCNTDQVL: # %bb.0:
29510 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
29511 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
29512 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
29513 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
29514 ; AVX512VPOPCNTDQVL-NEXT: retq
29516 ; BITALG_NOVLX-LABEL: ugt_36_v2i64:
29517 ; BITALG_NOVLX: # %bb.0:
29518 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
29519 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
29520 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
29521 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29522 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29523 ; BITALG_NOVLX-NEXT: vzeroupper
29524 ; BITALG_NOVLX-NEXT: retq
29526 ; BITALG-LABEL: ugt_36_v2i64:
29528 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
29529 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
29530 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29531 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
29532 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
29533 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
29534 ; BITALG-NEXT: retq
29535 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
29536 %3 = icmp ugt <2 x i64> %2, <i64 36, i64 36>
29537 %4 = sext <2 x i1> %3 to <2 x i64>
29541 define <2 x i64> @ult_37_v2i64(<2 x i64> %0) {
29542 ; SSE2-LABEL: ult_37_v2i64:
29544 ; SSE2-NEXT: movdqa %xmm0, %xmm1
29545 ; SSE2-NEXT: psrlw $1, %xmm1
29546 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29547 ; SSE2-NEXT: psubb %xmm1, %xmm0
29548 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
29549 ; SSE2-NEXT: movdqa %xmm0, %xmm2
29550 ; SSE2-NEXT: pand %xmm1, %xmm2
29551 ; SSE2-NEXT: psrlw $2, %xmm0
29552 ; SSE2-NEXT: pand %xmm1, %xmm0
29553 ; SSE2-NEXT: paddb %xmm2, %xmm0
29554 ; SSE2-NEXT: movdqa %xmm0, %xmm1
29555 ; SSE2-NEXT: psrlw $4, %xmm1
29556 ; SSE2-NEXT: paddb %xmm0, %xmm1
29557 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29558 ; SSE2-NEXT: pxor %xmm0, %xmm0
29559 ; SSE2-NEXT: psadbw %xmm0, %xmm1
29560 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29561 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
29562 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
29563 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483685,2147483685]
29564 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
29565 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
29566 ; SSE2-NEXT: pand %xmm2, %xmm1
29567 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
29568 ; SSE2-NEXT: por %xmm1, %xmm0
29571 ; SSE3-LABEL: ult_37_v2i64:
29573 ; SSE3-NEXT: movdqa %xmm0, %xmm1
29574 ; SSE3-NEXT: psrlw $1, %xmm1
29575 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29576 ; SSE3-NEXT: psubb %xmm1, %xmm0
29577 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
29578 ; SSE3-NEXT: movdqa %xmm0, %xmm2
29579 ; SSE3-NEXT: pand %xmm1, %xmm2
29580 ; SSE3-NEXT: psrlw $2, %xmm0
29581 ; SSE3-NEXT: pand %xmm1, %xmm0
29582 ; SSE3-NEXT: paddb %xmm2, %xmm0
29583 ; SSE3-NEXT: movdqa %xmm0, %xmm1
29584 ; SSE3-NEXT: psrlw $4, %xmm1
29585 ; SSE3-NEXT: paddb %xmm0, %xmm1
29586 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29587 ; SSE3-NEXT: pxor %xmm0, %xmm0
29588 ; SSE3-NEXT: psadbw %xmm0, %xmm1
29589 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29590 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
29591 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
29592 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483685,2147483685]
29593 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
29594 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
29595 ; SSE3-NEXT: pand %xmm2, %xmm1
29596 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
29597 ; SSE3-NEXT: por %xmm1, %xmm0
29600 ; SSSE3-LABEL: ult_37_v2i64:
29602 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29603 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
29604 ; SSSE3-NEXT: pand %xmm1, %xmm2
29605 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29606 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
29607 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
29608 ; SSSE3-NEXT: psrlw $4, %xmm0
29609 ; SSSE3-NEXT: pand %xmm1, %xmm0
29610 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
29611 ; SSSE3-NEXT: paddb %xmm4, %xmm3
29612 ; SSSE3-NEXT: pxor %xmm0, %xmm0
29613 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
29614 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
29615 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
29616 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
29617 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483685,2147483685]
29618 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
29619 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
29620 ; SSSE3-NEXT: pand %xmm1, %xmm2
29621 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
29622 ; SSSE3-NEXT: por %xmm2, %xmm0
29625 ; SSE41-LABEL: ult_37_v2i64:
29627 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29628 ; SSE41-NEXT: movdqa %xmm0, %xmm2
29629 ; SSE41-NEXT: pand %xmm1, %xmm2
29630 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29631 ; SSE41-NEXT: movdqa %xmm3, %xmm4
29632 ; SSE41-NEXT: pshufb %xmm2, %xmm4
29633 ; SSE41-NEXT: psrlw $4, %xmm0
29634 ; SSE41-NEXT: pand %xmm1, %xmm0
29635 ; SSE41-NEXT: pshufb %xmm0, %xmm3
29636 ; SSE41-NEXT: paddb %xmm4, %xmm3
29637 ; SSE41-NEXT: pxor %xmm0, %xmm0
29638 ; SSE41-NEXT: psadbw %xmm0, %xmm3
29639 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
29640 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
29641 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
29642 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483685,2147483685]
29643 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
29644 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
29645 ; SSE41-NEXT: pand %xmm1, %xmm2
29646 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
29647 ; SSE41-NEXT: por %xmm2, %xmm0
29650 ; AVX1-LABEL: ult_37_v2i64:
29652 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29653 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
29654 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29655 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
29656 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
29657 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
29658 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
29659 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
29660 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
29661 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29662 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37]
29663 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29666 ; AVX2-LABEL: ult_37_v2i64:
29668 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29669 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
29670 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29671 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
29672 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
29673 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
29674 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
29675 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
29676 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
29677 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29678 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37]
29679 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29682 ; AVX512VPOPCNTDQ-LABEL: ult_37_v2i64:
29683 ; AVX512VPOPCNTDQ: # %bb.0:
29684 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
29685 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
29686 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37]
29687 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29688 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
29689 ; AVX512VPOPCNTDQ-NEXT: retq
29691 ; AVX512VPOPCNTDQVL-LABEL: ult_37_v2i64:
29692 ; AVX512VPOPCNTDQVL: # %bb.0:
29693 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
29694 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
29695 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
29696 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
29697 ; AVX512VPOPCNTDQVL-NEXT: retq
29699 ; BITALG_NOVLX-LABEL: ult_37_v2i64:
29700 ; BITALG_NOVLX: # %bb.0:
29701 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
29702 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
29703 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
29704 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29705 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37]
29706 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29707 ; BITALG_NOVLX-NEXT: vzeroupper
29708 ; BITALG_NOVLX-NEXT: retq
29710 ; BITALG-LABEL: ult_37_v2i64:
29712 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
29713 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
29714 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29715 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
29716 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
29717 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
29718 ; BITALG-NEXT: retq
29719 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
29720 %3 = icmp ult <2 x i64> %2, <i64 37, i64 37>
29721 %4 = sext <2 x i1> %3 to <2 x i64>
29725 define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) {
29726 ; SSE2-LABEL: ugt_37_v2i64:
29728 ; SSE2-NEXT: movdqa %xmm0, %xmm1
29729 ; SSE2-NEXT: psrlw $1, %xmm1
29730 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29731 ; SSE2-NEXT: psubb %xmm1, %xmm0
29732 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
29733 ; SSE2-NEXT: movdqa %xmm0, %xmm2
29734 ; SSE2-NEXT: pand %xmm1, %xmm2
29735 ; SSE2-NEXT: psrlw $2, %xmm0
29736 ; SSE2-NEXT: pand %xmm1, %xmm0
29737 ; SSE2-NEXT: paddb %xmm2, %xmm0
29738 ; SSE2-NEXT: movdqa %xmm0, %xmm1
29739 ; SSE2-NEXT: psrlw $4, %xmm1
29740 ; SSE2-NEXT: paddb %xmm0, %xmm1
29741 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29742 ; SSE2-NEXT: pxor %xmm0, %xmm0
29743 ; SSE2-NEXT: psadbw %xmm0, %xmm1
29744 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29745 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
29746 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
29747 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29748 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
29749 ; SSE2-NEXT: pand %xmm2, %xmm3
29750 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
29751 ; SSE2-NEXT: por %xmm3, %xmm0
29754 ; SSE3-LABEL: ugt_37_v2i64:
29756 ; SSE3-NEXT: movdqa %xmm0, %xmm1
29757 ; SSE3-NEXT: psrlw $1, %xmm1
29758 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29759 ; SSE3-NEXT: psubb %xmm1, %xmm0
29760 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
29761 ; SSE3-NEXT: movdqa %xmm0, %xmm2
29762 ; SSE3-NEXT: pand %xmm1, %xmm2
29763 ; SSE3-NEXT: psrlw $2, %xmm0
29764 ; SSE3-NEXT: pand %xmm1, %xmm0
29765 ; SSE3-NEXT: paddb %xmm2, %xmm0
29766 ; SSE3-NEXT: movdqa %xmm0, %xmm1
29767 ; SSE3-NEXT: psrlw $4, %xmm1
29768 ; SSE3-NEXT: paddb %xmm0, %xmm1
29769 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29770 ; SSE3-NEXT: pxor %xmm0, %xmm0
29771 ; SSE3-NEXT: psadbw %xmm0, %xmm1
29772 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29773 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
29774 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
29775 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29776 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
29777 ; SSE3-NEXT: pand %xmm2, %xmm3
29778 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
29779 ; SSE3-NEXT: por %xmm3, %xmm0
29782 ; SSSE3-LABEL: ugt_37_v2i64:
29784 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29785 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
29786 ; SSSE3-NEXT: pand %xmm1, %xmm2
29787 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29788 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
29789 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
29790 ; SSSE3-NEXT: psrlw $4, %xmm0
29791 ; SSSE3-NEXT: pand %xmm1, %xmm0
29792 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
29793 ; SSSE3-NEXT: paddb %xmm4, %xmm3
29794 ; SSSE3-NEXT: pxor %xmm0, %xmm0
29795 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
29796 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
29797 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
29798 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
29799 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
29800 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
29801 ; SSSE3-NEXT: pand %xmm1, %xmm2
29802 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
29803 ; SSSE3-NEXT: por %xmm2, %xmm0
29806 ; SSE41-LABEL: ugt_37_v2i64:
29808 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29809 ; SSE41-NEXT: movdqa %xmm0, %xmm2
29810 ; SSE41-NEXT: pand %xmm1, %xmm2
29811 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29812 ; SSE41-NEXT: movdqa %xmm3, %xmm4
29813 ; SSE41-NEXT: pshufb %xmm2, %xmm4
29814 ; SSE41-NEXT: psrlw $4, %xmm0
29815 ; SSE41-NEXT: pand %xmm1, %xmm0
29816 ; SSE41-NEXT: pshufb %xmm0, %xmm3
29817 ; SSE41-NEXT: paddb %xmm4, %xmm3
29818 ; SSE41-NEXT: pxor %xmm0, %xmm0
29819 ; SSE41-NEXT: psadbw %xmm0, %xmm3
29820 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
29821 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
29822 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
29823 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
29824 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
29825 ; SSE41-NEXT: pand %xmm1, %xmm2
29826 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
29827 ; SSE41-NEXT: por %xmm2, %xmm0
29830 ; AVX1-LABEL: ugt_37_v2i64:
29832 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29833 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
29834 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29835 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
29836 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
29837 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
29838 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
29839 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
29840 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
29841 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29842 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29845 ; AVX2-LABEL: ugt_37_v2i64:
29847 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29848 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
29849 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29850 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
29851 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
29852 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
29853 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
29854 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
29855 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
29856 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29857 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29860 ; AVX512VPOPCNTDQ-LABEL: ugt_37_v2i64:
29861 ; AVX512VPOPCNTDQ: # %bb.0:
29862 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
29863 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
29864 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29865 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
29866 ; AVX512VPOPCNTDQ-NEXT: retq
29868 ; AVX512VPOPCNTDQVL-LABEL: ugt_37_v2i64:
29869 ; AVX512VPOPCNTDQVL: # %bb.0:
29870 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
29871 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
29872 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
29873 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
29874 ; AVX512VPOPCNTDQVL-NEXT: retq
29876 ; BITALG_NOVLX-LABEL: ugt_37_v2i64:
29877 ; BITALG_NOVLX: # %bb.0:
29878 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
29879 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
29880 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
29881 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29882 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29883 ; BITALG_NOVLX-NEXT: vzeroupper
29884 ; BITALG_NOVLX-NEXT: retq
29886 ; BITALG-LABEL: ugt_37_v2i64:
29888 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
29889 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
29890 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29891 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
29892 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
29893 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
29894 ; BITALG-NEXT: retq
29895 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
29896 %3 = icmp ugt <2 x i64> %2, <i64 37, i64 37>
29897 %4 = sext <2 x i1> %3 to <2 x i64>
29901 define <2 x i64> @ult_38_v2i64(<2 x i64> %0) {
29902 ; SSE2-LABEL: ult_38_v2i64:
29904 ; SSE2-NEXT: movdqa %xmm0, %xmm1
29905 ; SSE2-NEXT: psrlw $1, %xmm1
29906 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29907 ; SSE2-NEXT: psubb %xmm1, %xmm0
29908 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
29909 ; SSE2-NEXT: movdqa %xmm0, %xmm2
29910 ; SSE2-NEXT: pand %xmm1, %xmm2
29911 ; SSE2-NEXT: psrlw $2, %xmm0
29912 ; SSE2-NEXT: pand %xmm1, %xmm0
29913 ; SSE2-NEXT: paddb %xmm2, %xmm0
29914 ; SSE2-NEXT: movdqa %xmm0, %xmm1
29915 ; SSE2-NEXT: psrlw $4, %xmm1
29916 ; SSE2-NEXT: paddb %xmm0, %xmm1
29917 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29918 ; SSE2-NEXT: pxor %xmm0, %xmm0
29919 ; SSE2-NEXT: psadbw %xmm0, %xmm1
29920 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29921 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
29922 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
29923 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483686,2147483686]
29924 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
29925 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
29926 ; SSE2-NEXT: pand %xmm2, %xmm1
29927 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
29928 ; SSE2-NEXT: por %xmm1, %xmm0
29931 ; SSE3-LABEL: ult_38_v2i64:
29933 ; SSE3-NEXT: movdqa %xmm0, %xmm1
29934 ; SSE3-NEXT: psrlw $1, %xmm1
29935 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29936 ; SSE3-NEXT: psubb %xmm1, %xmm0
29937 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
29938 ; SSE3-NEXT: movdqa %xmm0, %xmm2
29939 ; SSE3-NEXT: pand %xmm1, %xmm2
29940 ; SSE3-NEXT: psrlw $2, %xmm0
29941 ; SSE3-NEXT: pand %xmm1, %xmm0
29942 ; SSE3-NEXT: paddb %xmm2, %xmm0
29943 ; SSE3-NEXT: movdqa %xmm0, %xmm1
29944 ; SSE3-NEXT: psrlw $4, %xmm1
29945 ; SSE3-NEXT: paddb %xmm0, %xmm1
29946 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29947 ; SSE3-NEXT: pxor %xmm0, %xmm0
29948 ; SSE3-NEXT: psadbw %xmm0, %xmm1
29949 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29950 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
29951 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
29952 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483686,2147483686]
29953 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
29954 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
29955 ; SSE3-NEXT: pand %xmm2, %xmm1
29956 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
29957 ; SSE3-NEXT: por %xmm1, %xmm0
29960 ; SSSE3-LABEL: ult_38_v2i64:
29962 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29963 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
29964 ; SSSE3-NEXT: pand %xmm1, %xmm2
29965 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29966 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
29967 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
29968 ; SSSE3-NEXT: psrlw $4, %xmm0
29969 ; SSSE3-NEXT: pand %xmm1, %xmm0
29970 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
29971 ; SSSE3-NEXT: paddb %xmm4, %xmm3
29972 ; SSSE3-NEXT: pxor %xmm0, %xmm0
29973 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
29974 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
29975 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
29976 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
29977 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483686,2147483686]
29978 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
29979 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
29980 ; SSSE3-NEXT: pand %xmm1, %xmm2
29981 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
29982 ; SSSE3-NEXT: por %xmm2, %xmm0
29985 ; SSE41-LABEL: ult_38_v2i64:
29987 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29988 ; SSE41-NEXT: movdqa %xmm0, %xmm2
29989 ; SSE41-NEXT: pand %xmm1, %xmm2
29990 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29991 ; SSE41-NEXT: movdqa %xmm3, %xmm4
29992 ; SSE41-NEXT: pshufb %xmm2, %xmm4
29993 ; SSE41-NEXT: psrlw $4, %xmm0
29994 ; SSE41-NEXT: pand %xmm1, %xmm0
29995 ; SSE41-NEXT: pshufb %xmm0, %xmm3
29996 ; SSE41-NEXT: paddb %xmm4, %xmm3
29997 ; SSE41-NEXT: pxor %xmm0, %xmm0
29998 ; SSE41-NEXT: psadbw %xmm0, %xmm3
29999 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
30000 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
30001 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
30002 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483686,2147483686]
30003 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
30004 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
30005 ; SSE41-NEXT: pand %xmm1, %xmm2
30006 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
30007 ; SSE41-NEXT: por %xmm2, %xmm0
30010 ; AVX1-LABEL: ult_38_v2i64:
30012 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30013 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
30014 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30015 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
30016 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
30017 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
30018 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
30019 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30020 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
30021 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30022 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38]
30023 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30026 ; AVX2-LABEL: ult_38_v2i64:
30028 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30029 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
30030 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30031 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
30032 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
30033 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
30034 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
30035 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30036 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
30037 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30038 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38]
30039 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30042 ; AVX512VPOPCNTDQ-LABEL: ult_38_v2i64:
30043 ; AVX512VPOPCNTDQ: # %bb.0:
30044 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30045 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
30046 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38]
30047 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30048 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
30049 ; AVX512VPOPCNTDQ-NEXT: retq
30051 ; AVX512VPOPCNTDQVL-LABEL: ult_38_v2i64:
30052 ; AVX512VPOPCNTDQVL: # %bb.0:
30053 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
30054 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
30055 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
30056 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
30057 ; AVX512VPOPCNTDQVL-NEXT: retq
30059 ; BITALG_NOVLX-LABEL: ult_38_v2i64:
30060 ; BITALG_NOVLX: # %bb.0:
30061 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30062 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
30063 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
30064 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30065 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38]
30066 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30067 ; BITALG_NOVLX-NEXT: vzeroupper
30068 ; BITALG_NOVLX-NEXT: retq
30070 ; BITALG-LABEL: ult_38_v2i64:
30072 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
30073 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
30074 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30075 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
30076 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
30077 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
30078 ; BITALG-NEXT: retq
30079 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
30080 %3 = icmp ult <2 x i64> %2, <i64 38, i64 38>
30081 %4 = sext <2 x i1> %3 to <2 x i64>
30085 define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) {
30086 ; SSE2-LABEL: ugt_38_v2i64:
30088 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30089 ; SSE2-NEXT: psrlw $1, %xmm1
30090 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30091 ; SSE2-NEXT: psubb %xmm1, %xmm0
30092 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30093 ; SSE2-NEXT: movdqa %xmm0, %xmm2
30094 ; SSE2-NEXT: pand %xmm1, %xmm2
30095 ; SSE2-NEXT: psrlw $2, %xmm0
30096 ; SSE2-NEXT: pand %xmm1, %xmm0
30097 ; SSE2-NEXT: paddb %xmm2, %xmm0
30098 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30099 ; SSE2-NEXT: psrlw $4, %xmm1
30100 ; SSE2-NEXT: paddb %xmm0, %xmm1
30101 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30102 ; SSE2-NEXT: pxor %xmm0, %xmm0
30103 ; SSE2-NEXT: psadbw %xmm0, %xmm1
30104 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30105 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
30106 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
30107 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30108 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
30109 ; SSE2-NEXT: pand %xmm2, %xmm3
30110 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
30111 ; SSE2-NEXT: por %xmm3, %xmm0
30114 ; SSE3-LABEL: ugt_38_v2i64:
30116 ; SSE3-NEXT: movdqa %xmm0, %xmm1
30117 ; SSE3-NEXT: psrlw $1, %xmm1
30118 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30119 ; SSE3-NEXT: psubb %xmm1, %xmm0
30120 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30121 ; SSE3-NEXT: movdqa %xmm0, %xmm2
30122 ; SSE3-NEXT: pand %xmm1, %xmm2
30123 ; SSE3-NEXT: psrlw $2, %xmm0
30124 ; SSE3-NEXT: pand %xmm1, %xmm0
30125 ; SSE3-NEXT: paddb %xmm2, %xmm0
30126 ; SSE3-NEXT: movdqa %xmm0, %xmm1
30127 ; SSE3-NEXT: psrlw $4, %xmm1
30128 ; SSE3-NEXT: paddb %xmm0, %xmm1
30129 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30130 ; SSE3-NEXT: pxor %xmm0, %xmm0
30131 ; SSE3-NEXT: psadbw %xmm0, %xmm1
30132 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30133 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
30134 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
30135 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30136 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
30137 ; SSE3-NEXT: pand %xmm2, %xmm3
30138 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
30139 ; SSE3-NEXT: por %xmm3, %xmm0
30142 ; SSSE3-LABEL: ugt_38_v2i64:
30144 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30145 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
30146 ; SSSE3-NEXT: pand %xmm1, %xmm2
30147 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30148 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
30149 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
30150 ; SSSE3-NEXT: psrlw $4, %xmm0
30151 ; SSSE3-NEXT: pand %xmm1, %xmm0
30152 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
30153 ; SSSE3-NEXT: paddb %xmm4, %xmm3
30154 ; SSSE3-NEXT: pxor %xmm0, %xmm0
30155 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
30156 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
30157 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
30158 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
30159 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
30160 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
30161 ; SSSE3-NEXT: pand %xmm1, %xmm2
30162 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
30163 ; SSSE3-NEXT: por %xmm2, %xmm0
30166 ; SSE41-LABEL: ugt_38_v2i64:
30168 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30169 ; SSE41-NEXT: movdqa %xmm0, %xmm2
30170 ; SSE41-NEXT: pand %xmm1, %xmm2
30171 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30172 ; SSE41-NEXT: movdqa %xmm3, %xmm4
30173 ; SSE41-NEXT: pshufb %xmm2, %xmm4
30174 ; SSE41-NEXT: psrlw $4, %xmm0
30175 ; SSE41-NEXT: pand %xmm1, %xmm0
30176 ; SSE41-NEXT: pshufb %xmm0, %xmm3
30177 ; SSE41-NEXT: paddb %xmm4, %xmm3
30178 ; SSE41-NEXT: pxor %xmm0, %xmm0
30179 ; SSE41-NEXT: psadbw %xmm0, %xmm3
30180 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
30181 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
30182 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
30183 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
30184 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
30185 ; SSE41-NEXT: pand %xmm1, %xmm2
30186 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
30187 ; SSE41-NEXT: por %xmm2, %xmm0
30190 ; AVX1-LABEL: ugt_38_v2i64:
30192 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30193 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
30194 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30195 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
30196 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
30197 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
30198 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
30199 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30200 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
30201 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30202 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30205 ; AVX2-LABEL: ugt_38_v2i64:
30207 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30208 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
30209 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30210 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
30211 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
30212 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
30213 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
30214 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30215 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
30216 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30217 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30220 ; AVX512VPOPCNTDQ-LABEL: ugt_38_v2i64:
30221 ; AVX512VPOPCNTDQ: # %bb.0:
30222 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30223 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
30224 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30225 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
30226 ; AVX512VPOPCNTDQ-NEXT: retq
30228 ; AVX512VPOPCNTDQVL-LABEL: ugt_38_v2i64:
30229 ; AVX512VPOPCNTDQVL: # %bb.0:
30230 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
30231 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
30232 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
30233 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
30234 ; AVX512VPOPCNTDQVL-NEXT: retq
30236 ; BITALG_NOVLX-LABEL: ugt_38_v2i64:
30237 ; BITALG_NOVLX: # %bb.0:
30238 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30239 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
30240 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
30241 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30242 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30243 ; BITALG_NOVLX-NEXT: vzeroupper
30244 ; BITALG_NOVLX-NEXT: retq
30246 ; BITALG-LABEL: ugt_38_v2i64:
30248 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
30249 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
30250 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30251 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
30252 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
30253 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
30254 ; BITALG-NEXT: retq
30255 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
30256 %3 = icmp ugt <2 x i64> %2, <i64 38, i64 38>
30257 %4 = sext <2 x i1> %3 to <2 x i64>
; Checks lowering of ctpop(<2 x i64>) followed by an unsigned 'ult 39' compare,
; sign-extended to a <2 x i64> all-ones/all-zeros mask, for every RUN target.
; SSE targets emit a bit-twiddling popcount + psadbw and a split 32-bit signed
; compare against 2147483687 (39 + 0x80000000, the unsigned-compare bias);
; AVX/BITALG targets compare directly against the constant [39,39].
; NOTE(review): assertions are autogenerated (see NOTE at the top of the file);
; regenerate with update_llc_test_checks.py rather than hand-editing.
30261 define <2 x i64> @ult_39_v2i64(<2 x i64> %0) {
30262 ; SSE2-LABEL: ult_39_v2i64:
30264 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30265 ; SSE2-NEXT: psrlw $1, %xmm1
30266 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30267 ; SSE2-NEXT: psubb %xmm1, %xmm0
30268 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30269 ; SSE2-NEXT: movdqa %xmm0, %xmm2
30270 ; SSE2-NEXT: pand %xmm1, %xmm2
30271 ; SSE2-NEXT: psrlw $2, %xmm0
30272 ; SSE2-NEXT: pand %xmm1, %xmm0
30273 ; SSE2-NEXT: paddb %xmm2, %xmm0
30274 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30275 ; SSE2-NEXT: psrlw $4, %xmm1
30276 ; SSE2-NEXT: paddb %xmm0, %xmm1
30277 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30278 ; SSE2-NEXT: pxor %xmm0, %xmm0
30279 ; SSE2-NEXT: psadbw %xmm0, %xmm1
30280 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30281 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
30282 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
30283 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483687,2147483687]
30284 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
30285 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
30286 ; SSE2-NEXT: pand %xmm2, %xmm1
30287 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
30288 ; SSE2-NEXT: por %xmm1, %xmm0
30291 ; SSE3-LABEL: ult_39_v2i64:
30293 ; SSE3-NEXT: movdqa %xmm0, %xmm1
30294 ; SSE3-NEXT: psrlw $1, %xmm1
30295 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30296 ; SSE3-NEXT: psubb %xmm1, %xmm0
30297 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30298 ; SSE3-NEXT: movdqa %xmm0, %xmm2
30299 ; SSE3-NEXT: pand %xmm1, %xmm2
30300 ; SSE3-NEXT: psrlw $2, %xmm0
30301 ; SSE3-NEXT: pand %xmm1, %xmm0
30302 ; SSE3-NEXT: paddb %xmm2, %xmm0
30303 ; SSE3-NEXT: movdqa %xmm0, %xmm1
30304 ; SSE3-NEXT: psrlw $4, %xmm1
30305 ; SSE3-NEXT: paddb %xmm0, %xmm1
30306 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30307 ; SSE3-NEXT: pxor %xmm0, %xmm0
30308 ; SSE3-NEXT: psadbw %xmm0, %xmm1
30309 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30310 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
30311 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
30312 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483687,2147483687]
30313 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
30314 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
30315 ; SSE3-NEXT: pand %xmm2, %xmm1
30316 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
30317 ; SSE3-NEXT: por %xmm1, %xmm0
30320 ; SSSE3-LABEL: ult_39_v2i64:
30322 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30323 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
30324 ; SSSE3-NEXT: pand %xmm1, %xmm2
30325 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30326 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
30327 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
30328 ; SSSE3-NEXT: psrlw $4, %xmm0
30329 ; SSSE3-NEXT: pand %xmm1, %xmm0
30330 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
30331 ; SSSE3-NEXT: paddb %xmm4, %xmm3
30332 ; SSSE3-NEXT: pxor %xmm0, %xmm0
30333 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
30334 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
30335 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
30336 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
30337 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483687,2147483687]
30338 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
30339 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
30340 ; SSSE3-NEXT: pand %xmm1, %xmm2
30341 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
30342 ; SSSE3-NEXT: por %xmm2, %xmm0
30345 ; SSE41-LABEL: ult_39_v2i64:
30347 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30348 ; SSE41-NEXT: movdqa %xmm0, %xmm2
30349 ; SSE41-NEXT: pand %xmm1, %xmm2
30350 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30351 ; SSE41-NEXT: movdqa %xmm3, %xmm4
30352 ; SSE41-NEXT: pshufb %xmm2, %xmm4
30353 ; SSE41-NEXT: psrlw $4, %xmm0
30354 ; SSE41-NEXT: pand %xmm1, %xmm0
30355 ; SSE41-NEXT: pshufb %xmm0, %xmm3
30356 ; SSE41-NEXT: paddb %xmm4, %xmm3
30357 ; SSE41-NEXT: pxor %xmm0, %xmm0
30358 ; SSE41-NEXT: psadbw %xmm0, %xmm3
30359 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
30360 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
30361 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
30362 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483687,2147483687]
30363 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
30364 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
30365 ; SSE41-NEXT: pand %xmm1, %xmm2
30366 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
30367 ; SSE41-NEXT: por %xmm2, %xmm0
30370 ; AVX1-LABEL: ult_39_v2i64:
30372 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30373 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
30374 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30375 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
30376 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
30377 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
30378 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
30379 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30380 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
30381 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30382 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39]
30383 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30386 ; AVX2-LABEL: ult_39_v2i64:
30388 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30389 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
30390 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30391 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
30392 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
30393 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
30394 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
30395 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30396 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
30397 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30398 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39]
30399 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30402 ; AVX512VPOPCNTDQ-LABEL: ult_39_v2i64:
30403 ; AVX512VPOPCNTDQ: # %bb.0:
30404 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30405 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
30406 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39]
30407 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30408 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
30409 ; AVX512VPOPCNTDQ-NEXT: retq
30411 ; AVX512VPOPCNTDQVL-LABEL: ult_39_v2i64:
30412 ; AVX512VPOPCNTDQVL: # %bb.0:
30413 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
30414 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
30415 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
30416 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
30417 ; AVX512VPOPCNTDQVL-NEXT: retq
30419 ; BITALG_NOVLX-LABEL: ult_39_v2i64:
30420 ; BITALG_NOVLX: # %bb.0:
30421 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30422 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
30423 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
30424 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30425 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39]
30426 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30427 ; BITALG_NOVLX-NEXT: vzeroupper
30428 ; BITALG_NOVLX-NEXT: retq
30430 ; BITALG-LABEL: ult_39_v2i64:
30432 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
30433 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
30434 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30435 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
30436 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
30437 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
30438 ; BITALG-NEXT: retq
30439 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
30440 %3 = icmp ult <2 x i64> %2, <i64 39, i64 39>
30441 %4 = sext <2 x i1> %3 to <2 x i64>
; Checks lowering of ctpop(<2 x i64>) followed by an unsigned 'ugt 39' compare,
; sign-extended to a <2 x i64> mask. For ugt the compare direction flips, so the
; SSE split 32-bit compare uses pcmpgtd against a memory constant and the AVX
; forms use vpcmpgtq against a constant-pool operand (vs. a register constant in
; the ult variants); AVX512VL/BITALG use the mask-register vpcmpnleuq form.
; NOTE(review): assertions are autogenerated (see NOTE at the top of the file);
; regenerate with update_llc_test_checks.py rather than hand-editing.
30445 define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) {
30446 ; SSE2-LABEL: ugt_39_v2i64:
30448 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30449 ; SSE2-NEXT: psrlw $1, %xmm1
30450 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30451 ; SSE2-NEXT: psubb %xmm1, %xmm0
30452 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30453 ; SSE2-NEXT: movdqa %xmm0, %xmm2
30454 ; SSE2-NEXT: pand %xmm1, %xmm2
30455 ; SSE2-NEXT: psrlw $2, %xmm0
30456 ; SSE2-NEXT: pand %xmm1, %xmm0
30457 ; SSE2-NEXT: paddb %xmm2, %xmm0
30458 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30459 ; SSE2-NEXT: psrlw $4, %xmm1
30460 ; SSE2-NEXT: paddb %xmm0, %xmm1
30461 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30462 ; SSE2-NEXT: pxor %xmm0, %xmm0
30463 ; SSE2-NEXT: psadbw %xmm0, %xmm1
30464 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30465 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
30466 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
30467 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30468 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
30469 ; SSE2-NEXT: pand %xmm2, %xmm3
30470 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
30471 ; SSE2-NEXT: por %xmm3, %xmm0
30474 ; SSE3-LABEL: ugt_39_v2i64:
30476 ; SSE3-NEXT: movdqa %xmm0, %xmm1
30477 ; SSE3-NEXT: psrlw $1, %xmm1
30478 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30479 ; SSE3-NEXT: psubb %xmm1, %xmm0
30480 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30481 ; SSE3-NEXT: movdqa %xmm0, %xmm2
30482 ; SSE3-NEXT: pand %xmm1, %xmm2
30483 ; SSE3-NEXT: psrlw $2, %xmm0
30484 ; SSE3-NEXT: pand %xmm1, %xmm0
30485 ; SSE3-NEXT: paddb %xmm2, %xmm0
30486 ; SSE3-NEXT: movdqa %xmm0, %xmm1
30487 ; SSE3-NEXT: psrlw $4, %xmm1
30488 ; SSE3-NEXT: paddb %xmm0, %xmm1
30489 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30490 ; SSE3-NEXT: pxor %xmm0, %xmm0
30491 ; SSE3-NEXT: psadbw %xmm0, %xmm1
30492 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30493 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
30494 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
30495 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30496 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
30497 ; SSE3-NEXT: pand %xmm2, %xmm3
30498 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
30499 ; SSE3-NEXT: por %xmm3, %xmm0
30502 ; SSSE3-LABEL: ugt_39_v2i64:
30504 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30505 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
30506 ; SSSE3-NEXT: pand %xmm1, %xmm2
30507 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30508 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
30509 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
30510 ; SSSE3-NEXT: psrlw $4, %xmm0
30511 ; SSSE3-NEXT: pand %xmm1, %xmm0
30512 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
30513 ; SSSE3-NEXT: paddb %xmm4, %xmm3
30514 ; SSSE3-NEXT: pxor %xmm0, %xmm0
30515 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
30516 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
30517 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
30518 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
30519 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
30520 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
30521 ; SSSE3-NEXT: pand %xmm1, %xmm2
30522 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
30523 ; SSSE3-NEXT: por %xmm2, %xmm0
30526 ; SSE41-LABEL: ugt_39_v2i64:
30528 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30529 ; SSE41-NEXT: movdqa %xmm0, %xmm2
30530 ; SSE41-NEXT: pand %xmm1, %xmm2
30531 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30532 ; SSE41-NEXT: movdqa %xmm3, %xmm4
30533 ; SSE41-NEXT: pshufb %xmm2, %xmm4
30534 ; SSE41-NEXT: psrlw $4, %xmm0
30535 ; SSE41-NEXT: pand %xmm1, %xmm0
30536 ; SSE41-NEXT: pshufb %xmm0, %xmm3
30537 ; SSE41-NEXT: paddb %xmm4, %xmm3
30538 ; SSE41-NEXT: pxor %xmm0, %xmm0
30539 ; SSE41-NEXT: psadbw %xmm0, %xmm3
30540 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
30541 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
30542 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
30543 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
30544 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
30545 ; SSE41-NEXT: pand %xmm1, %xmm2
30546 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
30547 ; SSE41-NEXT: por %xmm2, %xmm0
30550 ; AVX1-LABEL: ugt_39_v2i64:
30552 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30553 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
30554 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30555 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
30556 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
30557 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
30558 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
30559 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30560 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
30561 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30562 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30565 ; AVX2-LABEL: ugt_39_v2i64:
30567 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30568 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
30569 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30570 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
30571 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
30572 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
30573 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
30574 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30575 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
30576 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30577 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30580 ; AVX512VPOPCNTDQ-LABEL: ugt_39_v2i64:
30581 ; AVX512VPOPCNTDQ: # %bb.0:
30582 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30583 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
30584 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30585 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
30586 ; AVX512VPOPCNTDQ-NEXT: retq
30588 ; AVX512VPOPCNTDQVL-LABEL: ugt_39_v2i64:
30589 ; AVX512VPOPCNTDQVL: # %bb.0:
30590 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
30591 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
30592 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
30593 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
30594 ; AVX512VPOPCNTDQVL-NEXT: retq
30596 ; BITALG_NOVLX-LABEL: ugt_39_v2i64:
30597 ; BITALG_NOVLX: # %bb.0:
30598 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30599 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
30600 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
30601 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30602 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30603 ; BITALG_NOVLX-NEXT: vzeroupper
30604 ; BITALG_NOVLX-NEXT: retq
30606 ; BITALG-LABEL: ugt_39_v2i64:
30608 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
30609 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
30610 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30611 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
30612 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
30613 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
30614 ; BITALG-NEXT: retq
30615 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
30616 %3 = icmp ugt <2 x i64> %2, <i64 39, i64 39>
30617 %4 = sext <2 x i1> %3 to <2 x i64>
; Checks lowering of ctpop(<2 x i64>) followed by an unsigned 'ult 40' compare,
; sign-extended to a <2 x i64> mask. Same shape as ult_39_v2i64 but with the
; threshold bumped: SSE compares against 2147483688 (40 + 0x80000000, the
; unsigned-compare bias) and AVX/BITALG against the constant [40,40].
; NOTE(review): assertions are autogenerated (see NOTE at the top of the file);
; regenerate with update_llc_test_checks.py rather than hand-editing.
30621 define <2 x i64> @ult_40_v2i64(<2 x i64> %0) {
30622 ; SSE2-LABEL: ult_40_v2i64:
30624 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30625 ; SSE2-NEXT: psrlw $1, %xmm1
30626 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30627 ; SSE2-NEXT: psubb %xmm1, %xmm0
30628 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30629 ; SSE2-NEXT: movdqa %xmm0, %xmm2
30630 ; SSE2-NEXT: pand %xmm1, %xmm2
30631 ; SSE2-NEXT: psrlw $2, %xmm0
30632 ; SSE2-NEXT: pand %xmm1, %xmm0
30633 ; SSE2-NEXT: paddb %xmm2, %xmm0
30634 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30635 ; SSE2-NEXT: psrlw $4, %xmm1
30636 ; SSE2-NEXT: paddb %xmm0, %xmm1
30637 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30638 ; SSE2-NEXT: pxor %xmm0, %xmm0
30639 ; SSE2-NEXT: psadbw %xmm0, %xmm1
30640 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30641 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
30642 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
30643 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483688,2147483688]
30644 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
30645 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
30646 ; SSE2-NEXT: pand %xmm2, %xmm1
30647 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
30648 ; SSE2-NEXT: por %xmm1, %xmm0
30651 ; SSE3-LABEL: ult_40_v2i64:
30653 ; SSE3-NEXT: movdqa %xmm0, %xmm1
30654 ; SSE3-NEXT: psrlw $1, %xmm1
30655 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30656 ; SSE3-NEXT: psubb %xmm1, %xmm0
30657 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30658 ; SSE3-NEXT: movdqa %xmm0, %xmm2
30659 ; SSE3-NEXT: pand %xmm1, %xmm2
30660 ; SSE3-NEXT: psrlw $2, %xmm0
30661 ; SSE3-NEXT: pand %xmm1, %xmm0
30662 ; SSE3-NEXT: paddb %xmm2, %xmm0
30663 ; SSE3-NEXT: movdqa %xmm0, %xmm1
30664 ; SSE3-NEXT: psrlw $4, %xmm1
30665 ; SSE3-NEXT: paddb %xmm0, %xmm1
30666 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30667 ; SSE3-NEXT: pxor %xmm0, %xmm0
30668 ; SSE3-NEXT: psadbw %xmm0, %xmm1
30669 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30670 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
30671 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
30672 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483688,2147483688]
30673 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
30674 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
30675 ; SSE3-NEXT: pand %xmm2, %xmm1
30676 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
30677 ; SSE3-NEXT: por %xmm1, %xmm0
30680 ; SSSE3-LABEL: ult_40_v2i64:
30682 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30683 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
30684 ; SSSE3-NEXT: pand %xmm1, %xmm2
30685 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30686 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
30687 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
30688 ; SSSE3-NEXT: psrlw $4, %xmm0
30689 ; SSSE3-NEXT: pand %xmm1, %xmm0
30690 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
30691 ; SSSE3-NEXT: paddb %xmm4, %xmm3
30692 ; SSSE3-NEXT: pxor %xmm0, %xmm0
30693 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
30694 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
30695 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
30696 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
30697 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483688,2147483688]
30698 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
30699 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
30700 ; SSSE3-NEXT: pand %xmm1, %xmm2
30701 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
30702 ; SSSE3-NEXT: por %xmm2, %xmm0
30705 ; SSE41-LABEL: ult_40_v2i64:
30707 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30708 ; SSE41-NEXT: movdqa %xmm0, %xmm2
30709 ; SSE41-NEXT: pand %xmm1, %xmm2
30710 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30711 ; SSE41-NEXT: movdqa %xmm3, %xmm4
30712 ; SSE41-NEXT: pshufb %xmm2, %xmm4
30713 ; SSE41-NEXT: psrlw $4, %xmm0
30714 ; SSE41-NEXT: pand %xmm1, %xmm0
30715 ; SSE41-NEXT: pshufb %xmm0, %xmm3
30716 ; SSE41-NEXT: paddb %xmm4, %xmm3
30717 ; SSE41-NEXT: pxor %xmm0, %xmm0
30718 ; SSE41-NEXT: psadbw %xmm0, %xmm3
30719 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
30720 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
30721 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
30722 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483688,2147483688]
30723 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
30724 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
30725 ; SSE41-NEXT: pand %xmm1, %xmm2
30726 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
30727 ; SSE41-NEXT: por %xmm2, %xmm0
30730 ; AVX1-LABEL: ult_40_v2i64:
30732 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30733 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
30734 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30735 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
30736 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
30737 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
30738 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
30739 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30740 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
30741 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30742 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [40,40]
30743 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30746 ; AVX2-LABEL: ult_40_v2i64:
30748 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30749 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
30750 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30751 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
30752 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
30753 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
30754 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
30755 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30756 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
30757 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30758 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [40,40]
30759 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30762 ; AVX512VPOPCNTDQ-LABEL: ult_40_v2i64:
30763 ; AVX512VPOPCNTDQ: # %bb.0:
30764 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30765 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
30766 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [40,40]
30767 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30768 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
30769 ; AVX512VPOPCNTDQ-NEXT: retq
30771 ; AVX512VPOPCNTDQVL-LABEL: ult_40_v2i64:
30772 ; AVX512VPOPCNTDQVL: # %bb.0:
30773 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
30774 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
30775 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
30776 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
30777 ; AVX512VPOPCNTDQVL-NEXT: retq
30779 ; BITALG_NOVLX-LABEL: ult_40_v2i64:
30780 ; BITALG_NOVLX: # %bb.0:
30781 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30782 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
30783 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
30784 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30785 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [40,40]
30786 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30787 ; BITALG_NOVLX-NEXT: vzeroupper
30788 ; BITALG_NOVLX-NEXT: retq
30790 ; BITALG-LABEL: ult_40_v2i64:
30792 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
30793 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
30794 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30795 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
30796 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
30797 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
30798 ; BITALG-NEXT: retq
30799 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
30800 %3 = icmp ult <2 x i64> %2, <i64 40, i64 40>
30801 %4 = sext <2 x i1> %3 to <2 x i64>
; Checks lowering of ctpop(<2 x i64>) followed by an unsigned 'ugt 40' compare,
; sign-extended to a <2 x i64> mask. Mirrors ugt_39_v2i64 with the threshold
; bumped to 40: SSE uses the split pcmpgtd-against-memory sequence, AVX uses
; vpcmpgtq with a constant-pool operand, AVX512VL/BITALG use vpcmpnleuq + k-mask.
; NOTE(review): assertions are autogenerated (see NOTE at the top of the file);
; regenerate with update_llc_test_checks.py rather than hand-editing.
30805 define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) {
30806 ; SSE2-LABEL: ugt_40_v2i64:
30808 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30809 ; SSE2-NEXT: psrlw $1, %xmm1
30810 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30811 ; SSE2-NEXT: psubb %xmm1, %xmm0
30812 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30813 ; SSE2-NEXT: movdqa %xmm0, %xmm2
30814 ; SSE2-NEXT: pand %xmm1, %xmm2
30815 ; SSE2-NEXT: psrlw $2, %xmm0
30816 ; SSE2-NEXT: pand %xmm1, %xmm0
30817 ; SSE2-NEXT: paddb %xmm2, %xmm0
30818 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30819 ; SSE2-NEXT: psrlw $4, %xmm1
30820 ; SSE2-NEXT: paddb %xmm0, %xmm1
30821 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30822 ; SSE2-NEXT: pxor %xmm0, %xmm0
30823 ; SSE2-NEXT: psadbw %xmm0, %xmm1
30824 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30825 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
30826 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
30827 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30828 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
30829 ; SSE2-NEXT: pand %xmm2, %xmm3
30830 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
30831 ; SSE2-NEXT: por %xmm3, %xmm0
30834 ; SSE3-LABEL: ugt_40_v2i64:
30836 ; SSE3-NEXT: movdqa %xmm0, %xmm1
30837 ; SSE3-NEXT: psrlw $1, %xmm1
30838 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30839 ; SSE3-NEXT: psubb %xmm1, %xmm0
30840 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30841 ; SSE3-NEXT: movdqa %xmm0, %xmm2
30842 ; SSE3-NEXT: pand %xmm1, %xmm2
30843 ; SSE3-NEXT: psrlw $2, %xmm0
30844 ; SSE3-NEXT: pand %xmm1, %xmm0
30845 ; SSE3-NEXT: paddb %xmm2, %xmm0
30846 ; SSE3-NEXT: movdqa %xmm0, %xmm1
30847 ; SSE3-NEXT: psrlw $4, %xmm1
30848 ; SSE3-NEXT: paddb %xmm0, %xmm1
30849 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30850 ; SSE3-NEXT: pxor %xmm0, %xmm0
30851 ; SSE3-NEXT: psadbw %xmm0, %xmm1
30852 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30853 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
30854 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
30855 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30856 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
30857 ; SSE3-NEXT: pand %xmm2, %xmm3
30858 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
30859 ; SSE3-NEXT: por %xmm3, %xmm0
30862 ; SSSE3-LABEL: ugt_40_v2i64:
30864 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30865 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
30866 ; SSSE3-NEXT: pand %xmm1, %xmm2
30867 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30868 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
30869 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
30870 ; SSSE3-NEXT: psrlw $4, %xmm0
30871 ; SSSE3-NEXT: pand %xmm1, %xmm0
30872 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
30873 ; SSSE3-NEXT: paddb %xmm4, %xmm3
30874 ; SSSE3-NEXT: pxor %xmm0, %xmm0
30875 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
30876 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
30877 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
30878 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
30879 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
30880 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
30881 ; SSSE3-NEXT: pand %xmm1, %xmm2
30882 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
30883 ; SSSE3-NEXT: por %xmm2, %xmm0
30886 ; SSE41-LABEL: ugt_40_v2i64:
30888 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30889 ; SSE41-NEXT: movdqa %xmm0, %xmm2
30890 ; SSE41-NEXT: pand %xmm1, %xmm2
30891 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30892 ; SSE41-NEXT: movdqa %xmm3, %xmm4
30893 ; SSE41-NEXT: pshufb %xmm2, %xmm4
30894 ; SSE41-NEXT: psrlw $4, %xmm0
30895 ; SSE41-NEXT: pand %xmm1, %xmm0
30896 ; SSE41-NEXT: pshufb %xmm0, %xmm3
30897 ; SSE41-NEXT: paddb %xmm4, %xmm3
30898 ; SSE41-NEXT: pxor %xmm0, %xmm0
30899 ; SSE41-NEXT: psadbw %xmm0, %xmm3
30900 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
30901 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
30902 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
30903 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
30904 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
30905 ; SSE41-NEXT: pand %xmm1, %xmm2
30906 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
30907 ; SSE41-NEXT: por %xmm2, %xmm0
30910 ; AVX1-LABEL: ugt_40_v2i64:
30912 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30913 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
30914 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30915 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
30916 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
30917 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
30918 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
30919 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30920 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
30921 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30922 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30925 ; AVX2-LABEL: ugt_40_v2i64:
30927 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30928 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
30929 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30930 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
30931 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
30932 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
30933 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
30934 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30935 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
30936 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30937 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30940 ; AVX512VPOPCNTDQ-LABEL: ugt_40_v2i64:
30941 ; AVX512VPOPCNTDQ: # %bb.0:
30942 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30943 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
30944 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30945 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
30946 ; AVX512VPOPCNTDQ-NEXT: retq
30948 ; AVX512VPOPCNTDQVL-LABEL: ugt_40_v2i64:
30949 ; AVX512VPOPCNTDQVL: # %bb.0:
30950 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
30951 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
30952 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
30953 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
30954 ; AVX512VPOPCNTDQVL-NEXT: retq
30956 ; BITALG_NOVLX-LABEL: ugt_40_v2i64:
30957 ; BITALG_NOVLX: # %bb.0:
30958 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30959 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
30960 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
30961 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30962 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30963 ; BITALG_NOVLX-NEXT: vzeroupper
30964 ; BITALG_NOVLX-NEXT: retq
30966 ; BITALG-LABEL: ugt_40_v2i64:
30968 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
30969 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
30970 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30971 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
30972 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
30973 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
30974 ; BITALG-NEXT: retq
30975 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
30976 %3 = icmp ugt <2 x i64> %2, <i64 40, i64 40>
30977 %4 = sext <2 x i1> %3 to <2 x i64>
30981 define <2 x i64> @ult_41_v2i64(<2 x i64> %0) {
30982 ; SSE2-LABEL: ult_41_v2i64:
30984 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30985 ; SSE2-NEXT: psrlw $1, %xmm1
30986 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30987 ; SSE2-NEXT: psubb %xmm1, %xmm0
30988 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30989 ; SSE2-NEXT: movdqa %xmm0, %xmm2
30990 ; SSE2-NEXT: pand %xmm1, %xmm2
30991 ; SSE2-NEXT: psrlw $2, %xmm0
30992 ; SSE2-NEXT: pand %xmm1, %xmm0
30993 ; SSE2-NEXT: paddb %xmm2, %xmm0
30994 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30995 ; SSE2-NEXT: psrlw $4, %xmm1
30996 ; SSE2-NEXT: paddb %xmm0, %xmm1
30997 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30998 ; SSE2-NEXT: pxor %xmm0, %xmm0
30999 ; SSE2-NEXT: psadbw %xmm0, %xmm1
31000 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31001 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
31002 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
31003 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483689,2147483689]
31004 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
31005 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
31006 ; SSE2-NEXT: pand %xmm2, %xmm1
31007 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
31008 ; SSE2-NEXT: por %xmm1, %xmm0
31011 ; SSE3-LABEL: ult_41_v2i64:
31013 ; SSE3-NEXT: movdqa %xmm0, %xmm1
31014 ; SSE3-NEXT: psrlw $1, %xmm1
31015 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31016 ; SSE3-NEXT: psubb %xmm1, %xmm0
31017 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
31018 ; SSE3-NEXT: movdqa %xmm0, %xmm2
31019 ; SSE3-NEXT: pand %xmm1, %xmm2
31020 ; SSE3-NEXT: psrlw $2, %xmm0
31021 ; SSE3-NEXT: pand %xmm1, %xmm0
31022 ; SSE3-NEXT: paddb %xmm2, %xmm0
31023 ; SSE3-NEXT: movdqa %xmm0, %xmm1
31024 ; SSE3-NEXT: psrlw $4, %xmm1
31025 ; SSE3-NEXT: paddb %xmm0, %xmm1
31026 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31027 ; SSE3-NEXT: pxor %xmm0, %xmm0
31028 ; SSE3-NEXT: psadbw %xmm0, %xmm1
31029 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31030 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
31031 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
31032 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483689,2147483689]
31033 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
31034 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
31035 ; SSE3-NEXT: pand %xmm2, %xmm1
31036 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
31037 ; SSE3-NEXT: por %xmm1, %xmm0
31040 ; SSSE3-LABEL: ult_41_v2i64:
31042 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31043 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
31044 ; SSSE3-NEXT: pand %xmm1, %xmm2
31045 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31046 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
31047 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
31048 ; SSSE3-NEXT: psrlw $4, %xmm0
31049 ; SSSE3-NEXT: pand %xmm1, %xmm0
31050 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
31051 ; SSSE3-NEXT: paddb %xmm4, %xmm3
31052 ; SSSE3-NEXT: pxor %xmm0, %xmm0
31053 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
31054 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
31055 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
31056 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
31057 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483689,2147483689]
31058 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
31059 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
31060 ; SSSE3-NEXT: pand %xmm1, %xmm2
31061 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
31062 ; SSSE3-NEXT: por %xmm2, %xmm0
31065 ; SSE41-LABEL: ult_41_v2i64:
31067 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31068 ; SSE41-NEXT: movdqa %xmm0, %xmm2
31069 ; SSE41-NEXT: pand %xmm1, %xmm2
31070 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31071 ; SSE41-NEXT: movdqa %xmm3, %xmm4
31072 ; SSE41-NEXT: pshufb %xmm2, %xmm4
31073 ; SSE41-NEXT: psrlw $4, %xmm0
31074 ; SSE41-NEXT: pand %xmm1, %xmm0
31075 ; SSE41-NEXT: pshufb %xmm0, %xmm3
31076 ; SSE41-NEXT: paddb %xmm4, %xmm3
31077 ; SSE41-NEXT: pxor %xmm0, %xmm0
31078 ; SSE41-NEXT: psadbw %xmm0, %xmm3
31079 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
31080 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
31081 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
31082 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483689,2147483689]
31083 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
31084 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
31085 ; SSE41-NEXT: pand %xmm1, %xmm2
31086 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
31087 ; SSE41-NEXT: por %xmm2, %xmm0
31090 ; AVX1-LABEL: ult_41_v2i64:
31092 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31093 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
31094 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31095 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
31096 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
31097 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
31098 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
31099 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
31100 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
31101 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31102 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41]
31103 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31106 ; AVX2-LABEL: ult_41_v2i64:
31108 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31109 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
31110 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31111 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
31112 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
31113 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
31114 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
31115 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
31116 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
31117 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31118 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41]
31119 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31122 ; AVX512VPOPCNTDQ-LABEL: ult_41_v2i64:
31123 ; AVX512VPOPCNTDQ: # %bb.0:
31124 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
31125 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
31126 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41]
31127 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31128 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
31129 ; AVX512VPOPCNTDQ-NEXT: retq
31131 ; AVX512VPOPCNTDQVL-LABEL: ult_41_v2i64:
31132 ; AVX512VPOPCNTDQVL: # %bb.0:
31133 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
31134 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
31135 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
31136 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
31137 ; AVX512VPOPCNTDQVL-NEXT: retq
31139 ; BITALG_NOVLX-LABEL: ult_41_v2i64:
31140 ; BITALG_NOVLX: # %bb.0:
31141 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
31142 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
31143 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
31144 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31145 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41]
31146 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31147 ; BITALG_NOVLX-NEXT: vzeroupper
31148 ; BITALG_NOVLX-NEXT: retq
31150 ; BITALG-LABEL: ult_41_v2i64:
31152 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
31153 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
31154 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31155 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
31156 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
31157 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
31158 ; BITALG-NEXT: retq
31159 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
31160 %3 = icmp ult <2 x i64> %2, <i64 41, i64 41>
31161 %4 = sext <2 x i1> %3 to <2 x i64>
31165 define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) {
31166 ; SSE2-LABEL: ugt_41_v2i64:
31168 ; SSE2-NEXT: movdqa %xmm0, %xmm1
31169 ; SSE2-NEXT: psrlw $1, %xmm1
31170 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31171 ; SSE2-NEXT: psubb %xmm1, %xmm0
31172 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
31173 ; SSE2-NEXT: movdqa %xmm0, %xmm2
31174 ; SSE2-NEXT: pand %xmm1, %xmm2
31175 ; SSE2-NEXT: psrlw $2, %xmm0
31176 ; SSE2-NEXT: pand %xmm1, %xmm0
31177 ; SSE2-NEXT: paddb %xmm2, %xmm0
31178 ; SSE2-NEXT: movdqa %xmm0, %xmm1
31179 ; SSE2-NEXT: psrlw $4, %xmm1
31180 ; SSE2-NEXT: paddb %xmm0, %xmm1
31181 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31182 ; SSE2-NEXT: pxor %xmm0, %xmm0
31183 ; SSE2-NEXT: psadbw %xmm0, %xmm1
31184 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31185 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
31186 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
31187 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31188 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
31189 ; SSE2-NEXT: pand %xmm2, %xmm3
31190 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
31191 ; SSE2-NEXT: por %xmm3, %xmm0
31194 ; SSE3-LABEL: ugt_41_v2i64:
31196 ; SSE3-NEXT: movdqa %xmm0, %xmm1
31197 ; SSE3-NEXT: psrlw $1, %xmm1
31198 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31199 ; SSE3-NEXT: psubb %xmm1, %xmm0
31200 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
31201 ; SSE3-NEXT: movdqa %xmm0, %xmm2
31202 ; SSE3-NEXT: pand %xmm1, %xmm2
31203 ; SSE3-NEXT: psrlw $2, %xmm0
31204 ; SSE3-NEXT: pand %xmm1, %xmm0
31205 ; SSE3-NEXT: paddb %xmm2, %xmm0
31206 ; SSE3-NEXT: movdqa %xmm0, %xmm1
31207 ; SSE3-NEXT: psrlw $4, %xmm1
31208 ; SSE3-NEXT: paddb %xmm0, %xmm1
31209 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31210 ; SSE3-NEXT: pxor %xmm0, %xmm0
31211 ; SSE3-NEXT: psadbw %xmm0, %xmm1
31212 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31213 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
31214 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
31215 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31216 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
31217 ; SSE3-NEXT: pand %xmm2, %xmm3
31218 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
31219 ; SSE3-NEXT: por %xmm3, %xmm0
31222 ; SSSE3-LABEL: ugt_41_v2i64:
31224 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31225 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
31226 ; SSSE3-NEXT: pand %xmm1, %xmm2
31227 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31228 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
31229 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
31230 ; SSSE3-NEXT: psrlw $4, %xmm0
31231 ; SSSE3-NEXT: pand %xmm1, %xmm0
31232 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
31233 ; SSSE3-NEXT: paddb %xmm4, %xmm3
31234 ; SSSE3-NEXT: pxor %xmm0, %xmm0
31235 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
31236 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
31237 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
31238 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
31239 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
31240 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
31241 ; SSSE3-NEXT: pand %xmm1, %xmm2
31242 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
31243 ; SSSE3-NEXT: por %xmm2, %xmm0
31246 ; SSE41-LABEL: ugt_41_v2i64:
31248 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31249 ; SSE41-NEXT: movdqa %xmm0, %xmm2
31250 ; SSE41-NEXT: pand %xmm1, %xmm2
31251 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31252 ; SSE41-NEXT: movdqa %xmm3, %xmm4
31253 ; SSE41-NEXT: pshufb %xmm2, %xmm4
31254 ; SSE41-NEXT: psrlw $4, %xmm0
31255 ; SSE41-NEXT: pand %xmm1, %xmm0
31256 ; SSE41-NEXT: pshufb %xmm0, %xmm3
31257 ; SSE41-NEXT: paddb %xmm4, %xmm3
31258 ; SSE41-NEXT: pxor %xmm0, %xmm0
31259 ; SSE41-NEXT: psadbw %xmm0, %xmm3
31260 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
31261 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
31262 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
31263 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
31264 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
31265 ; SSE41-NEXT: pand %xmm1, %xmm2
31266 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
31267 ; SSE41-NEXT: por %xmm2, %xmm0
31270 ; AVX1-LABEL: ugt_41_v2i64:
31272 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31273 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
31274 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31275 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
31276 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
31277 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
31278 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
31279 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
31280 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
31281 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31282 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
31285 ; AVX2-LABEL: ugt_41_v2i64:
31287 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31288 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
31289 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31290 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
31291 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
31292 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
31293 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
31294 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
31295 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
31296 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31297 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
31300 ; AVX512VPOPCNTDQ-LABEL: ugt_41_v2i64:
31301 ; AVX512VPOPCNTDQ: # %bb.0:
31302 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
31303 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
31304 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
31305 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
31306 ; AVX512VPOPCNTDQ-NEXT: retq
31308 ; AVX512VPOPCNTDQVL-LABEL: ugt_41_v2i64:
31309 ; AVX512VPOPCNTDQVL: # %bb.0:
31310 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
31311 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
31312 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
31313 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
31314 ; AVX512VPOPCNTDQVL-NEXT: retq
31316 ; BITALG_NOVLX-LABEL: ugt_41_v2i64:
31317 ; BITALG_NOVLX: # %bb.0:
31318 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
31319 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
31320 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
31321 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31322 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
31323 ; BITALG_NOVLX-NEXT: vzeroupper
31324 ; BITALG_NOVLX-NEXT: retq
31326 ; BITALG-LABEL: ugt_41_v2i64:
31328 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
31329 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
31330 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31331 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
31332 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
31333 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
31334 ; BITALG-NEXT: retq
31335 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
31336 %3 = icmp ugt <2 x i64> %2, <i64 41, i64 41>
31337 %4 = sext <2 x i1> %3 to <2 x i64>
31341 define <2 x i64> @ult_42_v2i64(<2 x i64> %0) {
31342 ; SSE2-LABEL: ult_42_v2i64:
31344 ; SSE2-NEXT: movdqa %xmm0, %xmm1
31345 ; SSE2-NEXT: psrlw $1, %xmm1
31346 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31347 ; SSE2-NEXT: psubb %xmm1, %xmm0
31348 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
31349 ; SSE2-NEXT: movdqa %xmm0, %xmm2
31350 ; SSE2-NEXT: pand %xmm1, %xmm2
31351 ; SSE2-NEXT: psrlw $2, %xmm0
31352 ; SSE2-NEXT: pand %xmm1, %xmm0
31353 ; SSE2-NEXT: paddb %xmm2, %xmm0
31354 ; SSE2-NEXT: movdqa %xmm0, %xmm1
31355 ; SSE2-NEXT: psrlw $4, %xmm1
31356 ; SSE2-NEXT: paddb %xmm0, %xmm1
31357 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31358 ; SSE2-NEXT: pxor %xmm0, %xmm0
31359 ; SSE2-NEXT: psadbw %xmm0, %xmm1
31360 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31361 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
31362 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
31363 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483690,2147483690]
31364 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
31365 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
31366 ; SSE2-NEXT: pand %xmm2, %xmm1
31367 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
31368 ; SSE2-NEXT: por %xmm1, %xmm0
31371 ; SSE3-LABEL: ult_42_v2i64:
31373 ; SSE3-NEXT: movdqa %xmm0, %xmm1
31374 ; SSE3-NEXT: psrlw $1, %xmm1
31375 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31376 ; SSE3-NEXT: psubb %xmm1, %xmm0
31377 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
31378 ; SSE3-NEXT: movdqa %xmm0, %xmm2
31379 ; SSE3-NEXT: pand %xmm1, %xmm2
31380 ; SSE3-NEXT: psrlw $2, %xmm0
31381 ; SSE3-NEXT: pand %xmm1, %xmm0
31382 ; SSE3-NEXT: paddb %xmm2, %xmm0
31383 ; SSE3-NEXT: movdqa %xmm0, %xmm1
31384 ; SSE3-NEXT: psrlw $4, %xmm1
31385 ; SSE3-NEXT: paddb %xmm0, %xmm1
31386 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31387 ; SSE3-NEXT: pxor %xmm0, %xmm0
31388 ; SSE3-NEXT: psadbw %xmm0, %xmm1
31389 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31390 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
31391 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
31392 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483690,2147483690]
31393 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
31394 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
31395 ; SSE3-NEXT: pand %xmm2, %xmm1
31396 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
31397 ; SSE3-NEXT: por %xmm1, %xmm0
31400 ; SSSE3-LABEL: ult_42_v2i64:
31402 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31403 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
31404 ; SSSE3-NEXT: pand %xmm1, %xmm2
31405 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31406 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
31407 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
31408 ; SSSE3-NEXT: psrlw $4, %xmm0
31409 ; SSSE3-NEXT: pand %xmm1, %xmm0
31410 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
31411 ; SSSE3-NEXT: paddb %xmm4, %xmm3
31412 ; SSSE3-NEXT: pxor %xmm0, %xmm0
31413 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
31414 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
31415 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
31416 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
31417 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483690,2147483690]
31418 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
31419 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
31420 ; SSSE3-NEXT: pand %xmm1, %xmm2
31421 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
31422 ; SSSE3-NEXT: por %xmm2, %xmm0
31425 ; SSE41-LABEL: ult_42_v2i64:
31427 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31428 ; SSE41-NEXT: movdqa %xmm0, %xmm2
31429 ; SSE41-NEXT: pand %xmm1, %xmm2
31430 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31431 ; SSE41-NEXT: movdqa %xmm3, %xmm4
31432 ; SSE41-NEXT: pshufb %xmm2, %xmm4
31433 ; SSE41-NEXT: psrlw $4, %xmm0
31434 ; SSE41-NEXT: pand %xmm1, %xmm0
31435 ; SSE41-NEXT: pshufb %xmm0, %xmm3
31436 ; SSE41-NEXT: paddb %xmm4, %xmm3
31437 ; SSE41-NEXT: pxor %xmm0, %xmm0
31438 ; SSE41-NEXT: psadbw %xmm0, %xmm3
31439 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
31440 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
31441 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
31442 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483690,2147483690]
31443 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
31444 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
31445 ; SSE41-NEXT: pand %xmm1, %xmm2
31446 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
31447 ; SSE41-NEXT: por %xmm2, %xmm0
31450 ; AVX1-LABEL: ult_42_v2i64:
31452 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31453 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
31454 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31455 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
31456 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
31457 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
31458 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
31459 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
31460 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
31461 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31462 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42]
31463 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31466 ; AVX2-LABEL: ult_42_v2i64:
31468 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31469 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
31470 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31471 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
31472 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
31473 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
31474 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
31475 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
31476 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
31477 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31478 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42]
31479 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31482 ; AVX512VPOPCNTDQ-LABEL: ult_42_v2i64:
31483 ; AVX512VPOPCNTDQ: # %bb.0:
31484 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
31485 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
31486 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42]
31487 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31488 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
31489 ; AVX512VPOPCNTDQ-NEXT: retq
31491 ; AVX512VPOPCNTDQVL-LABEL: ult_42_v2i64:
31492 ; AVX512VPOPCNTDQVL: # %bb.0:
31493 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
31494 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
31495 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
31496 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
31497 ; AVX512VPOPCNTDQVL-NEXT: retq
31499 ; BITALG_NOVLX-LABEL: ult_42_v2i64:
31500 ; BITALG_NOVLX: # %bb.0:
31501 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
31502 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
31503 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
31504 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31505 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42]
31506 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31507 ; BITALG_NOVLX-NEXT: vzeroupper
31508 ; BITALG_NOVLX-NEXT: retq
31510 ; BITALG-LABEL: ult_42_v2i64:
31512 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
31513 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
31514 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31515 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
31516 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
31517 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
31518 ; BITALG-NEXT: retq
31519 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
31520 %3 = icmp ult <2 x i64> %2, <i64 42, i64 42>
31521 %4 = sext <2 x i1> %3 to <2 x i64>
31525 define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) {
31526 ; SSE2-LABEL: ugt_42_v2i64:
31528 ; SSE2-NEXT: movdqa %xmm0, %xmm1
31529 ; SSE2-NEXT: psrlw $1, %xmm1
31530 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31531 ; SSE2-NEXT: psubb %xmm1, %xmm0
31532 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
31533 ; SSE2-NEXT: movdqa %xmm0, %xmm2
31534 ; SSE2-NEXT: pand %xmm1, %xmm2
31535 ; SSE2-NEXT: psrlw $2, %xmm0
31536 ; SSE2-NEXT: pand %xmm1, %xmm0
31537 ; SSE2-NEXT: paddb %xmm2, %xmm0
31538 ; SSE2-NEXT: movdqa %xmm0, %xmm1
31539 ; SSE2-NEXT: psrlw $4, %xmm1
31540 ; SSE2-NEXT: paddb %xmm0, %xmm1
31541 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31542 ; SSE2-NEXT: pxor %xmm0, %xmm0
31543 ; SSE2-NEXT: psadbw %xmm0, %xmm1
31544 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31545 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
31546 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
31547 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31548 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
31549 ; SSE2-NEXT: pand %xmm2, %xmm3
31550 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
31551 ; SSE2-NEXT: por %xmm3, %xmm0
31554 ; SSE3-LABEL: ugt_42_v2i64:
31556 ; SSE3-NEXT: movdqa %xmm0, %xmm1
31557 ; SSE3-NEXT: psrlw $1, %xmm1
31558 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31559 ; SSE3-NEXT: psubb %xmm1, %xmm0
31560 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
31561 ; SSE3-NEXT: movdqa %xmm0, %xmm2
31562 ; SSE3-NEXT: pand %xmm1, %xmm2
31563 ; SSE3-NEXT: psrlw $2, %xmm0
31564 ; SSE3-NEXT: pand %xmm1, %xmm0
31565 ; SSE3-NEXT: paddb %xmm2, %xmm0
31566 ; SSE3-NEXT: movdqa %xmm0, %xmm1
31567 ; SSE3-NEXT: psrlw $4, %xmm1
31568 ; SSE3-NEXT: paddb %xmm0, %xmm1
31569 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31570 ; SSE3-NEXT: pxor %xmm0, %xmm0
31571 ; SSE3-NEXT: psadbw %xmm0, %xmm1
31572 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31573 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
31574 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
31575 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31576 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
31577 ; SSE3-NEXT: pand %xmm2, %xmm3
31578 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
31579 ; SSE3-NEXT: por %xmm3, %xmm0
31582 ; SSSE3-LABEL: ugt_42_v2i64:
31584 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31585 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
31586 ; SSSE3-NEXT: pand %xmm1, %xmm2
31587 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31588 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
31589 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
31590 ; SSSE3-NEXT: psrlw $4, %xmm0
31591 ; SSSE3-NEXT: pand %xmm1, %xmm0
31592 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
31593 ; SSSE3-NEXT: paddb %xmm4, %xmm3
31594 ; SSSE3-NEXT: pxor %xmm0, %xmm0
31595 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
31596 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
31597 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
31598 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
31599 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
31600 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
31601 ; SSSE3-NEXT: pand %xmm1, %xmm2
31602 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
31603 ; SSSE3-NEXT: por %xmm2, %xmm0
31606 ; SSE41-LABEL: ugt_42_v2i64:
31608 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31609 ; SSE41-NEXT: movdqa %xmm0, %xmm2
31610 ; SSE41-NEXT: pand %xmm1, %xmm2
31611 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31612 ; SSE41-NEXT: movdqa %xmm3, %xmm4
31613 ; SSE41-NEXT: pshufb %xmm2, %xmm4
31614 ; SSE41-NEXT: psrlw $4, %xmm0
31615 ; SSE41-NEXT: pand %xmm1, %xmm0
31616 ; SSE41-NEXT: pshufb %xmm0, %xmm3
31617 ; SSE41-NEXT: paddb %xmm4, %xmm3
31618 ; SSE41-NEXT: pxor %xmm0, %xmm0
31619 ; SSE41-NEXT: psadbw %xmm0, %xmm3
31620 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
31621 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
31622 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
31623 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
31624 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
31625 ; SSE41-NEXT: pand %xmm1, %xmm2
31626 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
31627 ; SSE41-NEXT: por %xmm2, %xmm0
31630 ; AVX1-LABEL: ugt_42_v2i64:
31632 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31633 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
31634 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31635 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
31636 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
31637 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
31638 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
31639 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
31640 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
31641 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31642 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
31645 ; AVX2-LABEL: ugt_42_v2i64:
31647 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31648 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
31649 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31650 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
31651 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
31652 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
31653 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
31654 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
31655 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
31656 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31657 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
31660 ; AVX512VPOPCNTDQ-LABEL: ugt_42_v2i64:
31661 ; AVX512VPOPCNTDQ: # %bb.0:
31662 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
31663 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
31664 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
31665 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
31666 ; AVX512VPOPCNTDQ-NEXT: retq
31668 ; AVX512VPOPCNTDQVL-LABEL: ugt_42_v2i64:
31669 ; AVX512VPOPCNTDQVL: # %bb.0:
31670 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
31671 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
31672 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
31673 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
31674 ; AVX512VPOPCNTDQVL-NEXT: retq
31676 ; BITALG_NOVLX-LABEL: ugt_42_v2i64:
31677 ; BITALG_NOVLX: # %bb.0:
31678 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
31679 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
31680 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
31681 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31682 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
31683 ; BITALG_NOVLX-NEXT: vzeroupper
31684 ; BITALG_NOVLX-NEXT: retq
31686 ; BITALG-LABEL: ugt_42_v2i64:
31688 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
31689 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
31690 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31691 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
31692 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
31693 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
31694 ; BITALG-NEXT: retq
31695 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
31696 %3 = icmp ugt <2 x i64> %2, <i64 42, i64 42>
31697 %4 = sext <2 x i1> %3 to <2 x i64>
31701 define <2 x i64> @ult_43_v2i64(<2 x i64> %0) {
31702 ; SSE2-LABEL: ult_43_v2i64:
31704 ; SSE2-NEXT: movdqa %xmm0, %xmm1
31705 ; SSE2-NEXT: psrlw $1, %xmm1
31706 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31707 ; SSE2-NEXT: psubb %xmm1, %xmm0
31708 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
31709 ; SSE2-NEXT: movdqa %xmm0, %xmm2
31710 ; SSE2-NEXT: pand %xmm1, %xmm2
31711 ; SSE2-NEXT: psrlw $2, %xmm0
31712 ; SSE2-NEXT: pand %xmm1, %xmm0
31713 ; SSE2-NEXT: paddb %xmm2, %xmm0
31714 ; SSE2-NEXT: movdqa %xmm0, %xmm1
31715 ; SSE2-NEXT: psrlw $4, %xmm1
31716 ; SSE2-NEXT: paddb %xmm0, %xmm1
31717 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31718 ; SSE2-NEXT: pxor %xmm0, %xmm0
31719 ; SSE2-NEXT: psadbw %xmm0, %xmm1
31720 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31721 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
31722 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
31723 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483691,2147483691]
31724 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
31725 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
31726 ; SSE2-NEXT: pand %xmm2, %xmm1
31727 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
31728 ; SSE2-NEXT: por %xmm1, %xmm0
31731 ; SSE3-LABEL: ult_43_v2i64:
31733 ; SSE3-NEXT: movdqa %xmm0, %xmm1
31734 ; SSE3-NEXT: psrlw $1, %xmm1
31735 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31736 ; SSE3-NEXT: psubb %xmm1, %xmm0
31737 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
31738 ; SSE3-NEXT: movdqa %xmm0, %xmm2
31739 ; SSE3-NEXT: pand %xmm1, %xmm2
31740 ; SSE3-NEXT: psrlw $2, %xmm0
31741 ; SSE3-NEXT: pand %xmm1, %xmm0
31742 ; SSE3-NEXT: paddb %xmm2, %xmm0
31743 ; SSE3-NEXT: movdqa %xmm0, %xmm1
31744 ; SSE3-NEXT: psrlw $4, %xmm1
31745 ; SSE3-NEXT: paddb %xmm0, %xmm1
31746 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31747 ; SSE3-NEXT: pxor %xmm0, %xmm0
31748 ; SSE3-NEXT: psadbw %xmm0, %xmm1
31749 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31750 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
31751 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
31752 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483691,2147483691]
31753 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
31754 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
31755 ; SSE3-NEXT: pand %xmm2, %xmm1
31756 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
31757 ; SSE3-NEXT: por %xmm1, %xmm0
31760 ; SSSE3-LABEL: ult_43_v2i64:
31762 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31763 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
31764 ; SSSE3-NEXT: pand %xmm1, %xmm2
31765 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31766 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
31767 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
31768 ; SSSE3-NEXT: psrlw $4, %xmm0
31769 ; SSSE3-NEXT: pand %xmm1, %xmm0
31770 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
31771 ; SSSE3-NEXT: paddb %xmm4, %xmm3
31772 ; SSSE3-NEXT: pxor %xmm0, %xmm0
31773 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
31774 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
31775 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
31776 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
31777 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483691,2147483691]
31778 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
31779 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
31780 ; SSSE3-NEXT: pand %xmm1, %xmm2
31781 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
31782 ; SSSE3-NEXT: por %xmm2, %xmm0
31785 ; SSE41-LABEL: ult_43_v2i64:
31787 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31788 ; SSE41-NEXT: movdqa %xmm0, %xmm2
31789 ; SSE41-NEXT: pand %xmm1, %xmm2
31790 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31791 ; SSE41-NEXT: movdqa %xmm3, %xmm4
31792 ; SSE41-NEXT: pshufb %xmm2, %xmm4
31793 ; SSE41-NEXT: psrlw $4, %xmm0
31794 ; SSE41-NEXT: pand %xmm1, %xmm0
31795 ; SSE41-NEXT: pshufb %xmm0, %xmm3
31796 ; SSE41-NEXT: paddb %xmm4, %xmm3
31797 ; SSE41-NEXT: pxor %xmm0, %xmm0
31798 ; SSE41-NEXT: psadbw %xmm0, %xmm3
31799 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
31800 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
31801 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
31802 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483691,2147483691]
31803 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
31804 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
31805 ; SSE41-NEXT: pand %xmm1, %xmm2
31806 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
31807 ; SSE41-NEXT: por %xmm2, %xmm0
31810 ; AVX1-LABEL: ult_43_v2i64:
31812 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31813 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
31814 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31815 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
31816 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
31817 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
31818 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
31819 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
31820 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
31821 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31822 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43]
31823 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31826 ; AVX2-LABEL: ult_43_v2i64:
31828 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31829 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
31830 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31831 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
31832 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
31833 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
31834 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
31835 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
31836 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
31837 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31838 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43]
31839 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31842 ; AVX512VPOPCNTDQ-LABEL: ult_43_v2i64:
31843 ; AVX512VPOPCNTDQ: # %bb.0:
31844 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
31845 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
31846 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43]
31847 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31848 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
31849 ; AVX512VPOPCNTDQ-NEXT: retq
31851 ; AVX512VPOPCNTDQVL-LABEL: ult_43_v2i64:
31852 ; AVX512VPOPCNTDQVL: # %bb.0:
31853 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
31854 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
31855 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
31856 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
31857 ; AVX512VPOPCNTDQVL-NEXT: retq
31859 ; BITALG_NOVLX-LABEL: ult_43_v2i64:
31860 ; BITALG_NOVLX: # %bb.0:
31861 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
31862 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
31863 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
31864 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31865 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43]
31866 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31867 ; BITALG_NOVLX-NEXT: vzeroupper
31868 ; BITALG_NOVLX-NEXT: retq
31870 ; BITALG-LABEL: ult_43_v2i64:
31872 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
31873 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
31874 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31875 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
31876 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
31877 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
31878 ; BITALG-NEXT: retq
31879 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
31880 %3 = icmp ult <2 x i64> %2, <i64 43, i64 43>
31881 %4 = sext <2 x i1> %3 to <2 x i64>
31885 define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) {
31886 ; SSE2-LABEL: ugt_43_v2i64:
31888 ; SSE2-NEXT: movdqa %xmm0, %xmm1
31889 ; SSE2-NEXT: psrlw $1, %xmm1
31890 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31891 ; SSE2-NEXT: psubb %xmm1, %xmm0
31892 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
31893 ; SSE2-NEXT: movdqa %xmm0, %xmm2
31894 ; SSE2-NEXT: pand %xmm1, %xmm2
31895 ; SSE2-NEXT: psrlw $2, %xmm0
31896 ; SSE2-NEXT: pand %xmm1, %xmm0
31897 ; SSE2-NEXT: paddb %xmm2, %xmm0
31898 ; SSE2-NEXT: movdqa %xmm0, %xmm1
31899 ; SSE2-NEXT: psrlw $4, %xmm1
31900 ; SSE2-NEXT: paddb %xmm0, %xmm1
31901 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31902 ; SSE2-NEXT: pxor %xmm0, %xmm0
31903 ; SSE2-NEXT: psadbw %xmm0, %xmm1
31904 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31905 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
31906 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
31907 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31908 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
31909 ; SSE2-NEXT: pand %xmm2, %xmm3
31910 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
31911 ; SSE2-NEXT: por %xmm3, %xmm0
31914 ; SSE3-LABEL: ugt_43_v2i64:
31916 ; SSE3-NEXT: movdqa %xmm0, %xmm1
31917 ; SSE3-NEXT: psrlw $1, %xmm1
31918 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31919 ; SSE3-NEXT: psubb %xmm1, %xmm0
31920 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
31921 ; SSE3-NEXT: movdqa %xmm0, %xmm2
31922 ; SSE3-NEXT: pand %xmm1, %xmm2
31923 ; SSE3-NEXT: psrlw $2, %xmm0
31924 ; SSE3-NEXT: pand %xmm1, %xmm0
31925 ; SSE3-NEXT: paddb %xmm2, %xmm0
31926 ; SSE3-NEXT: movdqa %xmm0, %xmm1
31927 ; SSE3-NEXT: psrlw $4, %xmm1
31928 ; SSE3-NEXT: paddb %xmm0, %xmm1
31929 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31930 ; SSE3-NEXT: pxor %xmm0, %xmm0
31931 ; SSE3-NEXT: psadbw %xmm0, %xmm1
31932 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31933 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
31934 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
31935 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31936 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
31937 ; SSE3-NEXT: pand %xmm2, %xmm3
31938 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
31939 ; SSE3-NEXT: por %xmm3, %xmm0
31942 ; SSSE3-LABEL: ugt_43_v2i64:
31944 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31945 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
31946 ; SSSE3-NEXT: pand %xmm1, %xmm2
31947 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31948 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
31949 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
31950 ; SSSE3-NEXT: psrlw $4, %xmm0
31951 ; SSSE3-NEXT: pand %xmm1, %xmm0
31952 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
31953 ; SSSE3-NEXT: paddb %xmm4, %xmm3
31954 ; SSSE3-NEXT: pxor %xmm0, %xmm0
31955 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
31956 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
31957 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
31958 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
31959 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
31960 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
31961 ; SSSE3-NEXT: pand %xmm1, %xmm2
31962 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
31963 ; SSSE3-NEXT: por %xmm2, %xmm0
31966 ; SSE41-LABEL: ugt_43_v2i64:
31968 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31969 ; SSE41-NEXT: movdqa %xmm0, %xmm2
31970 ; SSE41-NEXT: pand %xmm1, %xmm2
31971 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31972 ; SSE41-NEXT: movdqa %xmm3, %xmm4
31973 ; SSE41-NEXT: pshufb %xmm2, %xmm4
31974 ; SSE41-NEXT: psrlw $4, %xmm0
31975 ; SSE41-NEXT: pand %xmm1, %xmm0
31976 ; SSE41-NEXT: pshufb %xmm0, %xmm3
31977 ; SSE41-NEXT: paddb %xmm4, %xmm3
31978 ; SSE41-NEXT: pxor %xmm0, %xmm0
31979 ; SSE41-NEXT: psadbw %xmm0, %xmm3
31980 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
31981 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
31982 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
31983 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
31984 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
31985 ; SSE41-NEXT: pand %xmm1, %xmm2
31986 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
31987 ; SSE41-NEXT: por %xmm2, %xmm0
31990 ; AVX1-LABEL: ugt_43_v2i64:
31992 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31993 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
31994 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31995 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
31996 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
31997 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
31998 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
31999 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32000 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
32001 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32002 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
32005 ; AVX2-LABEL: ugt_43_v2i64:
32007 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32008 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
32009 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32010 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
32011 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
32012 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
32013 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
32014 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32015 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
32016 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32017 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
32020 ; AVX512VPOPCNTDQ-LABEL: ugt_43_v2i64:
32021 ; AVX512VPOPCNTDQ: # %bb.0:
32022 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32023 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
32024 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
32025 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
32026 ; AVX512VPOPCNTDQ-NEXT: retq
32028 ; AVX512VPOPCNTDQVL-LABEL: ugt_43_v2i64:
32029 ; AVX512VPOPCNTDQVL: # %bb.0:
32030 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
32031 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
32032 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
32033 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
32034 ; AVX512VPOPCNTDQVL-NEXT: retq
32036 ; BITALG_NOVLX-LABEL: ugt_43_v2i64:
32037 ; BITALG_NOVLX: # %bb.0:
32038 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32039 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
32040 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
32041 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32042 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
32043 ; BITALG_NOVLX-NEXT: vzeroupper
32044 ; BITALG_NOVLX-NEXT: retq
32046 ; BITALG-LABEL: ugt_43_v2i64:
32048 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
32049 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
32050 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32051 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
32052 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
32053 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
32054 ; BITALG-NEXT: retq
32055 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
32056 %3 = icmp ugt <2 x i64> %2, <i64 43, i64 43>
32057 %4 = sext <2 x i1> %3 to <2 x i64>
; Test: popcount of each i64 lane, unsigned-less-than 44, sign-extended to a
; <2 x i64> mask. Same lowering families as the surrounding tests; note that
; in the ult direction the 32-bit-emulation constant is visible and equals
; 2147483692 = 0x80000000 + 44, i.e. the sign-bias trick that turns an
; unsigned compare into a signed pcmpgtd. AVX compares against the splat
; [44,44] with vpcmpgtq (operands swapped for less-than); the VL/BITALG
; variants use vpcmpltuq into a k-mask and materialize the mask via a
; zero-masked all-ones move.
32061 define <2 x i64> @ult_44_v2i64(<2 x i64> %0) {
32062 ; SSE2-LABEL: ult_44_v2i64:
32064 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32065 ; SSE2-NEXT: psrlw $1, %xmm1
32066 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32067 ; SSE2-NEXT: psubb %xmm1, %xmm0
32068 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32069 ; SSE2-NEXT: movdqa %xmm0, %xmm2
32070 ; SSE2-NEXT: pand %xmm1, %xmm2
32071 ; SSE2-NEXT: psrlw $2, %xmm0
32072 ; SSE2-NEXT: pand %xmm1, %xmm0
32073 ; SSE2-NEXT: paddb %xmm2, %xmm0
32074 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32075 ; SSE2-NEXT: psrlw $4, %xmm1
32076 ; SSE2-NEXT: paddb %xmm0, %xmm1
32077 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32078 ; SSE2-NEXT: pxor %xmm0, %xmm0
32079 ; SSE2-NEXT: psadbw %xmm0, %xmm1
32080 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32081 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
32082 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
32083 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483692,2147483692]
32084 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
32085 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
32086 ; SSE2-NEXT: pand %xmm2, %xmm1
32087 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
32088 ; SSE2-NEXT: por %xmm1, %xmm0
32091 ; SSE3-LABEL: ult_44_v2i64:
32093 ; SSE3-NEXT: movdqa %xmm0, %xmm1
32094 ; SSE3-NEXT: psrlw $1, %xmm1
32095 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32096 ; SSE3-NEXT: psubb %xmm1, %xmm0
32097 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32098 ; SSE3-NEXT: movdqa %xmm0, %xmm2
32099 ; SSE3-NEXT: pand %xmm1, %xmm2
32100 ; SSE3-NEXT: psrlw $2, %xmm0
32101 ; SSE3-NEXT: pand %xmm1, %xmm0
32102 ; SSE3-NEXT: paddb %xmm2, %xmm0
32103 ; SSE3-NEXT: movdqa %xmm0, %xmm1
32104 ; SSE3-NEXT: psrlw $4, %xmm1
32105 ; SSE3-NEXT: paddb %xmm0, %xmm1
32106 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32107 ; SSE3-NEXT: pxor %xmm0, %xmm0
32108 ; SSE3-NEXT: psadbw %xmm0, %xmm1
32109 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32110 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
32111 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
32112 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483692,2147483692]
32113 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
32114 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
32115 ; SSE3-NEXT: pand %xmm2, %xmm1
32116 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
32117 ; SSE3-NEXT: por %xmm1, %xmm0
32120 ; SSSE3-LABEL: ult_44_v2i64:
32122 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32123 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
32124 ; SSSE3-NEXT: pand %xmm1, %xmm2
32125 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32126 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
32127 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
32128 ; SSSE3-NEXT: psrlw $4, %xmm0
32129 ; SSSE3-NEXT: pand %xmm1, %xmm0
32130 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
32131 ; SSSE3-NEXT: paddb %xmm4, %xmm3
32132 ; SSSE3-NEXT: pxor %xmm0, %xmm0
32133 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
32134 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
32135 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
32136 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
32137 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483692,2147483692]
32138 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
32139 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
32140 ; SSSE3-NEXT: pand %xmm1, %xmm2
32141 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
32142 ; SSSE3-NEXT: por %xmm2, %xmm0
32145 ; SSE41-LABEL: ult_44_v2i64:
32147 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32148 ; SSE41-NEXT: movdqa %xmm0, %xmm2
32149 ; SSE41-NEXT: pand %xmm1, %xmm2
32150 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32151 ; SSE41-NEXT: movdqa %xmm3, %xmm4
32152 ; SSE41-NEXT: pshufb %xmm2, %xmm4
32153 ; SSE41-NEXT: psrlw $4, %xmm0
32154 ; SSE41-NEXT: pand %xmm1, %xmm0
32155 ; SSE41-NEXT: pshufb %xmm0, %xmm3
32156 ; SSE41-NEXT: paddb %xmm4, %xmm3
32157 ; SSE41-NEXT: pxor %xmm0, %xmm0
32158 ; SSE41-NEXT: psadbw %xmm0, %xmm3
32159 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
32160 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
32161 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
32162 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483692,2147483692]
32163 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
32164 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
32165 ; SSE41-NEXT: pand %xmm1, %xmm2
32166 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
32167 ; SSE41-NEXT: por %xmm2, %xmm0
32170 ; AVX1-LABEL: ult_44_v2i64:
32172 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32173 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
32174 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32175 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
32176 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
32177 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
32178 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
32179 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32180 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
32181 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32182 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44]
32183 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32186 ; AVX2-LABEL: ult_44_v2i64:
32188 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32189 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
32190 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32191 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
32192 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
32193 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
32194 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
32195 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32196 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
32197 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32198 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44]
32199 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32202 ; AVX512VPOPCNTDQ-LABEL: ult_44_v2i64:
32203 ; AVX512VPOPCNTDQ: # %bb.0:
32204 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32205 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
32206 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44]
32207 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32208 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
32209 ; AVX512VPOPCNTDQ-NEXT: retq
32211 ; AVX512VPOPCNTDQVL-LABEL: ult_44_v2i64:
32212 ; AVX512VPOPCNTDQVL: # %bb.0:
32213 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
32214 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
32215 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
32216 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
32217 ; AVX512VPOPCNTDQVL-NEXT: retq
32219 ; BITALG_NOVLX-LABEL: ult_44_v2i64:
32220 ; BITALG_NOVLX: # %bb.0:
32221 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32222 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
32223 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
32224 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32225 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44]
32226 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32227 ; BITALG_NOVLX-NEXT: vzeroupper
32228 ; BITALG_NOVLX-NEXT: retq
32230 ; BITALG-LABEL: ult_44_v2i64:
32232 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
32233 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
32234 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32235 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
32236 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
32237 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
32238 ; BITALG-NEXT: retq
32239 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
32240 %3 = icmp ult <2 x i64> %2, <i64 44, i64 44>
32241 %4 = sext <2 x i1> %3 to <2 x i64>
; Test: popcount of each i64 lane, unsigned-greater-than 44, sign-extended to
; a <2 x i64> mask. Structurally identical to the ugt_43 test above it in the
; file, with only the threshold constant changed (constants are hidden behind
; the LCPI constant-pool regex on the pre-AVX paths). Covers the same four
; lowering families: bit-twiddling popcount, pshufb-LUT + psadbw popcount,
; vpcmpgtq on AVX, and native vpopcntq / vpopcntb with k-mask compares on
; AVX512VPOPCNTDQ and BITALG.
32245 define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) {
32246 ; SSE2-LABEL: ugt_44_v2i64:
32248 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32249 ; SSE2-NEXT: psrlw $1, %xmm1
32250 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32251 ; SSE2-NEXT: psubb %xmm1, %xmm0
32252 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32253 ; SSE2-NEXT: movdqa %xmm0, %xmm2
32254 ; SSE2-NEXT: pand %xmm1, %xmm2
32255 ; SSE2-NEXT: psrlw $2, %xmm0
32256 ; SSE2-NEXT: pand %xmm1, %xmm0
32257 ; SSE2-NEXT: paddb %xmm2, %xmm0
32258 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32259 ; SSE2-NEXT: psrlw $4, %xmm1
32260 ; SSE2-NEXT: paddb %xmm0, %xmm1
32261 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32262 ; SSE2-NEXT: pxor %xmm0, %xmm0
32263 ; SSE2-NEXT: psadbw %xmm0, %xmm1
32264 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32265 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
32266 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
32267 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32268 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
32269 ; SSE2-NEXT: pand %xmm2, %xmm3
32270 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
32271 ; SSE2-NEXT: por %xmm3, %xmm0
32274 ; SSE3-LABEL: ugt_44_v2i64:
32276 ; SSE3-NEXT: movdqa %xmm0, %xmm1
32277 ; SSE3-NEXT: psrlw $1, %xmm1
32278 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32279 ; SSE3-NEXT: psubb %xmm1, %xmm0
32280 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32281 ; SSE3-NEXT: movdqa %xmm0, %xmm2
32282 ; SSE3-NEXT: pand %xmm1, %xmm2
32283 ; SSE3-NEXT: psrlw $2, %xmm0
32284 ; SSE3-NEXT: pand %xmm1, %xmm0
32285 ; SSE3-NEXT: paddb %xmm2, %xmm0
32286 ; SSE3-NEXT: movdqa %xmm0, %xmm1
32287 ; SSE3-NEXT: psrlw $4, %xmm1
32288 ; SSE3-NEXT: paddb %xmm0, %xmm1
32289 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32290 ; SSE3-NEXT: pxor %xmm0, %xmm0
32291 ; SSE3-NEXT: psadbw %xmm0, %xmm1
32292 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32293 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
32294 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
32295 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32296 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
32297 ; SSE3-NEXT: pand %xmm2, %xmm3
32298 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
32299 ; SSE3-NEXT: por %xmm3, %xmm0
32302 ; SSSE3-LABEL: ugt_44_v2i64:
32304 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32305 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
32306 ; SSSE3-NEXT: pand %xmm1, %xmm2
32307 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32308 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
32309 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
32310 ; SSSE3-NEXT: psrlw $4, %xmm0
32311 ; SSSE3-NEXT: pand %xmm1, %xmm0
32312 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
32313 ; SSSE3-NEXT: paddb %xmm4, %xmm3
32314 ; SSSE3-NEXT: pxor %xmm0, %xmm0
32315 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
32316 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
32317 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
32318 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
32319 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
32320 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
32321 ; SSSE3-NEXT: pand %xmm1, %xmm2
32322 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
32323 ; SSSE3-NEXT: por %xmm2, %xmm0
32326 ; SSE41-LABEL: ugt_44_v2i64:
32328 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32329 ; SSE41-NEXT: movdqa %xmm0, %xmm2
32330 ; SSE41-NEXT: pand %xmm1, %xmm2
32331 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32332 ; SSE41-NEXT: movdqa %xmm3, %xmm4
32333 ; SSE41-NEXT: pshufb %xmm2, %xmm4
32334 ; SSE41-NEXT: psrlw $4, %xmm0
32335 ; SSE41-NEXT: pand %xmm1, %xmm0
32336 ; SSE41-NEXT: pshufb %xmm0, %xmm3
32337 ; SSE41-NEXT: paddb %xmm4, %xmm3
32338 ; SSE41-NEXT: pxor %xmm0, %xmm0
32339 ; SSE41-NEXT: psadbw %xmm0, %xmm3
32340 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
32341 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
32342 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
32343 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
32344 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
32345 ; SSE41-NEXT: pand %xmm1, %xmm2
32346 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
32347 ; SSE41-NEXT: por %xmm2, %xmm0
32350 ; AVX1-LABEL: ugt_44_v2i64:
32352 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32353 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
32354 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32355 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
32356 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
32357 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
32358 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
32359 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32360 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
32361 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32362 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
32365 ; AVX2-LABEL: ugt_44_v2i64:
32367 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32368 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
32369 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32370 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
32371 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
32372 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
32373 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
32374 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32375 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
32376 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32377 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
32380 ; AVX512VPOPCNTDQ-LABEL: ugt_44_v2i64:
32381 ; AVX512VPOPCNTDQ: # %bb.0:
32382 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32383 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
32384 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
32385 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
32386 ; AVX512VPOPCNTDQ-NEXT: retq
32388 ; AVX512VPOPCNTDQVL-LABEL: ugt_44_v2i64:
32389 ; AVX512VPOPCNTDQVL: # %bb.0:
32390 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
32391 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
32392 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
32393 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
32394 ; AVX512VPOPCNTDQVL-NEXT: retq
32396 ; BITALG_NOVLX-LABEL: ugt_44_v2i64:
32397 ; BITALG_NOVLX: # %bb.0:
32398 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32399 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
32400 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
32401 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32402 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
32403 ; BITALG_NOVLX-NEXT: vzeroupper
32404 ; BITALG_NOVLX-NEXT: retq
32406 ; BITALG-LABEL: ugt_44_v2i64:
32408 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
32409 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
32410 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32411 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
32412 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
32413 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
32414 ; BITALG-NEXT: retq
32415 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
32416 %3 = icmp ugt <2 x i64> %2, <i64 44, i64 44>
32417 %4 = sext <2 x i1> %3 to <2 x i64>
; Test: popcount of each i64 lane, unsigned-less-than 45, sign-extended to a
; <2 x i64> mask. Structurally identical to the ult_44 test above it in the
; file, with the threshold bumped by one: the visible 32-bit-emulation
; constant is 2147483693 = 0x80000000 + 45 (sign-bias trick), and the AVX
; splat compare constant is [45,45]. The VL/BITALG variants again use
; vpcmpltuq into a k-mask and a zero-masked all-ones move to build the
; result mask.
32421 define <2 x i64> @ult_45_v2i64(<2 x i64> %0) {
32422 ; SSE2-LABEL: ult_45_v2i64:
32424 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32425 ; SSE2-NEXT: psrlw $1, %xmm1
32426 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32427 ; SSE2-NEXT: psubb %xmm1, %xmm0
32428 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32429 ; SSE2-NEXT: movdqa %xmm0, %xmm2
32430 ; SSE2-NEXT: pand %xmm1, %xmm2
32431 ; SSE2-NEXT: psrlw $2, %xmm0
32432 ; SSE2-NEXT: pand %xmm1, %xmm0
32433 ; SSE2-NEXT: paddb %xmm2, %xmm0
32434 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32435 ; SSE2-NEXT: psrlw $4, %xmm1
32436 ; SSE2-NEXT: paddb %xmm0, %xmm1
32437 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32438 ; SSE2-NEXT: pxor %xmm0, %xmm0
32439 ; SSE2-NEXT: psadbw %xmm0, %xmm1
32440 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32441 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
32442 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
32443 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483693,2147483693]
32444 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
32445 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
32446 ; SSE2-NEXT: pand %xmm2, %xmm1
32447 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
32448 ; SSE2-NEXT: por %xmm1, %xmm0
32451 ; SSE3-LABEL: ult_45_v2i64:
32453 ; SSE3-NEXT: movdqa %xmm0, %xmm1
32454 ; SSE3-NEXT: psrlw $1, %xmm1
32455 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32456 ; SSE3-NEXT: psubb %xmm1, %xmm0
32457 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32458 ; SSE3-NEXT: movdqa %xmm0, %xmm2
32459 ; SSE3-NEXT: pand %xmm1, %xmm2
32460 ; SSE3-NEXT: psrlw $2, %xmm0
32461 ; SSE3-NEXT: pand %xmm1, %xmm0
32462 ; SSE3-NEXT: paddb %xmm2, %xmm0
32463 ; SSE3-NEXT: movdqa %xmm0, %xmm1
32464 ; SSE3-NEXT: psrlw $4, %xmm1
32465 ; SSE3-NEXT: paddb %xmm0, %xmm1
32466 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32467 ; SSE3-NEXT: pxor %xmm0, %xmm0
32468 ; SSE3-NEXT: psadbw %xmm0, %xmm1
32469 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32470 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
32471 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
32472 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483693,2147483693]
32473 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
32474 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
32475 ; SSE3-NEXT: pand %xmm2, %xmm1
32476 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
32477 ; SSE3-NEXT: por %xmm1, %xmm0
32480 ; SSSE3-LABEL: ult_45_v2i64:
32482 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32483 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
32484 ; SSSE3-NEXT: pand %xmm1, %xmm2
32485 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32486 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
32487 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
32488 ; SSSE3-NEXT: psrlw $4, %xmm0
32489 ; SSSE3-NEXT: pand %xmm1, %xmm0
32490 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
32491 ; SSSE3-NEXT: paddb %xmm4, %xmm3
32492 ; SSSE3-NEXT: pxor %xmm0, %xmm0
32493 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
32494 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
32495 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
32496 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
32497 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483693,2147483693]
32498 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
32499 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
32500 ; SSSE3-NEXT: pand %xmm1, %xmm2
32501 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
32502 ; SSSE3-NEXT: por %xmm2, %xmm0
32505 ; SSE41-LABEL: ult_45_v2i64:
32507 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32508 ; SSE41-NEXT: movdqa %xmm0, %xmm2
32509 ; SSE41-NEXT: pand %xmm1, %xmm2
32510 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32511 ; SSE41-NEXT: movdqa %xmm3, %xmm4
32512 ; SSE41-NEXT: pshufb %xmm2, %xmm4
32513 ; SSE41-NEXT: psrlw $4, %xmm0
32514 ; SSE41-NEXT: pand %xmm1, %xmm0
32515 ; SSE41-NEXT: pshufb %xmm0, %xmm3
32516 ; SSE41-NEXT: paddb %xmm4, %xmm3
32517 ; SSE41-NEXT: pxor %xmm0, %xmm0
32518 ; SSE41-NEXT: psadbw %xmm0, %xmm3
32519 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
32520 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
32521 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
32522 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483693,2147483693]
32523 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
32524 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
32525 ; SSE41-NEXT: pand %xmm1, %xmm2
32526 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
32527 ; SSE41-NEXT: por %xmm2, %xmm0
32530 ; AVX1-LABEL: ult_45_v2i64:
32532 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32533 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
32534 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32535 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
32536 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
32537 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
32538 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
32539 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32540 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
32541 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32542 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45]
32543 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32546 ; AVX2-LABEL: ult_45_v2i64:
32548 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32549 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
32550 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32551 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
32552 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
32553 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
32554 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
32555 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32556 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
32557 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32558 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45]
32559 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32562 ; AVX512VPOPCNTDQ-LABEL: ult_45_v2i64:
32563 ; AVX512VPOPCNTDQ: # %bb.0:
32564 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32565 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
32566 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45]
32567 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32568 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
32569 ; AVX512VPOPCNTDQ-NEXT: retq
32571 ; AVX512VPOPCNTDQVL-LABEL: ult_45_v2i64:
32572 ; AVX512VPOPCNTDQVL: # %bb.0:
32573 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
32574 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
32575 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
32576 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
32577 ; AVX512VPOPCNTDQVL-NEXT: retq
32579 ; BITALG_NOVLX-LABEL: ult_45_v2i64:
32580 ; BITALG_NOVLX: # %bb.0:
32581 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32582 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
32583 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
32584 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32585 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45]
32586 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32587 ; BITALG_NOVLX-NEXT: vzeroupper
32588 ; BITALG_NOVLX-NEXT: retq
32590 ; BITALG-LABEL: ult_45_v2i64:
32592 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
32593 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
32594 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32595 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
32596 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
32597 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
32598 ; BITALG-NEXT: retq
32599 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
32600 %3 = icmp ult <2 x i64> %2, <i64 45, i64 45>
32601 %4 = sext <2 x i1> %3 to <2 x i64>
32605 define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) {
32606 ; SSE2-LABEL: ugt_45_v2i64:
32608 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32609 ; SSE2-NEXT: psrlw $1, %xmm1
32610 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32611 ; SSE2-NEXT: psubb %xmm1, %xmm0
32612 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32613 ; SSE2-NEXT: movdqa %xmm0, %xmm2
32614 ; SSE2-NEXT: pand %xmm1, %xmm2
32615 ; SSE2-NEXT: psrlw $2, %xmm0
32616 ; SSE2-NEXT: pand %xmm1, %xmm0
32617 ; SSE2-NEXT: paddb %xmm2, %xmm0
32618 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32619 ; SSE2-NEXT: psrlw $4, %xmm1
32620 ; SSE2-NEXT: paddb %xmm0, %xmm1
32621 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32622 ; SSE2-NEXT: pxor %xmm0, %xmm0
32623 ; SSE2-NEXT: psadbw %xmm0, %xmm1
32624 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32625 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
32626 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
32627 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32628 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
32629 ; SSE2-NEXT: pand %xmm2, %xmm3
32630 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
32631 ; SSE2-NEXT: por %xmm3, %xmm0
32634 ; SSE3-LABEL: ugt_45_v2i64:
32636 ; SSE3-NEXT: movdqa %xmm0, %xmm1
32637 ; SSE3-NEXT: psrlw $1, %xmm1
32638 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32639 ; SSE3-NEXT: psubb %xmm1, %xmm0
32640 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32641 ; SSE3-NEXT: movdqa %xmm0, %xmm2
32642 ; SSE3-NEXT: pand %xmm1, %xmm2
32643 ; SSE3-NEXT: psrlw $2, %xmm0
32644 ; SSE3-NEXT: pand %xmm1, %xmm0
32645 ; SSE3-NEXT: paddb %xmm2, %xmm0
32646 ; SSE3-NEXT: movdqa %xmm0, %xmm1
32647 ; SSE3-NEXT: psrlw $4, %xmm1
32648 ; SSE3-NEXT: paddb %xmm0, %xmm1
32649 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32650 ; SSE3-NEXT: pxor %xmm0, %xmm0
32651 ; SSE3-NEXT: psadbw %xmm0, %xmm1
32652 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32653 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
32654 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
32655 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32656 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
32657 ; SSE3-NEXT: pand %xmm2, %xmm3
32658 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
32659 ; SSE3-NEXT: por %xmm3, %xmm0
32662 ; SSSE3-LABEL: ugt_45_v2i64:
32664 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32665 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
32666 ; SSSE3-NEXT: pand %xmm1, %xmm2
32667 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32668 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
32669 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
32670 ; SSSE3-NEXT: psrlw $4, %xmm0
32671 ; SSSE3-NEXT: pand %xmm1, %xmm0
32672 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
32673 ; SSSE3-NEXT: paddb %xmm4, %xmm3
32674 ; SSSE3-NEXT: pxor %xmm0, %xmm0
32675 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
32676 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
32677 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
32678 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
32679 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
32680 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
32681 ; SSSE3-NEXT: pand %xmm1, %xmm2
32682 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
32683 ; SSSE3-NEXT: por %xmm2, %xmm0
32686 ; SSE41-LABEL: ugt_45_v2i64:
32688 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32689 ; SSE41-NEXT: movdqa %xmm0, %xmm2
32690 ; SSE41-NEXT: pand %xmm1, %xmm2
32691 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32692 ; SSE41-NEXT: movdqa %xmm3, %xmm4
32693 ; SSE41-NEXT: pshufb %xmm2, %xmm4
32694 ; SSE41-NEXT: psrlw $4, %xmm0
32695 ; SSE41-NEXT: pand %xmm1, %xmm0
32696 ; SSE41-NEXT: pshufb %xmm0, %xmm3
32697 ; SSE41-NEXT: paddb %xmm4, %xmm3
32698 ; SSE41-NEXT: pxor %xmm0, %xmm0
32699 ; SSE41-NEXT: psadbw %xmm0, %xmm3
32700 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
32701 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
32702 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
32703 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
32704 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
32705 ; SSE41-NEXT: pand %xmm1, %xmm2
32706 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
32707 ; SSE41-NEXT: por %xmm2, %xmm0
32710 ; AVX1-LABEL: ugt_45_v2i64:
32712 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32713 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
32714 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32715 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
32716 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
32717 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
32718 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
32719 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32720 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
32721 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32722 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
32725 ; AVX2-LABEL: ugt_45_v2i64:
32727 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32728 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
32729 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32730 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
32731 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
32732 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
32733 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
32734 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32735 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
32736 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32737 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
32740 ; AVX512VPOPCNTDQ-LABEL: ugt_45_v2i64:
32741 ; AVX512VPOPCNTDQ: # %bb.0:
32742 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32743 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
32744 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
32745 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
32746 ; AVX512VPOPCNTDQ-NEXT: retq
32748 ; AVX512VPOPCNTDQVL-LABEL: ugt_45_v2i64:
32749 ; AVX512VPOPCNTDQVL: # %bb.0:
32750 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
32751 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
32752 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
32753 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
32754 ; AVX512VPOPCNTDQVL-NEXT: retq
32756 ; BITALG_NOVLX-LABEL: ugt_45_v2i64:
32757 ; BITALG_NOVLX: # %bb.0:
32758 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32759 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
32760 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
32761 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32762 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
32763 ; BITALG_NOVLX-NEXT: vzeroupper
32764 ; BITALG_NOVLX-NEXT: retq
32766 ; BITALG-LABEL: ugt_45_v2i64:
32768 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
32769 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
32770 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32771 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
32772 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
32773 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
32774 ; BITALG-NEXT: retq
32775 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
32776 %3 = icmp ugt <2 x i64> %2, <i64 45, i64 45>
32777 %4 = sext <2 x i1> %3 to <2 x i64>
32781 define <2 x i64> @ult_46_v2i64(<2 x i64> %0) {
32782 ; SSE2-LABEL: ult_46_v2i64:
32784 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32785 ; SSE2-NEXT: psrlw $1, %xmm1
32786 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32787 ; SSE2-NEXT: psubb %xmm1, %xmm0
32788 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32789 ; SSE2-NEXT: movdqa %xmm0, %xmm2
32790 ; SSE2-NEXT: pand %xmm1, %xmm2
32791 ; SSE2-NEXT: psrlw $2, %xmm0
32792 ; SSE2-NEXT: pand %xmm1, %xmm0
32793 ; SSE2-NEXT: paddb %xmm2, %xmm0
32794 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32795 ; SSE2-NEXT: psrlw $4, %xmm1
32796 ; SSE2-NEXT: paddb %xmm0, %xmm1
32797 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32798 ; SSE2-NEXT: pxor %xmm0, %xmm0
32799 ; SSE2-NEXT: psadbw %xmm0, %xmm1
32800 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32801 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
32802 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
32803 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483694,2147483694]
32804 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
32805 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
32806 ; SSE2-NEXT: pand %xmm2, %xmm1
32807 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
32808 ; SSE2-NEXT: por %xmm1, %xmm0
32811 ; SSE3-LABEL: ult_46_v2i64:
32813 ; SSE3-NEXT: movdqa %xmm0, %xmm1
32814 ; SSE3-NEXT: psrlw $1, %xmm1
32815 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32816 ; SSE3-NEXT: psubb %xmm1, %xmm0
32817 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32818 ; SSE3-NEXT: movdqa %xmm0, %xmm2
32819 ; SSE3-NEXT: pand %xmm1, %xmm2
32820 ; SSE3-NEXT: psrlw $2, %xmm0
32821 ; SSE3-NEXT: pand %xmm1, %xmm0
32822 ; SSE3-NEXT: paddb %xmm2, %xmm0
32823 ; SSE3-NEXT: movdqa %xmm0, %xmm1
32824 ; SSE3-NEXT: psrlw $4, %xmm1
32825 ; SSE3-NEXT: paddb %xmm0, %xmm1
32826 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32827 ; SSE3-NEXT: pxor %xmm0, %xmm0
32828 ; SSE3-NEXT: psadbw %xmm0, %xmm1
32829 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32830 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
32831 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
32832 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483694,2147483694]
32833 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
32834 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
32835 ; SSE3-NEXT: pand %xmm2, %xmm1
32836 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
32837 ; SSE3-NEXT: por %xmm1, %xmm0
32840 ; SSSE3-LABEL: ult_46_v2i64:
32842 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32843 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
32844 ; SSSE3-NEXT: pand %xmm1, %xmm2
32845 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32846 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
32847 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
32848 ; SSSE3-NEXT: psrlw $4, %xmm0
32849 ; SSSE3-NEXT: pand %xmm1, %xmm0
32850 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
32851 ; SSSE3-NEXT: paddb %xmm4, %xmm3
32852 ; SSSE3-NEXT: pxor %xmm0, %xmm0
32853 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
32854 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
32855 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
32856 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
32857 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483694,2147483694]
32858 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
32859 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
32860 ; SSSE3-NEXT: pand %xmm1, %xmm2
32861 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
32862 ; SSSE3-NEXT: por %xmm2, %xmm0
32865 ; SSE41-LABEL: ult_46_v2i64:
32867 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32868 ; SSE41-NEXT: movdqa %xmm0, %xmm2
32869 ; SSE41-NEXT: pand %xmm1, %xmm2
32870 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32871 ; SSE41-NEXT: movdqa %xmm3, %xmm4
32872 ; SSE41-NEXT: pshufb %xmm2, %xmm4
32873 ; SSE41-NEXT: psrlw $4, %xmm0
32874 ; SSE41-NEXT: pand %xmm1, %xmm0
32875 ; SSE41-NEXT: pshufb %xmm0, %xmm3
32876 ; SSE41-NEXT: paddb %xmm4, %xmm3
32877 ; SSE41-NEXT: pxor %xmm0, %xmm0
32878 ; SSE41-NEXT: psadbw %xmm0, %xmm3
32879 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
32880 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
32881 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
32882 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483694,2147483694]
32883 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
32884 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
32885 ; SSE41-NEXT: pand %xmm1, %xmm2
32886 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
32887 ; SSE41-NEXT: por %xmm2, %xmm0
32890 ; AVX1-LABEL: ult_46_v2i64:
32892 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32893 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
32894 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32895 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
32896 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
32897 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
32898 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
32899 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32900 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
32901 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32902 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [46,46]
32903 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32906 ; AVX2-LABEL: ult_46_v2i64:
32908 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32909 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
32910 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32911 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
32912 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
32913 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
32914 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
32915 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32916 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
32917 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32918 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [46,46]
32919 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32922 ; AVX512VPOPCNTDQ-LABEL: ult_46_v2i64:
32923 ; AVX512VPOPCNTDQ: # %bb.0:
32924 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32925 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
32926 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [46,46]
32927 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32928 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
32929 ; AVX512VPOPCNTDQ-NEXT: retq
32931 ; AVX512VPOPCNTDQVL-LABEL: ult_46_v2i64:
32932 ; AVX512VPOPCNTDQVL: # %bb.0:
32933 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
32934 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
32935 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
32936 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
32937 ; AVX512VPOPCNTDQVL-NEXT: retq
32939 ; BITALG_NOVLX-LABEL: ult_46_v2i64:
32940 ; BITALG_NOVLX: # %bb.0:
32941 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32942 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
32943 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
32944 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32945 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [46,46]
32946 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32947 ; BITALG_NOVLX-NEXT: vzeroupper
32948 ; BITALG_NOVLX-NEXT: retq
32950 ; BITALG-LABEL: ult_46_v2i64:
32952 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
32953 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
32954 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32955 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
32956 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
32957 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
32958 ; BITALG-NEXT: retq
32959 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
32960 %3 = icmp ult <2 x i64> %2, <i64 46, i64 46>
32961 %4 = sext <2 x i1> %3 to <2 x i64>
32965 define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) {
32966 ; SSE2-LABEL: ugt_46_v2i64:
32968 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32969 ; SSE2-NEXT: psrlw $1, %xmm1
32970 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32971 ; SSE2-NEXT: psubb %xmm1, %xmm0
32972 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32973 ; SSE2-NEXT: movdqa %xmm0, %xmm2
32974 ; SSE2-NEXT: pand %xmm1, %xmm2
32975 ; SSE2-NEXT: psrlw $2, %xmm0
32976 ; SSE2-NEXT: pand %xmm1, %xmm0
32977 ; SSE2-NEXT: paddb %xmm2, %xmm0
32978 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32979 ; SSE2-NEXT: psrlw $4, %xmm1
32980 ; SSE2-NEXT: paddb %xmm0, %xmm1
32981 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32982 ; SSE2-NEXT: pxor %xmm0, %xmm0
32983 ; SSE2-NEXT: psadbw %xmm0, %xmm1
32984 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32985 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
32986 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
32987 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32988 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
32989 ; SSE2-NEXT: pand %xmm2, %xmm3
32990 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
32991 ; SSE2-NEXT: por %xmm3, %xmm0
32994 ; SSE3-LABEL: ugt_46_v2i64:
32996 ; SSE3-NEXT: movdqa %xmm0, %xmm1
32997 ; SSE3-NEXT: psrlw $1, %xmm1
32998 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32999 ; SSE3-NEXT: psubb %xmm1, %xmm0
33000 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33001 ; SSE3-NEXT: movdqa %xmm0, %xmm2
33002 ; SSE3-NEXT: pand %xmm1, %xmm2
33003 ; SSE3-NEXT: psrlw $2, %xmm0
33004 ; SSE3-NEXT: pand %xmm1, %xmm0
33005 ; SSE3-NEXT: paddb %xmm2, %xmm0
33006 ; SSE3-NEXT: movdqa %xmm0, %xmm1
33007 ; SSE3-NEXT: psrlw $4, %xmm1
33008 ; SSE3-NEXT: paddb %xmm0, %xmm1
33009 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33010 ; SSE3-NEXT: pxor %xmm0, %xmm0
33011 ; SSE3-NEXT: psadbw %xmm0, %xmm1
33012 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33013 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
33014 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
33015 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33016 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
33017 ; SSE3-NEXT: pand %xmm2, %xmm3
33018 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
33019 ; SSE3-NEXT: por %xmm3, %xmm0
33022 ; SSSE3-LABEL: ugt_46_v2i64:
33024 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33025 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
33026 ; SSSE3-NEXT: pand %xmm1, %xmm2
33027 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33028 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
33029 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
33030 ; SSSE3-NEXT: psrlw $4, %xmm0
33031 ; SSSE3-NEXT: pand %xmm1, %xmm0
33032 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
33033 ; SSSE3-NEXT: paddb %xmm4, %xmm3
33034 ; SSSE3-NEXT: pxor %xmm0, %xmm0
33035 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
33036 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
33037 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
33038 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
33039 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
33040 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
33041 ; SSSE3-NEXT: pand %xmm1, %xmm2
33042 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
33043 ; SSSE3-NEXT: por %xmm2, %xmm0
33046 ; SSE41-LABEL: ugt_46_v2i64:
33048 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33049 ; SSE41-NEXT: movdqa %xmm0, %xmm2
33050 ; SSE41-NEXT: pand %xmm1, %xmm2
33051 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33052 ; SSE41-NEXT: movdqa %xmm3, %xmm4
33053 ; SSE41-NEXT: pshufb %xmm2, %xmm4
33054 ; SSE41-NEXT: psrlw $4, %xmm0
33055 ; SSE41-NEXT: pand %xmm1, %xmm0
33056 ; SSE41-NEXT: pshufb %xmm0, %xmm3
33057 ; SSE41-NEXT: paddb %xmm4, %xmm3
33058 ; SSE41-NEXT: pxor %xmm0, %xmm0
33059 ; SSE41-NEXT: psadbw %xmm0, %xmm3
33060 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
33061 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
33062 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
33063 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
33064 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
33065 ; SSE41-NEXT: pand %xmm1, %xmm2
33066 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
33067 ; SSE41-NEXT: por %xmm2, %xmm0
33070 ; AVX1-LABEL: ugt_46_v2i64:
33072 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33073 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
33074 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33075 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33076 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
33077 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
33078 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
33079 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33080 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
33081 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33082 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
33085 ; AVX2-LABEL: ugt_46_v2i64:
33087 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33088 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
33089 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33090 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33091 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
33092 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
33093 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
33094 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33095 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
33096 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33097 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
33100 ; AVX512VPOPCNTDQ-LABEL: ugt_46_v2i64:
33101 ; AVX512VPOPCNTDQ: # %bb.0:
33102 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
33103 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
33104 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
33105 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
33106 ; AVX512VPOPCNTDQ-NEXT: retq
33108 ; AVX512VPOPCNTDQVL-LABEL: ugt_46_v2i64:
33109 ; AVX512VPOPCNTDQVL: # %bb.0:
33110 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
33111 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
33112 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
33113 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
33114 ; AVX512VPOPCNTDQVL-NEXT: retq
33116 ; BITALG_NOVLX-LABEL: ugt_46_v2i64:
33117 ; BITALG_NOVLX: # %bb.0:
33118 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
33119 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
33120 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
33121 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33122 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
33123 ; BITALG_NOVLX-NEXT: vzeroupper
33124 ; BITALG_NOVLX-NEXT: retq
33126 ; BITALG-LABEL: ugt_46_v2i64:
33128 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
33129 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
33130 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33131 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
33132 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
33133 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
33134 ; BITALG-NEXT: retq
33135 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
33136 %3 = icmp ugt <2 x i64> %2, <i64 46, i64 46>
33137 %4 = sext <2 x i1> %3 to <2 x i64>
33141 define <2 x i64> @ult_47_v2i64(<2 x i64> %0) {
33142 ; SSE2-LABEL: ult_47_v2i64:
33144 ; SSE2-NEXT: movdqa %xmm0, %xmm1
33145 ; SSE2-NEXT: psrlw $1, %xmm1
33146 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33147 ; SSE2-NEXT: psubb %xmm1, %xmm0
33148 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33149 ; SSE2-NEXT: movdqa %xmm0, %xmm2
33150 ; SSE2-NEXT: pand %xmm1, %xmm2
33151 ; SSE2-NEXT: psrlw $2, %xmm0
33152 ; SSE2-NEXT: pand %xmm1, %xmm0
33153 ; SSE2-NEXT: paddb %xmm2, %xmm0
33154 ; SSE2-NEXT: movdqa %xmm0, %xmm1
33155 ; SSE2-NEXT: psrlw $4, %xmm1
33156 ; SSE2-NEXT: paddb %xmm0, %xmm1
33157 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33158 ; SSE2-NEXT: pxor %xmm0, %xmm0
33159 ; SSE2-NEXT: psadbw %xmm0, %xmm1
33160 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33161 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
33162 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
33163 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483695,2147483695]
33164 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
33165 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
33166 ; SSE2-NEXT: pand %xmm2, %xmm1
33167 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
33168 ; SSE2-NEXT: por %xmm1, %xmm0
33171 ; SSE3-LABEL: ult_47_v2i64:
33173 ; SSE3-NEXT: movdqa %xmm0, %xmm1
33174 ; SSE3-NEXT: psrlw $1, %xmm1
33175 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33176 ; SSE3-NEXT: psubb %xmm1, %xmm0
33177 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33178 ; SSE3-NEXT: movdqa %xmm0, %xmm2
33179 ; SSE3-NEXT: pand %xmm1, %xmm2
33180 ; SSE3-NEXT: psrlw $2, %xmm0
33181 ; SSE3-NEXT: pand %xmm1, %xmm0
33182 ; SSE3-NEXT: paddb %xmm2, %xmm0
33183 ; SSE3-NEXT: movdqa %xmm0, %xmm1
33184 ; SSE3-NEXT: psrlw $4, %xmm1
33185 ; SSE3-NEXT: paddb %xmm0, %xmm1
33186 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33187 ; SSE3-NEXT: pxor %xmm0, %xmm0
33188 ; SSE3-NEXT: psadbw %xmm0, %xmm1
33189 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33190 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
33191 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
33192 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483695,2147483695]
33193 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
33194 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
33195 ; SSE3-NEXT: pand %xmm2, %xmm1
33196 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
33197 ; SSE3-NEXT: por %xmm1, %xmm0
33200 ; SSSE3-LABEL: ult_47_v2i64:
33202 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33203 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
33204 ; SSSE3-NEXT: pand %xmm1, %xmm2
33205 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33206 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
33207 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
33208 ; SSSE3-NEXT: psrlw $4, %xmm0
33209 ; SSSE3-NEXT: pand %xmm1, %xmm0
33210 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
33211 ; SSSE3-NEXT: paddb %xmm4, %xmm3
33212 ; SSSE3-NEXT: pxor %xmm0, %xmm0
33213 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
33214 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
33215 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
33216 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
33217 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483695,2147483695]
33218 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
33219 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
33220 ; SSSE3-NEXT: pand %xmm1, %xmm2
33221 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
33222 ; SSSE3-NEXT: por %xmm2, %xmm0
33225 ; SSE41-LABEL: ult_47_v2i64:
33227 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33228 ; SSE41-NEXT: movdqa %xmm0, %xmm2
33229 ; SSE41-NEXT: pand %xmm1, %xmm2
33230 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33231 ; SSE41-NEXT: movdqa %xmm3, %xmm4
33232 ; SSE41-NEXT: pshufb %xmm2, %xmm4
33233 ; SSE41-NEXT: psrlw $4, %xmm0
33234 ; SSE41-NEXT: pand %xmm1, %xmm0
33235 ; SSE41-NEXT: pshufb %xmm0, %xmm3
33236 ; SSE41-NEXT: paddb %xmm4, %xmm3
33237 ; SSE41-NEXT: pxor %xmm0, %xmm0
33238 ; SSE41-NEXT: psadbw %xmm0, %xmm3
33239 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
33240 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
33241 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
33242 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483695,2147483695]
33243 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
33244 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
33245 ; SSE41-NEXT: pand %xmm1, %xmm2
33246 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
33247 ; SSE41-NEXT: por %xmm2, %xmm0
33250 ; AVX1-LABEL: ult_47_v2i64:
33252 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33253 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
33254 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33255 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33256 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
33257 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
33258 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
33259 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33260 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
33261 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33262 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47]
33263 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33266 ; AVX2-LABEL: ult_47_v2i64:
33268 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33269 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
33270 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33271 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33272 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
33273 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
33274 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
33275 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33276 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
33277 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33278 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47]
33279 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33282 ; AVX512VPOPCNTDQ-LABEL: ult_47_v2i64:
33283 ; AVX512VPOPCNTDQ: # %bb.0:
33284 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
33285 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
33286 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47]
33287 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33288 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
33289 ; AVX512VPOPCNTDQ-NEXT: retq
33291 ; AVX512VPOPCNTDQVL-LABEL: ult_47_v2i64:
33292 ; AVX512VPOPCNTDQVL: # %bb.0:
33293 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
33294 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
33295 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
33296 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
33297 ; AVX512VPOPCNTDQVL-NEXT: retq
33299 ; BITALG_NOVLX-LABEL: ult_47_v2i64:
33300 ; BITALG_NOVLX: # %bb.0:
33301 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
33302 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
33303 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
33304 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33305 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47]
33306 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33307 ; BITALG_NOVLX-NEXT: vzeroupper
33308 ; BITALG_NOVLX-NEXT: retq
33310 ; BITALG-LABEL: ult_47_v2i64:
33312 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
33313 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
33314 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33315 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
33316 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
33317 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
33318 ; BITALG-NEXT: retq
33319 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
33320 %3 = icmp ult <2 x i64> %2, <i64 47, i64 47>
33321 %4 = sext <2 x i1> %3 to <2 x i64>
33325 define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) {
33326 ; SSE2-LABEL: ugt_47_v2i64:
33328 ; SSE2-NEXT: movdqa %xmm0, %xmm1
33329 ; SSE2-NEXT: psrlw $1, %xmm1
33330 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33331 ; SSE2-NEXT: psubb %xmm1, %xmm0
33332 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33333 ; SSE2-NEXT: movdqa %xmm0, %xmm2
33334 ; SSE2-NEXT: pand %xmm1, %xmm2
33335 ; SSE2-NEXT: psrlw $2, %xmm0
33336 ; SSE2-NEXT: pand %xmm1, %xmm0
33337 ; SSE2-NEXT: paddb %xmm2, %xmm0
33338 ; SSE2-NEXT: movdqa %xmm0, %xmm1
33339 ; SSE2-NEXT: psrlw $4, %xmm1
33340 ; SSE2-NEXT: paddb %xmm0, %xmm1
33341 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33342 ; SSE2-NEXT: pxor %xmm0, %xmm0
33343 ; SSE2-NEXT: psadbw %xmm0, %xmm1
33344 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33345 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
33346 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
33347 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33348 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
33349 ; SSE2-NEXT: pand %xmm2, %xmm3
33350 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
33351 ; SSE2-NEXT: por %xmm3, %xmm0
33354 ; SSE3-LABEL: ugt_47_v2i64:
33356 ; SSE3-NEXT: movdqa %xmm0, %xmm1
33357 ; SSE3-NEXT: psrlw $1, %xmm1
33358 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33359 ; SSE3-NEXT: psubb %xmm1, %xmm0
33360 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33361 ; SSE3-NEXT: movdqa %xmm0, %xmm2
33362 ; SSE3-NEXT: pand %xmm1, %xmm2
33363 ; SSE3-NEXT: psrlw $2, %xmm0
33364 ; SSE3-NEXT: pand %xmm1, %xmm0
33365 ; SSE3-NEXT: paddb %xmm2, %xmm0
33366 ; SSE3-NEXT: movdqa %xmm0, %xmm1
33367 ; SSE3-NEXT: psrlw $4, %xmm1
33368 ; SSE3-NEXT: paddb %xmm0, %xmm1
33369 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33370 ; SSE3-NEXT: pxor %xmm0, %xmm0
33371 ; SSE3-NEXT: psadbw %xmm0, %xmm1
33372 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33373 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
33374 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
33375 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33376 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
33377 ; SSE3-NEXT: pand %xmm2, %xmm3
33378 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
33379 ; SSE3-NEXT: por %xmm3, %xmm0
33382 ; SSSE3-LABEL: ugt_47_v2i64:
33384 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33385 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
33386 ; SSSE3-NEXT: pand %xmm1, %xmm2
33387 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33388 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
33389 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
33390 ; SSSE3-NEXT: psrlw $4, %xmm0
33391 ; SSSE3-NEXT: pand %xmm1, %xmm0
33392 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
33393 ; SSSE3-NEXT: paddb %xmm4, %xmm3
33394 ; SSSE3-NEXT: pxor %xmm0, %xmm0
33395 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
33396 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
33397 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
33398 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
33399 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
33400 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
33401 ; SSSE3-NEXT: pand %xmm1, %xmm2
33402 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
33403 ; SSSE3-NEXT: por %xmm2, %xmm0
33406 ; SSE41-LABEL: ugt_47_v2i64:
33408 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33409 ; SSE41-NEXT: movdqa %xmm0, %xmm2
33410 ; SSE41-NEXT: pand %xmm1, %xmm2
33411 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33412 ; SSE41-NEXT: movdqa %xmm3, %xmm4
33413 ; SSE41-NEXT: pshufb %xmm2, %xmm4
33414 ; SSE41-NEXT: psrlw $4, %xmm0
33415 ; SSE41-NEXT: pand %xmm1, %xmm0
33416 ; SSE41-NEXT: pshufb %xmm0, %xmm3
33417 ; SSE41-NEXT: paddb %xmm4, %xmm3
33418 ; SSE41-NEXT: pxor %xmm0, %xmm0
33419 ; SSE41-NEXT: psadbw %xmm0, %xmm3
33420 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
33421 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
33422 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
33423 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
33424 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
33425 ; SSE41-NEXT: pand %xmm1, %xmm2
33426 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
33427 ; SSE41-NEXT: por %xmm2, %xmm0
33430 ; AVX1-LABEL: ugt_47_v2i64:
33432 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33433 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
33434 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33435 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33436 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
33437 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
33438 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
33439 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33440 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
33441 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33442 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
33445 ; AVX2-LABEL: ugt_47_v2i64:
33447 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33448 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
33449 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33450 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33451 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
33452 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
33453 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
33454 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33455 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
33456 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33457 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
33460 ; AVX512VPOPCNTDQ-LABEL: ugt_47_v2i64:
33461 ; AVX512VPOPCNTDQ: # %bb.0:
33462 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
33463 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
33464 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
33465 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
33466 ; AVX512VPOPCNTDQ-NEXT: retq
33468 ; AVX512VPOPCNTDQVL-LABEL: ugt_47_v2i64:
33469 ; AVX512VPOPCNTDQVL: # %bb.0:
33470 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
33471 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
33472 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
33473 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
33474 ; AVX512VPOPCNTDQVL-NEXT: retq
33476 ; BITALG_NOVLX-LABEL: ugt_47_v2i64:
33477 ; BITALG_NOVLX: # %bb.0:
33478 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
33479 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
33480 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
33481 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33482 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
33483 ; BITALG_NOVLX-NEXT: vzeroupper
33484 ; BITALG_NOVLX-NEXT: retq
33486 ; BITALG-LABEL: ugt_47_v2i64:
33488 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
33489 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
33490 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33491 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
33492 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
33493 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
33494 ; BITALG-NEXT: retq
33495 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
33496 %3 = icmp ugt <2 x i64> %2, <i64 47, i64 47>
33497 %4 = sext <2 x i1> %3 to <2 x i64>
33501 define <2 x i64> @ult_48_v2i64(<2 x i64> %0) {
33502 ; SSE2-LABEL: ult_48_v2i64:
33504 ; SSE2-NEXT: movdqa %xmm0, %xmm1
33505 ; SSE2-NEXT: psrlw $1, %xmm1
33506 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33507 ; SSE2-NEXT: psubb %xmm1, %xmm0
33508 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33509 ; SSE2-NEXT: movdqa %xmm0, %xmm2
33510 ; SSE2-NEXT: pand %xmm1, %xmm2
33511 ; SSE2-NEXT: psrlw $2, %xmm0
33512 ; SSE2-NEXT: pand %xmm1, %xmm0
33513 ; SSE2-NEXT: paddb %xmm2, %xmm0
33514 ; SSE2-NEXT: movdqa %xmm0, %xmm1
33515 ; SSE2-NEXT: psrlw $4, %xmm1
33516 ; SSE2-NEXT: paddb %xmm0, %xmm1
33517 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33518 ; SSE2-NEXT: pxor %xmm0, %xmm0
33519 ; SSE2-NEXT: psadbw %xmm0, %xmm1
33520 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33521 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
33522 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
33523 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483696,2147483696]
33524 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
33525 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
33526 ; SSE2-NEXT: pand %xmm2, %xmm1
33527 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
33528 ; SSE2-NEXT: por %xmm1, %xmm0
33531 ; SSE3-LABEL: ult_48_v2i64:
33533 ; SSE3-NEXT: movdqa %xmm0, %xmm1
33534 ; SSE3-NEXT: psrlw $1, %xmm1
33535 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33536 ; SSE3-NEXT: psubb %xmm1, %xmm0
33537 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33538 ; SSE3-NEXT: movdqa %xmm0, %xmm2
33539 ; SSE3-NEXT: pand %xmm1, %xmm2
33540 ; SSE3-NEXT: psrlw $2, %xmm0
33541 ; SSE3-NEXT: pand %xmm1, %xmm0
33542 ; SSE3-NEXT: paddb %xmm2, %xmm0
33543 ; SSE3-NEXT: movdqa %xmm0, %xmm1
33544 ; SSE3-NEXT: psrlw $4, %xmm1
33545 ; SSE3-NEXT: paddb %xmm0, %xmm1
33546 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33547 ; SSE3-NEXT: pxor %xmm0, %xmm0
33548 ; SSE3-NEXT: psadbw %xmm0, %xmm1
33549 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33550 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
33551 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
33552 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483696,2147483696]
33553 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
33554 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
33555 ; SSE3-NEXT: pand %xmm2, %xmm1
33556 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
33557 ; SSE3-NEXT: por %xmm1, %xmm0
33560 ; SSSE3-LABEL: ult_48_v2i64:
33562 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33563 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
33564 ; SSSE3-NEXT: pand %xmm1, %xmm2
33565 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33566 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
33567 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
33568 ; SSSE3-NEXT: psrlw $4, %xmm0
33569 ; SSSE3-NEXT: pand %xmm1, %xmm0
33570 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
33571 ; SSSE3-NEXT: paddb %xmm4, %xmm3
33572 ; SSSE3-NEXT: pxor %xmm0, %xmm0
33573 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
33574 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
33575 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
33576 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
33577 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483696,2147483696]
33578 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
33579 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
33580 ; SSSE3-NEXT: pand %xmm1, %xmm2
33581 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
33582 ; SSSE3-NEXT: por %xmm2, %xmm0
33585 ; SSE41-LABEL: ult_48_v2i64:
33587 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33588 ; SSE41-NEXT: movdqa %xmm0, %xmm2
33589 ; SSE41-NEXT: pand %xmm1, %xmm2
33590 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33591 ; SSE41-NEXT: movdqa %xmm3, %xmm4
33592 ; SSE41-NEXT: pshufb %xmm2, %xmm4
33593 ; SSE41-NEXT: psrlw $4, %xmm0
33594 ; SSE41-NEXT: pand %xmm1, %xmm0
33595 ; SSE41-NEXT: pshufb %xmm0, %xmm3
33596 ; SSE41-NEXT: paddb %xmm4, %xmm3
33597 ; SSE41-NEXT: pxor %xmm0, %xmm0
33598 ; SSE41-NEXT: psadbw %xmm0, %xmm3
33599 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
33600 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
33601 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
33602 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483696,2147483696]
33603 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
33604 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
33605 ; SSE41-NEXT: pand %xmm1, %xmm2
33606 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
33607 ; SSE41-NEXT: por %xmm2, %xmm0
33610 ; AVX1-LABEL: ult_48_v2i64:
33612 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33613 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
33614 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33615 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33616 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
33617 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
33618 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
33619 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33620 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
33621 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33622 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48]
33623 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33626 ; AVX2-LABEL: ult_48_v2i64:
33628 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33629 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
33630 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33631 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33632 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
33633 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
33634 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
33635 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33636 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
33637 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33638 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48]
33639 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33642 ; AVX512VPOPCNTDQ-LABEL: ult_48_v2i64:
33643 ; AVX512VPOPCNTDQ: # %bb.0:
33644 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
33645 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
33646 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48]
33647 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33648 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
33649 ; AVX512VPOPCNTDQ-NEXT: retq
33651 ; AVX512VPOPCNTDQVL-LABEL: ult_48_v2i64:
33652 ; AVX512VPOPCNTDQVL: # %bb.0:
33653 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
33654 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
33655 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
33656 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
33657 ; AVX512VPOPCNTDQVL-NEXT: retq
33659 ; BITALG_NOVLX-LABEL: ult_48_v2i64:
33660 ; BITALG_NOVLX: # %bb.0:
33661 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
33662 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
33663 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
33664 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33665 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48]
33666 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33667 ; BITALG_NOVLX-NEXT: vzeroupper
33668 ; BITALG_NOVLX-NEXT: retq
33670 ; BITALG-LABEL: ult_48_v2i64:
33672 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
33673 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
33674 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33675 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
33676 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
33677 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
33678 ; BITALG-NEXT: retq
33679 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
33680 %3 = icmp ult <2 x i64> %2, <i64 48, i64 48>
33681 %4 = sext <2 x i1> %3 to <2 x i64>
33685 define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) {
33686 ; SSE2-LABEL: ugt_48_v2i64:
33688 ; SSE2-NEXT: movdqa %xmm0, %xmm1
33689 ; SSE2-NEXT: psrlw $1, %xmm1
33690 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33691 ; SSE2-NEXT: psubb %xmm1, %xmm0
33692 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33693 ; SSE2-NEXT: movdqa %xmm0, %xmm2
33694 ; SSE2-NEXT: pand %xmm1, %xmm2
33695 ; SSE2-NEXT: psrlw $2, %xmm0
33696 ; SSE2-NEXT: pand %xmm1, %xmm0
33697 ; SSE2-NEXT: paddb %xmm2, %xmm0
33698 ; SSE2-NEXT: movdqa %xmm0, %xmm1
33699 ; SSE2-NEXT: psrlw $4, %xmm1
33700 ; SSE2-NEXT: paddb %xmm0, %xmm1
33701 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33702 ; SSE2-NEXT: pxor %xmm0, %xmm0
33703 ; SSE2-NEXT: psadbw %xmm0, %xmm1
33704 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33705 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
33706 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
33707 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33708 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
33709 ; SSE2-NEXT: pand %xmm2, %xmm3
33710 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
33711 ; SSE2-NEXT: por %xmm3, %xmm0
33714 ; SSE3-LABEL: ugt_48_v2i64:
33716 ; SSE3-NEXT: movdqa %xmm0, %xmm1
33717 ; SSE3-NEXT: psrlw $1, %xmm1
33718 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33719 ; SSE3-NEXT: psubb %xmm1, %xmm0
33720 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33721 ; SSE3-NEXT: movdqa %xmm0, %xmm2
33722 ; SSE3-NEXT: pand %xmm1, %xmm2
33723 ; SSE3-NEXT: psrlw $2, %xmm0
33724 ; SSE3-NEXT: pand %xmm1, %xmm0
33725 ; SSE3-NEXT: paddb %xmm2, %xmm0
33726 ; SSE3-NEXT: movdqa %xmm0, %xmm1
33727 ; SSE3-NEXT: psrlw $4, %xmm1
33728 ; SSE3-NEXT: paddb %xmm0, %xmm1
33729 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33730 ; SSE3-NEXT: pxor %xmm0, %xmm0
33731 ; SSE3-NEXT: psadbw %xmm0, %xmm1
33732 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33733 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
33734 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
33735 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33736 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
33737 ; SSE3-NEXT: pand %xmm2, %xmm3
33738 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
33739 ; SSE3-NEXT: por %xmm3, %xmm0
33742 ; SSSE3-LABEL: ugt_48_v2i64:
33744 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33745 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
33746 ; SSSE3-NEXT: pand %xmm1, %xmm2
33747 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33748 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
33749 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
33750 ; SSSE3-NEXT: psrlw $4, %xmm0
33751 ; SSSE3-NEXT: pand %xmm1, %xmm0
33752 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
33753 ; SSSE3-NEXT: paddb %xmm4, %xmm3
33754 ; SSSE3-NEXT: pxor %xmm0, %xmm0
33755 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
33756 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
33757 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
33758 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
33759 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
33760 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
33761 ; SSSE3-NEXT: pand %xmm1, %xmm2
33762 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
33763 ; SSSE3-NEXT: por %xmm2, %xmm0
33766 ; SSE41-LABEL: ugt_48_v2i64:
33768 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33769 ; SSE41-NEXT: movdqa %xmm0, %xmm2
33770 ; SSE41-NEXT: pand %xmm1, %xmm2
33771 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33772 ; SSE41-NEXT: movdqa %xmm3, %xmm4
33773 ; SSE41-NEXT: pshufb %xmm2, %xmm4
33774 ; SSE41-NEXT: psrlw $4, %xmm0
33775 ; SSE41-NEXT: pand %xmm1, %xmm0
33776 ; SSE41-NEXT: pshufb %xmm0, %xmm3
33777 ; SSE41-NEXT: paddb %xmm4, %xmm3
33778 ; SSE41-NEXT: pxor %xmm0, %xmm0
33779 ; SSE41-NEXT: psadbw %xmm0, %xmm3
33780 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
33781 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
33782 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
33783 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
33784 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
33785 ; SSE41-NEXT: pand %xmm1, %xmm2
33786 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
33787 ; SSE41-NEXT: por %xmm2, %xmm0
33790 ; AVX1-LABEL: ugt_48_v2i64:
33792 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33793 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
33794 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33795 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33796 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
33797 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
33798 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
33799 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33800 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
33801 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33802 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
33805 ; AVX2-LABEL: ugt_48_v2i64:
33807 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33808 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
33809 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33810 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33811 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
33812 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
33813 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
33814 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33815 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
33816 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33817 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
33820 ; AVX512VPOPCNTDQ-LABEL: ugt_48_v2i64:
33821 ; AVX512VPOPCNTDQ: # %bb.0:
33822 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
33823 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
33824 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
33825 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
33826 ; AVX512VPOPCNTDQ-NEXT: retq
33828 ; AVX512VPOPCNTDQVL-LABEL: ugt_48_v2i64:
33829 ; AVX512VPOPCNTDQVL: # %bb.0:
33830 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
33831 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
33832 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
33833 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
33834 ; AVX512VPOPCNTDQVL-NEXT: retq
33836 ; BITALG_NOVLX-LABEL: ugt_48_v2i64:
33837 ; BITALG_NOVLX: # %bb.0:
33838 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
33839 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
33840 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
33841 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33842 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
33843 ; BITALG_NOVLX-NEXT: vzeroupper
33844 ; BITALG_NOVLX-NEXT: retq
33846 ; BITALG-LABEL: ugt_48_v2i64:
33848 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
33849 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
33850 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33851 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
33852 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
33853 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
33854 ; BITALG-NEXT: retq
33855 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
33856 %3 = icmp ugt <2 x i64> %2, <i64 48, i64 48>
33857 %4 = sext <2 x i1> %3 to <2 x i64>
33861 define <2 x i64> @ult_49_v2i64(<2 x i64> %0) {
33862 ; SSE2-LABEL: ult_49_v2i64:
33864 ; SSE2-NEXT: movdqa %xmm0, %xmm1
33865 ; SSE2-NEXT: psrlw $1, %xmm1
33866 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33867 ; SSE2-NEXT: psubb %xmm1, %xmm0
33868 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33869 ; SSE2-NEXT: movdqa %xmm0, %xmm2
33870 ; SSE2-NEXT: pand %xmm1, %xmm2
33871 ; SSE2-NEXT: psrlw $2, %xmm0
33872 ; SSE2-NEXT: pand %xmm1, %xmm0
33873 ; SSE2-NEXT: paddb %xmm2, %xmm0
33874 ; SSE2-NEXT: movdqa %xmm0, %xmm1
33875 ; SSE2-NEXT: psrlw $4, %xmm1
33876 ; SSE2-NEXT: paddb %xmm0, %xmm1
33877 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33878 ; SSE2-NEXT: pxor %xmm0, %xmm0
33879 ; SSE2-NEXT: psadbw %xmm0, %xmm1
33880 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33881 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
33882 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
33883 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483697,2147483697]
33884 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
33885 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
33886 ; SSE2-NEXT: pand %xmm2, %xmm1
33887 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
33888 ; SSE2-NEXT: por %xmm1, %xmm0
33891 ; SSE3-LABEL: ult_49_v2i64:
33893 ; SSE3-NEXT: movdqa %xmm0, %xmm1
33894 ; SSE3-NEXT: psrlw $1, %xmm1
33895 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33896 ; SSE3-NEXT: psubb %xmm1, %xmm0
33897 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33898 ; SSE3-NEXT: movdqa %xmm0, %xmm2
33899 ; SSE3-NEXT: pand %xmm1, %xmm2
33900 ; SSE3-NEXT: psrlw $2, %xmm0
33901 ; SSE3-NEXT: pand %xmm1, %xmm0
33902 ; SSE3-NEXT: paddb %xmm2, %xmm0
33903 ; SSE3-NEXT: movdqa %xmm0, %xmm1
33904 ; SSE3-NEXT: psrlw $4, %xmm1
33905 ; SSE3-NEXT: paddb %xmm0, %xmm1
33906 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33907 ; SSE3-NEXT: pxor %xmm0, %xmm0
33908 ; SSE3-NEXT: psadbw %xmm0, %xmm1
33909 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33910 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
33911 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
33912 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483697,2147483697]
33913 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
33914 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
33915 ; SSE3-NEXT: pand %xmm2, %xmm1
33916 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
33917 ; SSE3-NEXT: por %xmm1, %xmm0
33920 ; SSSE3-LABEL: ult_49_v2i64:
33922 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33923 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
33924 ; SSSE3-NEXT: pand %xmm1, %xmm2
33925 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33926 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
33927 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
33928 ; SSSE3-NEXT: psrlw $4, %xmm0
33929 ; SSSE3-NEXT: pand %xmm1, %xmm0
33930 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
33931 ; SSSE3-NEXT: paddb %xmm4, %xmm3
33932 ; SSSE3-NEXT: pxor %xmm0, %xmm0
33933 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
33934 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
33935 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
33936 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
33937 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483697,2147483697]
33938 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
33939 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
33940 ; SSSE3-NEXT: pand %xmm1, %xmm2
33941 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
33942 ; SSSE3-NEXT: por %xmm2, %xmm0
33945 ; SSE41-LABEL: ult_49_v2i64:
33947 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33948 ; SSE41-NEXT: movdqa %xmm0, %xmm2
33949 ; SSE41-NEXT: pand %xmm1, %xmm2
33950 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33951 ; SSE41-NEXT: movdqa %xmm3, %xmm4
33952 ; SSE41-NEXT: pshufb %xmm2, %xmm4
33953 ; SSE41-NEXT: psrlw $4, %xmm0
33954 ; SSE41-NEXT: pand %xmm1, %xmm0
33955 ; SSE41-NEXT: pshufb %xmm0, %xmm3
33956 ; SSE41-NEXT: paddb %xmm4, %xmm3
33957 ; SSE41-NEXT: pxor %xmm0, %xmm0
33958 ; SSE41-NEXT: psadbw %xmm0, %xmm3
33959 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
33960 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
33961 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
33962 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483697,2147483697]
33963 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
33964 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
33965 ; SSE41-NEXT: pand %xmm1, %xmm2
33966 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
33967 ; SSE41-NEXT: por %xmm2, %xmm0
33970 ; AVX1-LABEL: ult_49_v2i64:
33972 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33973 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
33974 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33975 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33976 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
33977 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
33978 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
33979 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33980 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
33981 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33982 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49]
33983 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33986 ; AVX2-LABEL: ult_49_v2i64:
33988 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33989 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
33990 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33991 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33992 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
33993 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
33994 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
33995 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33996 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
33997 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33998 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49]
33999 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34002 ; AVX512VPOPCNTDQ-LABEL: ult_49_v2i64:
34003 ; AVX512VPOPCNTDQ: # %bb.0:
34004 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34005 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
34006 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49]
34007 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34008 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
34009 ; AVX512VPOPCNTDQ-NEXT: retq
34011 ; AVX512VPOPCNTDQVL-LABEL: ult_49_v2i64:
34012 ; AVX512VPOPCNTDQVL: # %bb.0:
34013 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
34014 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
34015 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
34016 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
34017 ; AVX512VPOPCNTDQVL-NEXT: retq
34019 ; BITALG_NOVLX-LABEL: ult_49_v2i64:
34020 ; BITALG_NOVLX: # %bb.0:
34021 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34022 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
34023 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
34024 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34025 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49]
34026 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34027 ; BITALG_NOVLX-NEXT: vzeroupper
34028 ; BITALG_NOVLX-NEXT: retq
34030 ; BITALG-LABEL: ult_49_v2i64:
34032 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
34033 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
34034 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34035 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
34036 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
34037 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
34038 ; BITALG-NEXT: retq
34039 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
34040 %3 = icmp ult <2 x i64> %2, <i64 49, i64 49>
34041 %4 = sext <2 x i1> %3 to <2 x i64>
34045 define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) {
34046 ; SSE2-LABEL: ugt_49_v2i64:
34048 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34049 ; SSE2-NEXT: psrlw $1, %xmm1
34050 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34051 ; SSE2-NEXT: psubb %xmm1, %xmm0
34052 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34053 ; SSE2-NEXT: movdqa %xmm0, %xmm2
34054 ; SSE2-NEXT: pand %xmm1, %xmm2
34055 ; SSE2-NEXT: psrlw $2, %xmm0
34056 ; SSE2-NEXT: pand %xmm1, %xmm0
34057 ; SSE2-NEXT: paddb %xmm2, %xmm0
34058 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34059 ; SSE2-NEXT: psrlw $4, %xmm1
34060 ; SSE2-NEXT: paddb %xmm0, %xmm1
34061 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34062 ; SSE2-NEXT: pxor %xmm0, %xmm0
34063 ; SSE2-NEXT: psadbw %xmm0, %xmm1
34064 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34065 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
34066 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
34067 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34068 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
34069 ; SSE2-NEXT: pand %xmm2, %xmm3
34070 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
34071 ; SSE2-NEXT: por %xmm3, %xmm0
34074 ; SSE3-LABEL: ugt_49_v2i64:
34076 ; SSE3-NEXT: movdqa %xmm0, %xmm1
34077 ; SSE3-NEXT: psrlw $1, %xmm1
34078 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34079 ; SSE3-NEXT: psubb %xmm1, %xmm0
34080 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34081 ; SSE3-NEXT: movdqa %xmm0, %xmm2
34082 ; SSE3-NEXT: pand %xmm1, %xmm2
34083 ; SSE3-NEXT: psrlw $2, %xmm0
34084 ; SSE3-NEXT: pand %xmm1, %xmm0
34085 ; SSE3-NEXT: paddb %xmm2, %xmm0
34086 ; SSE3-NEXT: movdqa %xmm0, %xmm1
34087 ; SSE3-NEXT: psrlw $4, %xmm1
34088 ; SSE3-NEXT: paddb %xmm0, %xmm1
34089 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34090 ; SSE3-NEXT: pxor %xmm0, %xmm0
34091 ; SSE3-NEXT: psadbw %xmm0, %xmm1
34092 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34093 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
34094 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
34095 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34096 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
34097 ; SSE3-NEXT: pand %xmm2, %xmm3
34098 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
34099 ; SSE3-NEXT: por %xmm3, %xmm0
34102 ; SSSE3-LABEL: ugt_49_v2i64:
34104 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34105 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
34106 ; SSSE3-NEXT: pand %xmm1, %xmm2
34107 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34108 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
34109 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
34110 ; SSSE3-NEXT: psrlw $4, %xmm0
34111 ; SSSE3-NEXT: pand %xmm1, %xmm0
34112 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
34113 ; SSSE3-NEXT: paddb %xmm4, %xmm3
34114 ; SSSE3-NEXT: pxor %xmm0, %xmm0
34115 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
34116 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
34117 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
34118 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
34119 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
34120 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
34121 ; SSSE3-NEXT: pand %xmm1, %xmm2
34122 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
34123 ; SSSE3-NEXT: por %xmm2, %xmm0
34126 ; SSE41-LABEL: ugt_49_v2i64:
34128 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34129 ; SSE41-NEXT: movdqa %xmm0, %xmm2
34130 ; SSE41-NEXT: pand %xmm1, %xmm2
34131 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34132 ; SSE41-NEXT: movdqa %xmm3, %xmm4
34133 ; SSE41-NEXT: pshufb %xmm2, %xmm4
34134 ; SSE41-NEXT: psrlw $4, %xmm0
34135 ; SSE41-NEXT: pand %xmm1, %xmm0
34136 ; SSE41-NEXT: pshufb %xmm0, %xmm3
34137 ; SSE41-NEXT: paddb %xmm4, %xmm3
34138 ; SSE41-NEXT: pxor %xmm0, %xmm0
34139 ; SSE41-NEXT: psadbw %xmm0, %xmm3
34140 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
34141 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
34142 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
34143 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
34144 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
34145 ; SSE41-NEXT: pand %xmm1, %xmm2
34146 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
34147 ; SSE41-NEXT: por %xmm2, %xmm0
34150 ; AVX1-LABEL: ugt_49_v2i64:
34152 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34153 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
34154 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34155 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
34156 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
34157 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
34158 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
34159 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
34160 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
34161 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34162 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34165 ; AVX2-LABEL: ugt_49_v2i64:
34167 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34168 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
34169 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34170 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
34171 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
34172 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
34173 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
34174 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
34175 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
34176 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34177 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34180 ; AVX512VPOPCNTDQ-LABEL: ugt_49_v2i64:
34181 ; AVX512VPOPCNTDQ: # %bb.0:
34182 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34183 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
34184 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34185 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
34186 ; AVX512VPOPCNTDQ-NEXT: retq
34188 ; AVX512VPOPCNTDQVL-LABEL: ugt_49_v2i64:
34189 ; AVX512VPOPCNTDQVL: # %bb.0:
34190 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
34191 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
34192 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
34193 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
34194 ; AVX512VPOPCNTDQVL-NEXT: retq
34196 ; BITALG_NOVLX-LABEL: ugt_49_v2i64:
34197 ; BITALG_NOVLX: # %bb.0:
34198 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34199 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
34200 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
34201 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34202 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34203 ; BITALG_NOVLX-NEXT: vzeroupper
34204 ; BITALG_NOVLX-NEXT: retq
34206 ; BITALG-LABEL: ugt_49_v2i64:
34208 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
34209 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
34210 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34211 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
34212 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
34213 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
34214 ; BITALG-NEXT: retq
34215 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
34216 %3 = icmp ugt <2 x i64> %2, <i64 49, i64 49>
34217 %4 = sext <2 x i1> %3 to <2 x i64>
34221 define <2 x i64> @ult_50_v2i64(<2 x i64> %0) {
34222 ; SSE2-LABEL: ult_50_v2i64:
34224 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34225 ; SSE2-NEXT: psrlw $1, %xmm1
34226 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34227 ; SSE2-NEXT: psubb %xmm1, %xmm0
34228 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34229 ; SSE2-NEXT: movdqa %xmm0, %xmm2
34230 ; SSE2-NEXT: pand %xmm1, %xmm2
34231 ; SSE2-NEXT: psrlw $2, %xmm0
34232 ; SSE2-NEXT: pand %xmm1, %xmm0
34233 ; SSE2-NEXT: paddb %xmm2, %xmm0
34234 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34235 ; SSE2-NEXT: psrlw $4, %xmm1
34236 ; SSE2-NEXT: paddb %xmm0, %xmm1
34237 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34238 ; SSE2-NEXT: pxor %xmm0, %xmm0
34239 ; SSE2-NEXT: psadbw %xmm0, %xmm1
34240 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34241 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
34242 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
34243 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483698,2147483698]
34244 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
34245 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
34246 ; SSE2-NEXT: pand %xmm2, %xmm1
34247 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
34248 ; SSE2-NEXT: por %xmm1, %xmm0
34251 ; SSE3-LABEL: ult_50_v2i64:
34253 ; SSE3-NEXT: movdqa %xmm0, %xmm1
34254 ; SSE3-NEXT: psrlw $1, %xmm1
34255 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34256 ; SSE3-NEXT: psubb %xmm1, %xmm0
34257 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34258 ; SSE3-NEXT: movdqa %xmm0, %xmm2
34259 ; SSE3-NEXT: pand %xmm1, %xmm2
34260 ; SSE3-NEXT: psrlw $2, %xmm0
34261 ; SSE3-NEXT: pand %xmm1, %xmm0
34262 ; SSE3-NEXT: paddb %xmm2, %xmm0
34263 ; SSE3-NEXT: movdqa %xmm0, %xmm1
34264 ; SSE3-NEXT: psrlw $4, %xmm1
34265 ; SSE3-NEXT: paddb %xmm0, %xmm1
34266 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34267 ; SSE3-NEXT: pxor %xmm0, %xmm0
34268 ; SSE3-NEXT: psadbw %xmm0, %xmm1
34269 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34270 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
34271 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
34272 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483698,2147483698]
34273 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
34274 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
34275 ; SSE3-NEXT: pand %xmm2, %xmm1
34276 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
34277 ; SSE3-NEXT: por %xmm1, %xmm0
34280 ; SSSE3-LABEL: ult_50_v2i64:
34282 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34283 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
34284 ; SSSE3-NEXT: pand %xmm1, %xmm2
34285 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34286 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
34287 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
34288 ; SSSE3-NEXT: psrlw $4, %xmm0
34289 ; SSSE3-NEXT: pand %xmm1, %xmm0
34290 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
34291 ; SSSE3-NEXT: paddb %xmm4, %xmm3
34292 ; SSSE3-NEXT: pxor %xmm0, %xmm0
34293 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
34294 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
34295 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
34296 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
34297 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483698,2147483698]
34298 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
34299 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
34300 ; SSSE3-NEXT: pand %xmm1, %xmm2
34301 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
34302 ; SSSE3-NEXT: por %xmm2, %xmm0
34305 ; SSE41-LABEL: ult_50_v2i64:
34307 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34308 ; SSE41-NEXT: movdqa %xmm0, %xmm2
34309 ; SSE41-NEXT: pand %xmm1, %xmm2
34310 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34311 ; SSE41-NEXT: movdqa %xmm3, %xmm4
34312 ; SSE41-NEXT: pshufb %xmm2, %xmm4
34313 ; SSE41-NEXT: psrlw $4, %xmm0
34314 ; SSE41-NEXT: pand %xmm1, %xmm0
34315 ; SSE41-NEXT: pshufb %xmm0, %xmm3
34316 ; SSE41-NEXT: paddb %xmm4, %xmm3
34317 ; SSE41-NEXT: pxor %xmm0, %xmm0
34318 ; SSE41-NEXT: psadbw %xmm0, %xmm3
34319 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
34320 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
34321 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
34322 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483698,2147483698]
34323 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
34324 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
34325 ; SSE41-NEXT: pand %xmm1, %xmm2
34326 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
34327 ; SSE41-NEXT: por %xmm2, %xmm0
34330 ; AVX1-LABEL: ult_50_v2i64:
34332 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34333 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
34334 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34335 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
34336 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
34337 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
34338 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
34339 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
34340 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
34341 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34342 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50]
34343 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34346 ; AVX2-LABEL: ult_50_v2i64:
34348 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34349 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
34350 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34351 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
34352 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
34353 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
34354 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
34355 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
34356 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
34357 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34358 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50]
34359 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34362 ; AVX512VPOPCNTDQ-LABEL: ult_50_v2i64:
34363 ; AVX512VPOPCNTDQ: # %bb.0:
34364 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34365 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
34366 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50]
34367 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34368 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
34369 ; AVX512VPOPCNTDQ-NEXT: retq
34371 ; AVX512VPOPCNTDQVL-LABEL: ult_50_v2i64:
34372 ; AVX512VPOPCNTDQVL: # %bb.0:
34373 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
34374 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
34375 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
34376 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
34377 ; AVX512VPOPCNTDQVL-NEXT: retq
34379 ; BITALG_NOVLX-LABEL: ult_50_v2i64:
34380 ; BITALG_NOVLX: # %bb.0:
34381 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34382 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
34383 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
34384 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34385 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50]
34386 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34387 ; BITALG_NOVLX-NEXT: vzeroupper
34388 ; BITALG_NOVLX-NEXT: retq
34390 ; BITALG-LABEL: ult_50_v2i64:
34392 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
34393 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
34394 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34395 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
34396 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
34397 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
34398 ; BITALG-NEXT: retq
34399 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
34400 %3 = icmp ult <2 x i64> %2, <i64 50, i64 50>
34401 %4 = sext <2 x i1> %3 to <2 x i64>
34405 define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) {
34406 ; SSE2-LABEL: ugt_50_v2i64:
34408 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34409 ; SSE2-NEXT: psrlw $1, %xmm1
34410 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34411 ; SSE2-NEXT: psubb %xmm1, %xmm0
34412 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34413 ; SSE2-NEXT: movdqa %xmm0, %xmm2
34414 ; SSE2-NEXT: pand %xmm1, %xmm2
34415 ; SSE2-NEXT: psrlw $2, %xmm0
34416 ; SSE2-NEXT: pand %xmm1, %xmm0
34417 ; SSE2-NEXT: paddb %xmm2, %xmm0
34418 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34419 ; SSE2-NEXT: psrlw $4, %xmm1
34420 ; SSE2-NEXT: paddb %xmm0, %xmm1
34421 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34422 ; SSE2-NEXT: pxor %xmm0, %xmm0
34423 ; SSE2-NEXT: psadbw %xmm0, %xmm1
34424 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34425 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
34426 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
34427 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34428 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
34429 ; SSE2-NEXT: pand %xmm2, %xmm3
34430 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
34431 ; SSE2-NEXT: por %xmm3, %xmm0
34434 ; SSE3-LABEL: ugt_50_v2i64:
34436 ; SSE3-NEXT: movdqa %xmm0, %xmm1
34437 ; SSE3-NEXT: psrlw $1, %xmm1
34438 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34439 ; SSE3-NEXT: psubb %xmm1, %xmm0
34440 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34441 ; SSE3-NEXT: movdqa %xmm0, %xmm2
34442 ; SSE3-NEXT: pand %xmm1, %xmm2
34443 ; SSE3-NEXT: psrlw $2, %xmm0
34444 ; SSE3-NEXT: pand %xmm1, %xmm0
34445 ; SSE3-NEXT: paddb %xmm2, %xmm0
34446 ; SSE3-NEXT: movdqa %xmm0, %xmm1
34447 ; SSE3-NEXT: psrlw $4, %xmm1
34448 ; SSE3-NEXT: paddb %xmm0, %xmm1
34449 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34450 ; SSE3-NEXT: pxor %xmm0, %xmm0
34451 ; SSE3-NEXT: psadbw %xmm0, %xmm1
34452 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34453 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
34454 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
34455 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34456 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
34457 ; SSE3-NEXT: pand %xmm2, %xmm3
34458 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
34459 ; SSE3-NEXT: por %xmm3, %xmm0
34462 ; SSSE3-LABEL: ugt_50_v2i64:
34464 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34465 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
34466 ; SSSE3-NEXT: pand %xmm1, %xmm2
34467 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34468 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
34469 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
34470 ; SSSE3-NEXT: psrlw $4, %xmm0
34471 ; SSSE3-NEXT: pand %xmm1, %xmm0
34472 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
34473 ; SSSE3-NEXT: paddb %xmm4, %xmm3
34474 ; SSSE3-NEXT: pxor %xmm0, %xmm0
34475 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
34476 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
34477 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
34478 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
34479 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
34480 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
34481 ; SSSE3-NEXT: pand %xmm1, %xmm2
34482 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
34483 ; SSSE3-NEXT: por %xmm2, %xmm0
34486 ; SSE41-LABEL: ugt_50_v2i64:
34488 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34489 ; SSE41-NEXT: movdqa %xmm0, %xmm2
34490 ; SSE41-NEXT: pand %xmm1, %xmm2
34491 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34492 ; SSE41-NEXT: movdqa %xmm3, %xmm4
34493 ; SSE41-NEXT: pshufb %xmm2, %xmm4
34494 ; SSE41-NEXT: psrlw $4, %xmm0
34495 ; SSE41-NEXT: pand %xmm1, %xmm0
34496 ; SSE41-NEXT: pshufb %xmm0, %xmm3
34497 ; SSE41-NEXT: paddb %xmm4, %xmm3
34498 ; SSE41-NEXT: pxor %xmm0, %xmm0
34499 ; SSE41-NEXT: psadbw %xmm0, %xmm3
34500 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
34501 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
34502 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
34503 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
34504 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
34505 ; SSE41-NEXT: pand %xmm1, %xmm2
34506 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
34507 ; SSE41-NEXT: por %xmm2, %xmm0
34510 ; AVX1-LABEL: ugt_50_v2i64:
34512 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34513 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
34514 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34515 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
34516 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
34517 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
34518 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
34519 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
34520 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
34521 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34522 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34525 ; AVX2-LABEL: ugt_50_v2i64:
34527 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34528 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
34529 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34530 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
34531 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
34532 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
34533 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
34534 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
34535 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
34536 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34537 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34540 ; AVX512VPOPCNTDQ-LABEL: ugt_50_v2i64:
34541 ; AVX512VPOPCNTDQ: # %bb.0:
34542 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34543 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
34544 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34545 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
34546 ; AVX512VPOPCNTDQ-NEXT: retq
34548 ; AVX512VPOPCNTDQVL-LABEL: ugt_50_v2i64:
34549 ; AVX512VPOPCNTDQVL: # %bb.0:
34550 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
34551 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
34552 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
34553 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
34554 ; AVX512VPOPCNTDQVL-NEXT: retq
34556 ; BITALG_NOVLX-LABEL: ugt_50_v2i64:
34557 ; BITALG_NOVLX: # %bb.0:
34558 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34559 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
34560 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
34561 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34562 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34563 ; BITALG_NOVLX-NEXT: vzeroupper
34564 ; BITALG_NOVLX-NEXT: retq
34566 ; BITALG-LABEL: ugt_50_v2i64:
34568 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
34569 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
34570 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34571 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
34572 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
34573 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
34574 ; BITALG-NEXT: retq
34575 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
34576 %3 = icmp ugt <2 x i64> %2, <i64 50, i64 50>
34577 %4 = sext <2 x i1> %3 to <2 x i64>
34581 define <2 x i64> @ult_51_v2i64(<2 x i64> %0) {
34582 ; SSE2-LABEL: ult_51_v2i64:
34584 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34585 ; SSE2-NEXT: psrlw $1, %xmm1
34586 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34587 ; SSE2-NEXT: psubb %xmm1, %xmm0
34588 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34589 ; SSE2-NEXT: movdqa %xmm0, %xmm2
34590 ; SSE2-NEXT: pand %xmm1, %xmm2
34591 ; SSE2-NEXT: psrlw $2, %xmm0
34592 ; SSE2-NEXT: pand %xmm1, %xmm0
34593 ; SSE2-NEXT: paddb %xmm2, %xmm0
34594 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34595 ; SSE2-NEXT: psrlw $4, %xmm1
34596 ; SSE2-NEXT: paddb %xmm0, %xmm1
34597 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34598 ; SSE2-NEXT: pxor %xmm0, %xmm0
34599 ; SSE2-NEXT: psadbw %xmm0, %xmm1
34600 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34601 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
34602 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
34603 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483699,2147483699]
34604 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
34605 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
34606 ; SSE2-NEXT: pand %xmm2, %xmm1
34607 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
34608 ; SSE2-NEXT: por %xmm1, %xmm0
34611 ; SSE3-LABEL: ult_51_v2i64:
34613 ; SSE3-NEXT: movdqa %xmm0, %xmm1
34614 ; SSE3-NEXT: psrlw $1, %xmm1
34615 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34616 ; SSE3-NEXT: psubb %xmm1, %xmm0
34617 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34618 ; SSE3-NEXT: movdqa %xmm0, %xmm2
34619 ; SSE3-NEXT: pand %xmm1, %xmm2
34620 ; SSE3-NEXT: psrlw $2, %xmm0
34621 ; SSE3-NEXT: pand %xmm1, %xmm0
34622 ; SSE3-NEXT: paddb %xmm2, %xmm0
34623 ; SSE3-NEXT: movdqa %xmm0, %xmm1
34624 ; SSE3-NEXT: psrlw $4, %xmm1
34625 ; SSE3-NEXT: paddb %xmm0, %xmm1
34626 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34627 ; SSE3-NEXT: pxor %xmm0, %xmm0
34628 ; SSE3-NEXT: psadbw %xmm0, %xmm1
34629 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34630 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
34631 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
34632 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483699,2147483699]
34633 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
34634 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
34635 ; SSE3-NEXT: pand %xmm2, %xmm1
34636 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
34637 ; SSE3-NEXT: por %xmm1, %xmm0
34640 ; SSSE3-LABEL: ult_51_v2i64:
34642 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34643 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
34644 ; SSSE3-NEXT: pand %xmm1, %xmm2
34645 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34646 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
34647 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
34648 ; SSSE3-NEXT: psrlw $4, %xmm0
34649 ; SSSE3-NEXT: pand %xmm1, %xmm0
34650 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
34651 ; SSSE3-NEXT: paddb %xmm4, %xmm3
34652 ; SSSE3-NEXT: pxor %xmm0, %xmm0
34653 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
34654 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
34655 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
34656 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
34657 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483699,2147483699]
34658 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
34659 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
34660 ; SSSE3-NEXT: pand %xmm1, %xmm2
34661 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
34662 ; SSSE3-NEXT: por %xmm2, %xmm0
34665 ; SSE41-LABEL: ult_51_v2i64:
34667 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34668 ; SSE41-NEXT: movdqa %xmm0, %xmm2
34669 ; SSE41-NEXT: pand %xmm1, %xmm2
34670 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34671 ; SSE41-NEXT: movdqa %xmm3, %xmm4
34672 ; SSE41-NEXT: pshufb %xmm2, %xmm4
34673 ; SSE41-NEXT: psrlw $4, %xmm0
34674 ; SSE41-NEXT: pand %xmm1, %xmm0
34675 ; SSE41-NEXT: pshufb %xmm0, %xmm3
34676 ; SSE41-NEXT: paddb %xmm4, %xmm3
34677 ; SSE41-NEXT: pxor %xmm0, %xmm0
34678 ; SSE41-NEXT: psadbw %xmm0, %xmm3
34679 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
34680 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
34681 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
34682 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483699,2147483699]
34683 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
34684 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
34685 ; SSE41-NEXT: pand %xmm1, %xmm2
34686 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
34687 ; SSE41-NEXT: por %xmm2, %xmm0
34690 ; AVX1-LABEL: ult_51_v2i64:
34692 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34693 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
34694 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34695 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
34696 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
34697 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
34698 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
34699 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
34700 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
34701 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34702 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51]
34703 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34706 ; AVX2-LABEL: ult_51_v2i64:
34708 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34709 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
34710 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34711 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
34712 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
34713 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
34714 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
34715 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
34716 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
34717 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34718 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51]
34719 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34722 ; AVX512VPOPCNTDQ-LABEL: ult_51_v2i64:
34723 ; AVX512VPOPCNTDQ: # %bb.0:
34724 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34725 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
34726 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51]
34727 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34728 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
34729 ; AVX512VPOPCNTDQ-NEXT: retq
34731 ; AVX512VPOPCNTDQVL-LABEL: ult_51_v2i64:
34732 ; AVX512VPOPCNTDQVL: # %bb.0:
34733 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
34734 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
34735 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
34736 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
34737 ; AVX512VPOPCNTDQVL-NEXT: retq
34739 ; BITALG_NOVLX-LABEL: ult_51_v2i64:
34740 ; BITALG_NOVLX: # %bb.0:
34741 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34742 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
34743 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
34744 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34745 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51]
34746 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34747 ; BITALG_NOVLX-NEXT: vzeroupper
34748 ; BITALG_NOVLX-NEXT: retq
34750 ; BITALG-LABEL: ult_51_v2i64:
34752 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
34753 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
34754 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34755 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
34756 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
34757 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
34758 ; BITALG-NEXT: retq
34759 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
34760 %3 = icmp ult <2 x i64> %2, <i64 51, i64 51>
34761 %4 = sext <2 x i1> %3 to <2 x i64>
34765 define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) {
34766 ; SSE2-LABEL: ugt_51_v2i64:
34768 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34769 ; SSE2-NEXT: psrlw $1, %xmm1
34770 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34771 ; SSE2-NEXT: psubb %xmm1, %xmm0
34772 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34773 ; SSE2-NEXT: movdqa %xmm0, %xmm2
34774 ; SSE2-NEXT: pand %xmm1, %xmm2
34775 ; SSE2-NEXT: psrlw $2, %xmm0
34776 ; SSE2-NEXT: pand %xmm1, %xmm0
34777 ; SSE2-NEXT: paddb %xmm2, %xmm0
34778 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34779 ; SSE2-NEXT: psrlw $4, %xmm1
34780 ; SSE2-NEXT: paddb %xmm0, %xmm1
34781 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34782 ; SSE2-NEXT: pxor %xmm0, %xmm0
34783 ; SSE2-NEXT: psadbw %xmm0, %xmm1
34784 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34785 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
34786 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
34787 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34788 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
34789 ; SSE2-NEXT: pand %xmm2, %xmm3
34790 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
34791 ; SSE2-NEXT: por %xmm3, %xmm0
34794 ; SSE3-LABEL: ugt_51_v2i64:
34796 ; SSE3-NEXT: movdqa %xmm0, %xmm1
34797 ; SSE3-NEXT: psrlw $1, %xmm1
34798 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34799 ; SSE3-NEXT: psubb %xmm1, %xmm0
34800 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34801 ; SSE3-NEXT: movdqa %xmm0, %xmm2
34802 ; SSE3-NEXT: pand %xmm1, %xmm2
34803 ; SSE3-NEXT: psrlw $2, %xmm0
34804 ; SSE3-NEXT: pand %xmm1, %xmm0
34805 ; SSE3-NEXT: paddb %xmm2, %xmm0
34806 ; SSE3-NEXT: movdqa %xmm0, %xmm1
34807 ; SSE3-NEXT: psrlw $4, %xmm1
34808 ; SSE3-NEXT: paddb %xmm0, %xmm1
34809 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34810 ; SSE3-NEXT: pxor %xmm0, %xmm0
34811 ; SSE3-NEXT: psadbw %xmm0, %xmm1
34812 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34813 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
34814 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
34815 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34816 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
34817 ; SSE3-NEXT: pand %xmm2, %xmm3
34818 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
34819 ; SSE3-NEXT: por %xmm3, %xmm0
34822 ; SSSE3-LABEL: ugt_51_v2i64:
34824 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34825 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
34826 ; SSSE3-NEXT: pand %xmm1, %xmm2
34827 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34828 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
34829 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
34830 ; SSSE3-NEXT: psrlw $4, %xmm0
34831 ; SSSE3-NEXT: pand %xmm1, %xmm0
34832 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
34833 ; SSSE3-NEXT: paddb %xmm4, %xmm3
34834 ; SSSE3-NEXT: pxor %xmm0, %xmm0
34835 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
34836 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
34837 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
34838 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
34839 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
34840 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
34841 ; SSSE3-NEXT: pand %xmm1, %xmm2
34842 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
34843 ; SSSE3-NEXT: por %xmm2, %xmm0
34846 ; SSE41-LABEL: ugt_51_v2i64:
34848 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34849 ; SSE41-NEXT: movdqa %xmm0, %xmm2
34850 ; SSE41-NEXT: pand %xmm1, %xmm2
34851 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34852 ; SSE41-NEXT: movdqa %xmm3, %xmm4
34853 ; SSE41-NEXT: pshufb %xmm2, %xmm4
34854 ; SSE41-NEXT: psrlw $4, %xmm0
34855 ; SSE41-NEXT: pand %xmm1, %xmm0
34856 ; SSE41-NEXT: pshufb %xmm0, %xmm3
34857 ; SSE41-NEXT: paddb %xmm4, %xmm3
34858 ; SSE41-NEXT: pxor %xmm0, %xmm0
34859 ; SSE41-NEXT: psadbw %xmm0, %xmm3
34860 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
34861 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
34862 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
34863 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
34864 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
34865 ; SSE41-NEXT: pand %xmm1, %xmm2
34866 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
34867 ; SSE41-NEXT: por %xmm2, %xmm0
34870 ; AVX1-LABEL: ugt_51_v2i64:
34872 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34873 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
34874 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34875 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
34876 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
34877 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
34878 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
34879 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
34880 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
34881 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34882 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34885 ; AVX2-LABEL: ugt_51_v2i64:
34887 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34888 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
34889 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34890 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
34891 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
34892 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
34893 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
34894 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
34895 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
34896 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34897 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34900 ; AVX512VPOPCNTDQ-LABEL: ugt_51_v2i64:
34901 ; AVX512VPOPCNTDQ: # %bb.0:
34902 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34903 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
34904 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34905 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
34906 ; AVX512VPOPCNTDQ-NEXT: retq
34908 ; AVX512VPOPCNTDQVL-LABEL: ugt_51_v2i64:
34909 ; AVX512VPOPCNTDQVL: # %bb.0:
34910 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
34911 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
34912 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
34913 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
34914 ; AVX512VPOPCNTDQVL-NEXT: retq
34916 ; BITALG_NOVLX-LABEL: ugt_51_v2i64:
34917 ; BITALG_NOVLX: # %bb.0:
34918 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34919 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
34920 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
34921 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34922 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34923 ; BITALG_NOVLX-NEXT: vzeroupper
34924 ; BITALG_NOVLX-NEXT: retq
34926 ; BITALG-LABEL: ugt_51_v2i64:
34928 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
34929 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
34930 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34931 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
34932 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
34933 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
34934 ; BITALG-NEXT: retq
34935 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
34936 %3 = icmp ugt <2 x i64> %2, <i64 51, i64 51>
34937 %4 = sext <2 x i1> %3 to <2 x i64>
34941 define <2 x i64> @ult_52_v2i64(<2 x i64> %0) {
34942 ; SSE2-LABEL: ult_52_v2i64:
34944 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34945 ; SSE2-NEXT: psrlw $1, %xmm1
34946 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34947 ; SSE2-NEXT: psubb %xmm1, %xmm0
34948 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34949 ; SSE2-NEXT: movdqa %xmm0, %xmm2
34950 ; SSE2-NEXT: pand %xmm1, %xmm2
34951 ; SSE2-NEXT: psrlw $2, %xmm0
34952 ; SSE2-NEXT: pand %xmm1, %xmm0
34953 ; SSE2-NEXT: paddb %xmm2, %xmm0
34954 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34955 ; SSE2-NEXT: psrlw $4, %xmm1
34956 ; SSE2-NEXT: paddb %xmm0, %xmm1
34957 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34958 ; SSE2-NEXT: pxor %xmm0, %xmm0
34959 ; SSE2-NEXT: psadbw %xmm0, %xmm1
34960 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34961 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
34962 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
34963 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483700,2147483700]
34964 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
34965 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
34966 ; SSE2-NEXT: pand %xmm2, %xmm1
34967 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
34968 ; SSE2-NEXT: por %xmm1, %xmm0
34971 ; SSE3-LABEL: ult_52_v2i64:
34973 ; SSE3-NEXT: movdqa %xmm0, %xmm1
34974 ; SSE3-NEXT: psrlw $1, %xmm1
34975 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34976 ; SSE3-NEXT: psubb %xmm1, %xmm0
34977 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34978 ; SSE3-NEXT: movdqa %xmm0, %xmm2
34979 ; SSE3-NEXT: pand %xmm1, %xmm2
34980 ; SSE3-NEXT: psrlw $2, %xmm0
34981 ; SSE3-NEXT: pand %xmm1, %xmm0
34982 ; SSE3-NEXT: paddb %xmm2, %xmm0
34983 ; SSE3-NEXT: movdqa %xmm0, %xmm1
34984 ; SSE3-NEXT: psrlw $4, %xmm1
34985 ; SSE3-NEXT: paddb %xmm0, %xmm1
34986 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34987 ; SSE3-NEXT: pxor %xmm0, %xmm0
34988 ; SSE3-NEXT: psadbw %xmm0, %xmm1
34989 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34990 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
34991 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
34992 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483700,2147483700]
34993 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
34994 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
34995 ; SSE3-NEXT: pand %xmm2, %xmm1
34996 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
34997 ; SSE3-NEXT: por %xmm1, %xmm0
35000 ; SSSE3-LABEL: ult_52_v2i64:
35002 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35003 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
35004 ; SSSE3-NEXT: pand %xmm1, %xmm2
35005 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35006 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
35007 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
35008 ; SSSE3-NEXT: psrlw $4, %xmm0
35009 ; SSSE3-NEXT: pand %xmm1, %xmm0
35010 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
35011 ; SSSE3-NEXT: paddb %xmm4, %xmm3
35012 ; SSSE3-NEXT: pxor %xmm0, %xmm0
35013 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
35014 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
35015 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
35016 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
35017 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483700,2147483700]
35018 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
35019 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
35020 ; SSSE3-NEXT: pand %xmm1, %xmm2
35021 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
35022 ; SSSE3-NEXT: por %xmm2, %xmm0
35025 ; SSE41-LABEL: ult_52_v2i64:
35027 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35028 ; SSE41-NEXT: movdqa %xmm0, %xmm2
35029 ; SSE41-NEXT: pand %xmm1, %xmm2
35030 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35031 ; SSE41-NEXT: movdqa %xmm3, %xmm4
35032 ; SSE41-NEXT: pshufb %xmm2, %xmm4
35033 ; SSE41-NEXT: psrlw $4, %xmm0
35034 ; SSE41-NEXT: pand %xmm1, %xmm0
35035 ; SSE41-NEXT: pshufb %xmm0, %xmm3
35036 ; SSE41-NEXT: paddb %xmm4, %xmm3
35037 ; SSE41-NEXT: pxor %xmm0, %xmm0
35038 ; SSE41-NEXT: psadbw %xmm0, %xmm3
35039 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
35040 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
35041 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
35042 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483700,2147483700]
35043 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
35044 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
35045 ; SSE41-NEXT: pand %xmm1, %xmm2
35046 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
35047 ; SSE41-NEXT: por %xmm2, %xmm0
35050 ; AVX1-LABEL: ult_52_v2i64:
35052 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35053 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
35054 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35055 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
35056 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
35057 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
35058 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
35059 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
35060 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
35061 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35062 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52]
35063 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35066 ; AVX2-LABEL: ult_52_v2i64:
35068 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35069 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
35070 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35071 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
35072 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
35073 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
35074 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
35075 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
35076 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
35077 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35078 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52]
35079 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35082 ; AVX512VPOPCNTDQ-LABEL: ult_52_v2i64:
35083 ; AVX512VPOPCNTDQ: # %bb.0:
35084 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
35085 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
35086 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52]
35087 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35088 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
35089 ; AVX512VPOPCNTDQ-NEXT: retq
35091 ; AVX512VPOPCNTDQVL-LABEL: ult_52_v2i64:
35092 ; AVX512VPOPCNTDQVL: # %bb.0:
35093 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
35094 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
35095 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
35096 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
35097 ; AVX512VPOPCNTDQVL-NEXT: retq
35099 ; BITALG_NOVLX-LABEL: ult_52_v2i64:
35100 ; BITALG_NOVLX: # %bb.0:
35101 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
35102 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
35103 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
35104 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35105 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52]
35106 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35107 ; BITALG_NOVLX-NEXT: vzeroupper
35108 ; BITALG_NOVLX-NEXT: retq
35110 ; BITALG-LABEL: ult_52_v2i64:
35112 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
35113 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
35114 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35115 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
35116 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
35117 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
35118 ; BITALG-NEXT: retq
35119 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
35120 %3 = icmp ult <2 x i64> %2, <i64 52, i64 52>
35121 %4 = sext <2 x i1> %3 to <2 x i64>
35125 define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) {
35126 ; SSE2-LABEL: ugt_52_v2i64:
35128 ; SSE2-NEXT: movdqa %xmm0, %xmm1
35129 ; SSE2-NEXT: psrlw $1, %xmm1
35130 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35131 ; SSE2-NEXT: psubb %xmm1, %xmm0
35132 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
35133 ; SSE2-NEXT: movdqa %xmm0, %xmm2
35134 ; SSE2-NEXT: pand %xmm1, %xmm2
35135 ; SSE2-NEXT: psrlw $2, %xmm0
35136 ; SSE2-NEXT: pand %xmm1, %xmm0
35137 ; SSE2-NEXT: paddb %xmm2, %xmm0
35138 ; SSE2-NEXT: movdqa %xmm0, %xmm1
35139 ; SSE2-NEXT: psrlw $4, %xmm1
35140 ; SSE2-NEXT: paddb %xmm0, %xmm1
35141 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35142 ; SSE2-NEXT: pxor %xmm0, %xmm0
35143 ; SSE2-NEXT: psadbw %xmm0, %xmm1
35144 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35145 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
35146 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
35147 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35148 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
35149 ; SSE2-NEXT: pand %xmm2, %xmm3
35150 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
35151 ; SSE2-NEXT: por %xmm3, %xmm0
35154 ; SSE3-LABEL: ugt_52_v2i64:
35156 ; SSE3-NEXT: movdqa %xmm0, %xmm1
35157 ; SSE3-NEXT: psrlw $1, %xmm1
35158 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35159 ; SSE3-NEXT: psubb %xmm1, %xmm0
35160 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
35161 ; SSE3-NEXT: movdqa %xmm0, %xmm2
35162 ; SSE3-NEXT: pand %xmm1, %xmm2
35163 ; SSE3-NEXT: psrlw $2, %xmm0
35164 ; SSE3-NEXT: pand %xmm1, %xmm0
35165 ; SSE3-NEXT: paddb %xmm2, %xmm0
35166 ; SSE3-NEXT: movdqa %xmm0, %xmm1
35167 ; SSE3-NEXT: psrlw $4, %xmm1
35168 ; SSE3-NEXT: paddb %xmm0, %xmm1
35169 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35170 ; SSE3-NEXT: pxor %xmm0, %xmm0
35171 ; SSE3-NEXT: psadbw %xmm0, %xmm1
35172 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35173 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
35174 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
35175 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35176 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
35177 ; SSE3-NEXT: pand %xmm2, %xmm3
35178 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
35179 ; SSE3-NEXT: por %xmm3, %xmm0
35182 ; SSSE3-LABEL: ugt_52_v2i64:
35184 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35185 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
35186 ; SSSE3-NEXT: pand %xmm1, %xmm2
35187 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35188 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
35189 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
35190 ; SSSE3-NEXT: psrlw $4, %xmm0
35191 ; SSSE3-NEXT: pand %xmm1, %xmm0
35192 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
35193 ; SSSE3-NEXT: paddb %xmm4, %xmm3
35194 ; SSSE3-NEXT: pxor %xmm0, %xmm0
35195 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
35196 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
35197 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
35198 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
35199 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
35200 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
35201 ; SSSE3-NEXT: pand %xmm1, %xmm2
35202 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
35203 ; SSSE3-NEXT: por %xmm2, %xmm0
35206 ; SSE41-LABEL: ugt_52_v2i64:
35208 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35209 ; SSE41-NEXT: movdqa %xmm0, %xmm2
35210 ; SSE41-NEXT: pand %xmm1, %xmm2
35211 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35212 ; SSE41-NEXT: movdqa %xmm3, %xmm4
35213 ; SSE41-NEXT: pshufb %xmm2, %xmm4
35214 ; SSE41-NEXT: psrlw $4, %xmm0
35215 ; SSE41-NEXT: pand %xmm1, %xmm0
35216 ; SSE41-NEXT: pshufb %xmm0, %xmm3
35217 ; SSE41-NEXT: paddb %xmm4, %xmm3
35218 ; SSE41-NEXT: pxor %xmm0, %xmm0
35219 ; SSE41-NEXT: psadbw %xmm0, %xmm3
35220 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
35221 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
35222 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
35223 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
35224 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
35225 ; SSE41-NEXT: pand %xmm1, %xmm2
35226 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
35227 ; SSE41-NEXT: por %xmm2, %xmm0
35230 ; AVX1-LABEL: ugt_52_v2i64:
35232 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35233 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
35234 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35235 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
35236 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
35237 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
35238 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
35239 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
35240 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
35241 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35242 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
35245 ; AVX2-LABEL: ugt_52_v2i64:
35247 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35248 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
35249 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35250 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
35251 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
35252 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
35253 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
35254 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
35255 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
35256 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35257 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
35260 ; AVX512VPOPCNTDQ-LABEL: ugt_52_v2i64:
35261 ; AVX512VPOPCNTDQ: # %bb.0:
35262 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
35263 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
35264 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
35265 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
35266 ; AVX512VPOPCNTDQ-NEXT: retq
35268 ; AVX512VPOPCNTDQVL-LABEL: ugt_52_v2i64:
35269 ; AVX512VPOPCNTDQVL: # %bb.0:
35270 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
35271 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
35272 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
35273 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
35274 ; AVX512VPOPCNTDQVL-NEXT: retq
35276 ; BITALG_NOVLX-LABEL: ugt_52_v2i64:
35277 ; BITALG_NOVLX: # %bb.0:
35278 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
35279 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
35280 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
35281 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35282 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
35283 ; BITALG_NOVLX-NEXT: vzeroupper
35284 ; BITALG_NOVLX-NEXT: retq
35286 ; BITALG-LABEL: ugt_52_v2i64:
35288 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
35289 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
35290 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35291 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
35292 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
35293 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
35294 ; BITALG-NEXT: retq
35295 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
35296 %3 = icmp ugt <2 x i64> %2, <i64 52, i64 52>
35297 %4 = sext <2 x i1> %3 to <2 x i64>
35301 define <2 x i64> @ult_53_v2i64(<2 x i64> %0) {
35302 ; SSE2-LABEL: ult_53_v2i64:
35304 ; SSE2-NEXT: movdqa %xmm0, %xmm1
35305 ; SSE2-NEXT: psrlw $1, %xmm1
35306 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35307 ; SSE2-NEXT: psubb %xmm1, %xmm0
35308 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
35309 ; SSE2-NEXT: movdqa %xmm0, %xmm2
35310 ; SSE2-NEXT: pand %xmm1, %xmm2
35311 ; SSE2-NEXT: psrlw $2, %xmm0
35312 ; SSE2-NEXT: pand %xmm1, %xmm0
35313 ; SSE2-NEXT: paddb %xmm2, %xmm0
35314 ; SSE2-NEXT: movdqa %xmm0, %xmm1
35315 ; SSE2-NEXT: psrlw $4, %xmm1
35316 ; SSE2-NEXT: paddb %xmm0, %xmm1
35317 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35318 ; SSE2-NEXT: pxor %xmm0, %xmm0
35319 ; SSE2-NEXT: psadbw %xmm0, %xmm1
35320 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35321 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
35322 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
35323 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483701,2147483701]
35324 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
35325 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
35326 ; SSE2-NEXT: pand %xmm2, %xmm1
35327 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
35328 ; SSE2-NEXT: por %xmm1, %xmm0
35331 ; SSE3-LABEL: ult_53_v2i64:
35333 ; SSE3-NEXT: movdqa %xmm0, %xmm1
35334 ; SSE3-NEXT: psrlw $1, %xmm1
35335 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35336 ; SSE3-NEXT: psubb %xmm1, %xmm0
35337 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
35338 ; SSE3-NEXT: movdqa %xmm0, %xmm2
35339 ; SSE3-NEXT: pand %xmm1, %xmm2
35340 ; SSE3-NEXT: psrlw $2, %xmm0
35341 ; SSE3-NEXT: pand %xmm1, %xmm0
35342 ; SSE3-NEXT: paddb %xmm2, %xmm0
35343 ; SSE3-NEXT: movdqa %xmm0, %xmm1
35344 ; SSE3-NEXT: psrlw $4, %xmm1
35345 ; SSE3-NEXT: paddb %xmm0, %xmm1
35346 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35347 ; SSE3-NEXT: pxor %xmm0, %xmm0
35348 ; SSE3-NEXT: psadbw %xmm0, %xmm1
35349 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35350 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
35351 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
35352 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483701,2147483701]
35353 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
35354 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
35355 ; SSE3-NEXT: pand %xmm2, %xmm1
35356 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
35357 ; SSE3-NEXT: por %xmm1, %xmm0
35360 ; SSSE3-LABEL: ult_53_v2i64:
35362 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35363 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
35364 ; SSSE3-NEXT: pand %xmm1, %xmm2
35365 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35366 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
35367 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
35368 ; SSSE3-NEXT: psrlw $4, %xmm0
35369 ; SSSE3-NEXT: pand %xmm1, %xmm0
35370 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
35371 ; SSSE3-NEXT: paddb %xmm4, %xmm3
35372 ; SSSE3-NEXT: pxor %xmm0, %xmm0
35373 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
35374 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
35375 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
35376 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
35377 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483701,2147483701]
35378 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
35379 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
35380 ; SSSE3-NEXT: pand %xmm1, %xmm2
35381 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
35382 ; SSSE3-NEXT: por %xmm2, %xmm0
35385 ; SSE41-LABEL: ult_53_v2i64:
35387 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35388 ; SSE41-NEXT: movdqa %xmm0, %xmm2
35389 ; SSE41-NEXT: pand %xmm1, %xmm2
35390 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35391 ; SSE41-NEXT: movdqa %xmm3, %xmm4
35392 ; SSE41-NEXT: pshufb %xmm2, %xmm4
35393 ; SSE41-NEXT: psrlw $4, %xmm0
35394 ; SSE41-NEXT: pand %xmm1, %xmm0
35395 ; SSE41-NEXT: pshufb %xmm0, %xmm3
35396 ; SSE41-NEXT: paddb %xmm4, %xmm3
35397 ; SSE41-NEXT: pxor %xmm0, %xmm0
35398 ; SSE41-NEXT: psadbw %xmm0, %xmm3
35399 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
35400 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
35401 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
35402 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483701,2147483701]
35403 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
35404 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
35405 ; SSE41-NEXT: pand %xmm1, %xmm2
35406 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
35407 ; SSE41-NEXT: por %xmm2, %xmm0
35410 ; AVX1-LABEL: ult_53_v2i64:
35412 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35413 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
35414 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35415 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
35416 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
35417 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
35418 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
35419 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
35420 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
35421 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35422 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53]
35423 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35426 ; AVX2-LABEL: ult_53_v2i64:
35428 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35429 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
35430 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35431 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
35432 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
35433 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
35434 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
35435 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
35436 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
35437 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35438 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53]
35439 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35442 ; AVX512VPOPCNTDQ-LABEL: ult_53_v2i64:
35443 ; AVX512VPOPCNTDQ: # %bb.0:
35444 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
35445 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
35446 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53]
35447 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35448 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
35449 ; AVX512VPOPCNTDQ-NEXT: retq
35451 ; AVX512VPOPCNTDQVL-LABEL: ult_53_v2i64:
35452 ; AVX512VPOPCNTDQVL: # %bb.0:
35453 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
35454 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
35455 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
35456 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
35457 ; AVX512VPOPCNTDQVL-NEXT: retq
35459 ; BITALG_NOVLX-LABEL: ult_53_v2i64:
35460 ; BITALG_NOVLX: # %bb.0:
35461 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
35462 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
35463 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
35464 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35465 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53]
35466 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35467 ; BITALG_NOVLX-NEXT: vzeroupper
35468 ; BITALG_NOVLX-NEXT: retq
35470 ; BITALG-LABEL: ult_53_v2i64:
35472 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
35473 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
35474 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35475 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
35476 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
35477 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
35478 ; BITALG-NEXT: retq
35479 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
35480 %3 = icmp ult <2 x i64> %2, <i64 53, i64 53>
35481 %4 = sext <2 x i1> %3 to <2 x i64>
35485 define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) {
35486 ; SSE2-LABEL: ugt_53_v2i64:
35488 ; SSE2-NEXT: movdqa %xmm0, %xmm1
35489 ; SSE2-NEXT: psrlw $1, %xmm1
35490 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35491 ; SSE2-NEXT: psubb %xmm1, %xmm0
35492 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
35493 ; SSE2-NEXT: movdqa %xmm0, %xmm2
35494 ; SSE2-NEXT: pand %xmm1, %xmm2
35495 ; SSE2-NEXT: psrlw $2, %xmm0
35496 ; SSE2-NEXT: pand %xmm1, %xmm0
35497 ; SSE2-NEXT: paddb %xmm2, %xmm0
35498 ; SSE2-NEXT: movdqa %xmm0, %xmm1
35499 ; SSE2-NEXT: psrlw $4, %xmm1
35500 ; SSE2-NEXT: paddb %xmm0, %xmm1
35501 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35502 ; SSE2-NEXT: pxor %xmm0, %xmm0
35503 ; SSE2-NEXT: psadbw %xmm0, %xmm1
35504 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35505 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
35506 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
35507 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35508 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
35509 ; SSE2-NEXT: pand %xmm2, %xmm3
35510 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
35511 ; SSE2-NEXT: por %xmm3, %xmm0
35514 ; SSE3-LABEL: ugt_53_v2i64:
35516 ; SSE3-NEXT: movdqa %xmm0, %xmm1
35517 ; SSE3-NEXT: psrlw $1, %xmm1
35518 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35519 ; SSE3-NEXT: psubb %xmm1, %xmm0
35520 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
35521 ; SSE3-NEXT: movdqa %xmm0, %xmm2
35522 ; SSE3-NEXT: pand %xmm1, %xmm2
35523 ; SSE3-NEXT: psrlw $2, %xmm0
35524 ; SSE3-NEXT: pand %xmm1, %xmm0
35525 ; SSE3-NEXT: paddb %xmm2, %xmm0
35526 ; SSE3-NEXT: movdqa %xmm0, %xmm1
35527 ; SSE3-NEXT: psrlw $4, %xmm1
35528 ; SSE3-NEXT: paddb %xmm0, %xmm1
35529 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35530 ; SSE3-NEXT: pxor %xmm0, %xmm0
35531 ; SSE3-NEXT: psadbw %xmm0, %xmm1
35532 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35533 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
35534 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
35535 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35536 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
35537 ; SSE3-NEXT: pand %xmm2, %xmm3
35538 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
35539 ; SSE3-NEXT: por %xmm3, %xmm0
35542 ; SSSE3-LABEL: ugt_53_v2i64:
35544 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35545 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
35546 ; SSSE3-NEXT: pand %xmm1, %xmm2
35547 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35548 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
35549 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
35550 ; SSSE3-NEXT: psrlw $4, %xmm0
35551 ; SSSE3-NEXT: pand %xmm1, %xmm0
35552 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
35553 ; SSSE3-NEXT: paddb %xmm4, %xmm3
35554 ; SSSE3-NEXT: pxor %xmm0, %xmm0
35555 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
35556 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
35557 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
35558 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
35559 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
35560 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
35561 ; SSSE3-NEXT: pand %xmm1, %xmm2
35562 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
35563 ; SSSE3-NEXT: por %xmm2, %xmm0
35566 ; SSE41-LABEL: ugt_53_v2i64:
35568 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35569 ; SSE41-NEXT: movdqa %xmm0, %xmm2
35570 ; SSE41-NEXT: pand %xmm1, %xmm2
35571 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35572 ; SSE41-NEXT: movdqa %xmm3, %xmm4
35573 ; SSE41-NEXT: pshufb %xmm2, %xmm4
35574 ; SSE41-NEXT: psrlw $4, %xmm0
35575 ; SSE41-NEXT: pand %xmm1, %xmm0
35576 ; SSE41-NEXT: pshufb %xmm0, %xmm3
35577 ; SSE41-NEXT: paddb %xmm4, %xmm3
35578 ; SSE41-NEXT: pxor %xmm0, %xmm0
35579 ; SSE41-NEXT: psadbw %xmm0, %xmm3
35580 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
35581 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
35582 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
35583 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
35584 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
35585 ; SSE41-NEXT: pand %xmm1, %xmm2
35586 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
35587 ; SSE41-NEXT: por %xmm2, %xmm0
35590 ; AVX1-LABEL: ugt_53_v2i64:
35592 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35593 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
35594 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35595 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
35596 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
35597 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
35598 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
35599 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
35600 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
35601 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35602 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
35605 ; AVX2-LABEL: ugt_53_v2i64:
35607 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35608 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
35609 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35610 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
35611 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
35612 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
35613 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
35614 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
35615 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
35616 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35617 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
35620 ; AVX512VPOPCNTDQ-LABEL: ugt_53_v2i64:
35621 ; AVX512VPOPCNTDQ: # %bb.0:
35622 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
35623 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
35624 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
35625 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
35626 ; AVX512VPOPCNTDQ-NEXT: retq
35628 ; AVX512VPOPCNTDQVL-LABEL: ugt_53_v2i64:
35629 ; AVX512VPOPCNTDQVL: # %bb.0:
35630 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
35631 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
35632 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
35633 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
35634 ; AVX512VPOPCNTDQVL-NEXT: retq
35636 ; BITALG_NOVLX-LABEL: ugt_53_v2i64:
35637 ; BITALG_NOVLX: # %bb.0:
35638 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
35639 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
35640 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
35641 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35642 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
35643 ; BITALG_NOVLX-NEXT: vzeroupper
35644 ; BITALG_NOVLX-NEXT: retq
35646 ; BITALG-LABEL: ugt_53_v2i64:
35648 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
35649 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
35650 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35651 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
35652 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
35653 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
35654 ; BITALG-NEXT: retq
35655 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
35656 %3 = icmp ugt <2 x i64> %2, <i64 53, i64 53>
35657 %4 = sext <2 x i1> %3 to <2 x i64>
35661 define <2 x i64> @ult_54_v2i64(<2 x i64> %0) {
35662 ; SSE2-LABEL: ult_54_v2i64:
35664 ; SSE2-NEXT: movdqa %xmm0, %xmm1
35665 ; SSE2-NEXT: psrlw $1, %xmm1
35666 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35667 ; SSE2-NEXT: psubb %xmm1, %xmm0
35668 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
35669 ; SSE2-NEXT: movdqa %xmm0, %xmm2
35670 ; SSE2-NEXT: pand %xmm1, %xmm2
35671 ; SSE2-NEXT: psrlw $2, %xmm0
35672 ; SSE2-NEXT: pand %xmm1, %xmm0
35673 ; SSE2-NEXT: paddb %xmm2, %xmm0
35674 ; SSE2-NEXT: movdqa %xmm0, %xmm1
35675 ; SSE2-NEXT: psrlw $4, %xmm1
35676 ; SSE2-NEXT: paddb %xmm0, %xmm1
35677 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35678 ; SSE2-NEXT: pxor %xmm0, %xmm0
35679 ; SSE2-NEXT: psadbw %xmm0, %xmm1
35680 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35681 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
35682 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
35683 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483702,2147483702]
35684 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
35685 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
35686 ; SSE2-NEXT: pand %xmm2, %xmm1
35687 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
35688 ; SSE2-NEXT: por %xmm1, %xmm0
35691 ; SSE3-LABEL: ult_54_v2i64:
35693 ; SSE3-NEXT: movdqa %xmm0, %xmm1
35694 ; SSE3-NEXT: psrlw $1, %xmm1
35695 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35696 ; SSE3-NEXT: psubb %xmm1, %xmm0
35697 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
35698 ; SSE3-NEXT: movdqa %xmm0, %xmm2
35699 ; SSE3-NEXT: pand %xmm1, %xmm2
35700 ; SSE3-NEXT: psrlw $2, %xmm0
35701 ; SSE3-NEXT: pand %xmm1, %xmm0
35702 ; SSE3-NEXT: paddb %xmm2, %xmm0
35703 ; SSE3-NEXT: movdqa %xmm0, %xmm1
35704 ; SSE3-NEXT: psrlw $4, %xmm1
35705 ; SSE3-NEXT: paddb %xmm0, %xmm1
35706 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35707 ; SSE3-NEXT: pxor %xmm0, %xmm0
35708 ; SSE3-NEXT: psadbw %xmm0, %xmm1
35709 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35710 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
35711 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
35712 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483702,2147483702]
35713 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
35714 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
35715 ; SSE3-NEXT: pand %xmm2, %xmm1
35716 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
35717 ; SSE3-NEXT: por %xmm1, %xmm0
35720 ; SSSE3-LABEL: ult_54_v2i64:
35722 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35723 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
35724 ; SSSE3-NEXT: pand %xmm1, %xmm2
35725 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35726 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
35727 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
35728 ; SSSE3-NEXT: psrlw $4, %xmm0
35729 ; SSSE3-NEXT: pand %xmm1, %xmm0
35730 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
35731 ; SSSE3-NEXT: paddb %xmm4, %xmm3
35732 ; SSSE3-NEXT: pxor %xmm0, %xmm0
35733 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
35734 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
35735 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
35736 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
35737 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483702,2147483702]
35738 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
35739 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
35740 ; SSSE3-NEXT: pand %xmm1, %xmm2
35741 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
35742 ; SSSE3-NEXT: por %xmm2, %xmm0
35745 ; SSE41-LABEL: ult_54_v2i64:
35747 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35748 ; SSE41-NEXT: movdqa %xmm0, %xmm2
35749 ; SSE41-NEXT: pand %xmm1, %xmm2
35750 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35751 ; SSE41-NEXT: movdqa %xmm3, %xmm4
35752 ; SSE41-NEXT: pshufb %xmm2, %xmm4
35753 ; SSE41-NEXT: psrlw $4, %xmm0
35754 ; SSE41-NEXT: pand %xmm1, %xmm0
35755 ; SSE41-NEXT: pshufb %xmm0, %xmm3
35756 ; SSE41-NEXT: paddb %xmm4, %xmm3
35757 ; SSE41-NEXT: pxor %xmm0, %xmm0
35758 ; SSE41-NEXT: psadbw %xmm0, %xmm3
35759 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
35760 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
35761 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
35762 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483702,2147483702]
35763 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
35764 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
35765 ; SSE41-NEXT: pand %xmm1, %xmm2
35766 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
35767 ; SSE41-NEXT: por %xmm2, %xmm0
35770 ; AVX1-LABEL: ult_54_v2i64:
35772 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35773 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
35774 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35775 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
35776 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
35777 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
35778 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
35779 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
35780 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
35781 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35782 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54]
35783 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35786 ; AVX2-LABEL: ult_54_v2i64:
35788 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35789 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
35790 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35791 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
35792 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
35793 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
35794 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
35795 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
35796 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
35797 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35798 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54]
35799 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35802 ; AVX512VPOPCNTDQ-LABEL: ult_54_v2i64:
35803 ; AVX512VPOPCNTDQ: # %bb.0:
35804 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
35805 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
35806 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54]
35807 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35808 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
35809 ; AVX512VPOPCNTDQ-NEXT: retq
35811 ; AVX512VPOPCNTDQVL-LABEL: ult_54_v2i64:
35812 ; AVX512VPOPCNTDQVL: # %bb.0:
35813 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
35814 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
35815 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
35816 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
35817 ; AVX512VPOPCNTDQVL-NEXT: retq
35819 ; BITALG_NOVLX-LABEL: ult_54_v2i64:
35820 ; BITALG_NOVLX: # %bb.0:
35821 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
35822 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
35823 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
35824 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35825 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54]
35826 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35827 ; BITALG_NOVLX-NEXT: vzeroupper
35828 ; BITALG_NOVLX-NEXT: retq
35830 ; BITALG-LABEL: ult_54_v2i64:
35832 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
35833 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
35834 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35835 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
35836 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
35837 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
35838 ; BITALG-NEXT: retq
35839 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
35840 %3 = icmp ult <2 x i64> %2, <i64 54, i64 54>
35841 %4 = sext <2 x i1> %3 to <2 x i64>
35845 define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) {
35846 ; SSE2-LABEL: ugt_54_v2i64:
35848 ; SSE2-NEXT: movdqa %xmm0, %xmm1
35849 ; SSE2-NEXT: psrlw $1, %xmm1
35850 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35851 ; SSE2-NEXT: psubb %xmm1, %xmm0
35852 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
35853 ; SSE2-NEXT: movdqa %xmm0, %xmm2
35854 ; SSE2-NEXT: pand %xmm1, %xmm2
35855 ; SSE2-NEXT: psrlw $2, %xmm0
35856 ; SSE2-NEXT: pand %xmm1, %xmm0
35857 ; SSE2-NEXT: paddb %xmm2, %xmm0
35858 ; SSE2-NEXT: movdqa %xmm0, %xmm1
35859 ; SSE2-NEXT: psrlw $4, %xmm1
35860 ; SSE2-NEXT: paddb %xmm0, %xmm1
35861 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35862 ; SSE2-NEXT: pxor %xmm0, %xmm0
35863 ; SSE2-NEXT: psadbw %xmm0, %xmm1
35864 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35865 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
35866 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
35867 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35868 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
35869 ; SSE2-NEXT: pand %xmm2, %xmm3
35870 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
35871 ; SSE2-NEXT: por %xmm3, %xmm0
35874 ; SSE3-LABEL: ugt_54_v2i64:
35876 ; SSE3-NEXT: movdqa %xmm0, %xmm1
35877 ; SSE3-NEXT: psrlw $1, %xmm1
35878 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35879 ; SSE3-NEXT: psubb %xmm1, %xmm0
35880 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
35881 ; SSE3-NEXT: movdqa %xmm0, %xmm2
35882 ; SSE3-NEXT: pand %xmm1, %xmm2
35883 ; SSE3-NEXT: psrlw $2, %xmm0
35884 ; SSE3-NEXT: pand %xmm1, %xmm0
35885 ; SSE3-NEXT: paddb %xmm2, %xmm0
35886 ; SSE3-NEXT: movdqa %xmm0, %xmm1
35887 ; SSE3-NEXT: psrlw $4, %xmm1
35888 ; SSE3-NEXT: paddb %xmm0, %xmm1
35889 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35890 ; SSE3-NEXT: pxor %xmm0, %xmm0
35891 ; SSE3-NEXT: psadbw %xmm0, %xmm1
35892 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35893 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
35894 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
35895 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35896 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
35897 ; SSE3-NEXT: pand %xmm2, %xmm3
35898 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
35899 ; SSE3-NEXT: por %xmm3, %xmm0
35902 ; SSSE3-LABEL: ugt_54_v2i64:
35904 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35905 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
35906 ; SSSE3-NEXT: pand %xmm1, %xmm2
35907 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35908 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
35909 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
35910 ; SSSE3-NEXT: psrlw $4, %xmm0
35911 ; SSSE3-NEXT: pand %xmm1, %xmm0
35912 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
35913 ; SSSE3-NEXT: paddb %xmm4, %xmm3
35914 ; SSSE3-NEXT: pxor %xmm0, %xmm0
35915 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
35916 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
35917 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
35918 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
35919 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
35920 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
35921 ; SSSE3-NEXT: pand %xmm1, %xmm2
35922 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
35923 ; SSSE3-NEXT: por %xmm2, %xmm0
35926 ; SSE41-LABEL: ugt_54_v2i64:
35928 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35929 ; SSE41-NEXT: movdqa %xmm0, %xmm2
35930 ; SSE41-NEXT: pand %xmm1, %xmm2
35931 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35932 ; SSE41-NEXT: movdqa %xmm3, %xmm4
35933 ; SSE41-NEXT: pshufb %xmm2, %xmm4
35934 ; SSE41-NEXT: psrlw $4, %xmm0
35935 ; SSE41-NEXT: pand %xmm1, %xmm0
35936 ; SSE41-NEXT: pshufb %xmm0, %xmm3
35937 ; SSE41-NEXT: paddb %xmm4, %xmm3
35938 ; SSE41-NEXT: pxor %xmm0, %xmm0
35939 ; SSE41-NEXT: psadbw %xmm0, %xmm3
35940 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
35941 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
35942 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
35943 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
35944 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
35945 ; SSE41-NEXT: pand %xmm1, %xmm2
35946 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
35947 ; SSE41-NEXT: por %xmm2, %xmm0
35950 ; AVX1-LABEL: ugt_54_v2i64:
35952 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35953 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
35954 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35955 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
35956 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
35957 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
35958 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
35959 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
35960 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
35961 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35962 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
35965 ; AVX2-LABEL: ugt_54_v2i64:
35967 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35968 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
35969 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35970 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
35971 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
35972 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
35973 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
35974 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
35975 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
35976 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35977 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
35980 ; AVX512VPOPCNTDQ-LABEL: ugt_54_v2i64:
35981 ; AVX512VPOPCNTDQ: # %bb.0:
35982 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
35983 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
35984 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
35985 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
35986 ; AVX512VPOPCNTDQ-NEXT: retq
35988 ; AVX512VPOPCNTDQVL-LABEL: ugt_54_v2i64:
35989 ; AVX512VPOPCNTDQVL: # %bb.0:
35990 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
35991 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
35992 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
35993 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
35994 ; AVX512VPOPCNTDQVL-NEXT: retq
35996 ; BITALG_NOVLX-LABEL: ugt_54_v2i64:
35997 ; BITALG_NOVLX: # %bb.0:
35998 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
35999 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
36000 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
36001 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
36002 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
36003 ; BITALG_NOVLX-NEXT: vzeroupper
36004 ; BITALG_NOVLX-NEXT: retq
36006 ; BITALG-LABEL: ugt_54_v2i64:
36008 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
36009 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
36010 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
36011 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
36012 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
36013 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
36014 ; BITALG-NEXT: retq
36015 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
36016 %3 = icmp ugt <2 x i64> %2, <i64 54, i64 54>
36017 %4 = sext <2 x i1> %3 to <2 x i64>
36021 define <2 x i64> @ult_55_v2i64(<2 x i64> %0) {
36022 ; SSE2-LABEL: ult_55_v2i64:
36024 ; SSE2-NEXT: movdqa %xmm0, %xmm1
36025 ; SSE2-NEXT: psrlw $1, %xmm1
36026 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36027 ; SSE2-NEXT: psubb %xmm1, %xmm0
36028 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
36029 ; SSE2-NEXT: movdqa %xmm0, %xmm2
36030 ; SSE2-NEXT: pand %xmm1, %xmm2
36031 ; SSE2-NEXT: psrlw $2, %xmm0
36032 ; SSE2-NEXT: pand %xmm1, %xmm0
36033 ; SSE2-NEXT: paddb %xmm2, %xmm0
36034 ; SSE2-NEXT: movdqa %xmm0, %xmm1
36035 ; SSE2-NEXT: psrlw $4, %xmm1
36036 ; SSE2-NEXT: paddb %xmm0, %xmm1
36037 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36038 ; SSE2-NEXT: pxor %xmm0, %xmm0
36039 ; SSE2-NEXT: psadbw %xmm0, %xmm1
36040 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36041 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
36042 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
36043 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483703,2147483703]
36044 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
36045 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
36046 ; SSE2-NEXT: pand %xmm2, %xmm1
36047 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
36048 ; SSE2-NEXT: por %xmm1, %xmm0
36051 ; SSE3-LABEL: ult_55_v2i64:
36053 ; SSE3-NEXT: movdqa %xmm0, %xmm1
36054 ; SSE3-NEXT: psrlw $1, %xmm1
36055 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36056 ; SSE3-NEXT: psubb %xmm1, %xmm0
36057 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
36058 ; SSE3-NEXT: movdqa %xmm0, %xmm2
36059 ; SSE3-NEXT: pand %xmm1, %xmm2
36060 ; SSE3-NEXT: psrlw $2, %xmm0
36061 ; SSE3-NEXT: pand %xmm1, %xmm0
36062 ; SSE3-NEXT: paddb %xmm2, %xmm0
36063 ; SSE3-NEXT: movdqa %xmm0, %xmm1
36064 ; SSE3-NEXT: psrlw $4, %xmm1
36065 ; SSE3-NEXT: paddb %xmm0, %xmm1
36066 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36067 ; SSE3-NEXT: pxor %xmm0, %xmm0
36068 ; SSE3-NEXT: psadbw %xmm0, %xmm1
36069 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36070 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
36071 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
36072 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483703,2147483703]
36073 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
36074 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
36075 ; SSE3-NEXT: pand %xmm2, %xmm1
36076 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
36077 ; SSE3-NEXT: por %xmm1, %xmm0
36080 ; SSSE3-LABEL: ult_55_v2i64:
36082 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36083 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
36084 ; SSSE3-NEXT: pand %xmm1, %xmm2
36085 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36086 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
36087 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
36088 ; SSSE3-NEXT: psrlw $4, %xmm0
36089 ; SSSE3-NEXT: pand %xmm1, %xmm0
36090 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
36091 ; SSSE3-NEXT: paddb %xmm4, %xmm3
36092 ; SSSE3-NEXT: pxor %xmm0, %xmm0
36093 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
36094 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
36095 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
36096 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
36097 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483703,2147483703]
36098 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
36099 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
36100 ; SSSE3-NEXT: pand %xmm1, %xmm2
36101 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
36102 ; SSSE3-NEXT: por %xmm2, %xmm0
36105 ; SSE41-LABEL: ult_55_v2i64:
36107 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36108 ; SSE41-NEXT: movdqa %xmm0, %xmm2
36109 ; SSE41-NEXT: pand %xmm1, %xmm2
36110 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36111 ; SSE41-NEXT: movdqa %xmm3, %xmm4
36112 ; SSE41-NEXT: pshufb %xmm2, %xmm4
36113 ; SSE41-NEXT: psrlw $4, %xmm0
36114 ; SSE41-NEXT: pand %xmm1, %xmm0
36115 ; SSE41-NEXT: pshufb %xmm0, %xmm3
36116 ; SSE41-NEXT: paddb %xmm4, %xmm3
36117 ; SSE41-NEXT: pxor %xmm0, %xmm0
36118 ; SSE41-NEXT: psadbw %xmm0, %xmm3
36119 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
36120 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
36121 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
36122 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483703,2147483703]
36123 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
36124 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
36125 ; SSE41-NEXT: pand %xmm1, %xmm2
36126 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
36127 ; SSE41-NEXT: por %xmm2, %xmm0
36130 ; AVX1-LABEL: ult_55_v2i64:
36132 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36133 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
36134 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36135 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
36136 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
36137 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
36138 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
36139 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
36140 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
36141 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
36142 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55]
36143 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
36146 ; AVX2-LABEL: ult_55_v2i64:
36148 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36149 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
36150 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36151 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
36152 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
36153 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
36154 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
36155 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
36156 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
36157 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
36158 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55]
36159 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
36162 ; AVX512VPOPCNTDQ-LABEL: ult_55_v2i64:
36163 ; AVX512VPOPCNTDQ: # %bb.0:
36164 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
36165 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
36166 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55]
36167 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
36168 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
36169 ; AVX512VPOPCNTDQ-NEXT: retq
36171 ; AVX512VPOPCNTDQVL-LABEL: ult_55_v2i64:
36172 ; AVX512VPOPCNTDQVL: # %bb.0:
36173 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
36174 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
36175 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
36176 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
36177 ; AVX512VPOPCNTDQVL-NEXT: retq
36179 ; BITALG_NOVLX-LABEL: ult_55_v2i64:
36180 ; BITALG_NOVLX: # %bb.0:
36181 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
36182 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
36183 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
36184 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
36185 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55]
36186 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
36187 ; BITALG_NOVLX-NEXT: vzeroupper
36188 ; BITALG_NOVLX-NEXT: retq
36190 ; BITALG-LABEL: ult_55_v2i64:
36192 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
36193 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
36194 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
36195 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
36196 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
36197 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
36198 ; BITALG-NEXT: retq
36199 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
36200 %3 = icmp ult <2 x i64> %2, <i64 55, i64 55>
36201 %4 = sext <2 x i1> %3 to <2 x i64>
36205 define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) {
36206 ; SSE2-LABEL: ugt_55_v2i64:
36208 ; SSE2-NEXT: movdqa %xmm0, %xmm1
36209 ; SSE2-NEXT: psrlw $1, %xmm1
36210 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36211 ; SSE2-NEXT: psubb %xmm1, %xmm0
36212 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
36213 ; SSE2-NEXT: movdqa %xmm0, %xmm2
36214 ; SSE2-NEXT: pand %xmm1, %xmm2
36215 ; SSE2-NEXT: psrlw $2, %xmm0
36216 ; SSE2-NEXT: pand %xmm1, %xmm0
36217 ; SSE2-NEXT: paddb %xmm2, %xmm0
36218 ; SSE2-NEXT: movdqa %xmm0, %xmm1
36219 ; SSE2-NEXT: psrlw $4, %xmm1
36220 ; SSE2-NEXT: paddb %xmm0, %xmm1
36221 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36222 ; SSE2-NEXT: pxor %xmm0, %xmm0
36223 ; SSE2-NEXT: psadbw %xmm0, %xmm1
36224 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36225 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
36226 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
36227 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36228 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
36229 ; SSE2-NEXT: pand %xmm2, %xmm3
36230 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
36231 ; SSE2-NEXT: por %xmm3, %xmm0
36234 ; SSE3-LABEL: ugt_55_v2i64:
36236 ; SSE3-NEXT: movdqa %xmm0, %xmm1
36237 ; SSE3-NEXT: psrlw $1, %xmm1
36238 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36239 ; SSE3-NEXT: psubb %xmm1, %xmm0
36240 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
36241 ; SSE3-NEXT: movdqa %xmm0, %xmm2
36242 ; SSE3-NEXT: pand %xmm1, %xmm2
36243 ; SSE3-NEXT: psrlw $2, %xmm0
36244 ; SSE3-NEXT: pand %xmm1, %xmm0
36245 ; SSE3-NEXT: paddb %xmm2, %xmm0
36246 ; SSE3-NEXT: movdqa %xmm0, %xmm1
36247 ; SSE3-NEXT: psrlw $4, %xmm1
36248 ; SSE3-NEXT: paddb %xmm0, %xmm1
36249 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36250 ; SSE3-NEXT: pxor %xmm0, %xmm0
36251 ; SSE3-NEXT: psadbw %xmm0, %xmm1
36252 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36253 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
36254 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
36255 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36256 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
36257 ; SSE3-NEXT: pand %xmm2, %xmm3
36258 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
36259 ; SSE3-NEXT: por %xmm3, %xmm0
36262 ; SSSE3-LABEL: ugt_55_v2i64:
36264 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36265 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
36266 ; SSSE3-NEXT: pand %xmm1, %xmm2
36267 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36268 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
36269 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
36270 ; SSSE3-NEXT: psrlw $4, %xmm0
36271 ; SSSE3-NEXT: pand %xmm1, %xmm0
36272 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
36273 ; SSSE3-NEXT: paddb %xmm4, %xmm3
36274 ; SSSE3-NEXT: pxor %xmm0, %xmm0
36275 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
36276 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
36277 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
36278 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
36279 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
36280 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
36281 ; SSSE3-NEXT: pand %xmm1, %xmm2
36282 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
36283 ; SSSE3-NEXT: por %xmm2, %xmm0
36286 ; SSE41-LABEL: ugt_55_v2i64:
36288 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36289 ; SSE41-NEXT: movdqa %xmm0, %xmm2
36290 ; SSE41-NEXT: pand %xmm1, %xmm2
36291 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36292 ; SSE41-NEXT: movdqa %xmm3, %xmm4
36293 ; SSE41-NEXT: pshufb %xmm2, %xmm4
36294 ; SSE41-NEXT: psrlw $4, %xmm0
36295 ; SSE41-NEXT: pand %xmm1, %xmm0
36296 ; SSE41-NEXT: pshufb %xmm0, %xmm3
36297 ; SSE41-NEXT: paddb %xmm4, %xmm3
36298 ; SSE41-NEXT: pxor %xmm0, %xmm0
36299 ; SSE41-NEXT: psadbw %xmm0, %xmm3
36300 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
36301 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
36302 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
36303 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
36304 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
36305 ; SSE41-NEXT: pand %xmm1, %xmm2
36306 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
36307 ; SSE41-NEXT: por %xmm2, %xmm0
36310 ; AVX1-LABEL: ugt_55_v2i64:
36312 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36313 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
36314 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36315 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
36316 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
36317 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
36318 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
36319 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
36320 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
36321 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
36322 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
36325 ; AVX2-LABEL: ugt_55_v2i64:
36327 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36328 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
36329 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36330 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
36331 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
36332 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
36333 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
36334 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
36335 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
36336 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
36337 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
36340 ; AVX512VPOPCNTDQ-LABEL: ugt_55_v2i64:
36341 ; AVX512VPOPCNTDQ: # %bb.0:
36342 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
36343 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
36344 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
36345 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
36346 ; AVX512VPOPCNTDQ-NEXT: retq
36348 ; AVX512VPOPCNTDQVL-LABEL: ugt_55_v2i64:
36349 ; AVX512VPOPCNTDQVL: # %bb.0:
36350 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
36351 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
36352 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
36353 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
36354 ; AVX512VPOPCNTDQVL-NEXT: retq
36356 ; BITALG_NOVLX-LABEL: ugt_55_v2i64:
36357 ; BITALG_NOVLX: # %bb.0:
36358 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
36359 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
36360 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
36361 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
36362 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
36363 ; BITALG_NOVLX-NEXT: vzeroupper
36364 ; BITALG_NOVLX-NEXT: retq
36366 ; BITALG-LABEL: ugt_55_v2i64:
36368 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
36369 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
36370 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
36371 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
36372 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
36373 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
36374 ; BITALG-NEXT: retq
36375 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
36376 %3 = icmp ugt <2 x i64> %2, <i64 55, i64 55>
36377 %4 = sext <2 x i1> %3 to <2 x i64>
36381 define <2 x i64> @ult_56_v2i64(<2 x i64> %0) {
36382 ; SSE2-LABEL: ult_56_v2i64:
36384 ; SSE2-NEXT: movdqa %xmm0, %xmm1
36385 ; SSE2-NEXT: psrlw $1, %xmm1
36386 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36387 ; SSE2-NEXT: psubb %xmm1, %xmm0
36388 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
36389 ; SSE2-NEXT: movdqa %xmm0, %xmm2
36390 ; SSE2-NEXT: pand %xmm1, %xmm2
36391 ; SSE2-NEXT: psrlw $2, %xmm0
36392 ; SSE2-NEXT: pand %xmm1, %xmm0
36393 ; SSE2-NEXT: paddb %xmm2, %xmm0
36394 ; SSE2-NEXT: movdqa %xmm0, %xmm1
36395 ; SSE2-NEXT: psrlw $4, %xmm1
36396 ; SSE2-NEXT: paddb %xmm0, %xmm1
36397 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36398 ; SSE2-NEXT: pxor %xmm0, %xmm0
36399 ; SSE2-NEXT: psadbw %xmm0, %xmm1
36400 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36401 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
36402 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
36403 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483704,2147483704]
36404 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
36405 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
36406 ; SSE2-NEXT: pand %xmm2, %xmm1
36407 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
36408 ; SSE2-NEXT: por %xmm1, %xmm0
36411 ; SSE3-LABEL: ult_56_v2i64:
36413 ; SSE3-NEXT: movdqa %xmm0, %xmm1
36414 ; SSE3-NEXT: psrlw $1, %xmm1
36415 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36416 ; SSE3-NEXT: psubb %xmm1, %xmm0
36417 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
36418 ; SSE3-NEXT: movdqa %xmm0, %xmm2
36419 ; SSE3-NEXT: pand %xmm1, %xmm2
36420 ; SSE3-NEXT: psrlw $2, %xmm0
36421 ; SSE3-NEXT: pand %xmm1, %xmm0
36422 ; SSE3-NEXT: paddb %xmm2, %xmm0
36423 ; SSE3-NEXT: movdqa %xmm0, %xmm1
36424 ; SSE3-NEXT: psrlw $4, %xmm1
36425 ; SSE3-NEXT: paddb %xmm0, %xmm1
36426 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36427 ; SSE3-NEXT: pxor %xmm0, %xmm0
36428 ; SSE3-NEXT: psadbw %xmm0, %xmm1
36429 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36430 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
36431 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
36432 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483704,2147483704]
36433 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
36434 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
36435 ; SSE3-NEXT: pand %xmm2, %xmm1
36436 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
36437 ; SSE3-NEXT: por %xmm1, %xmm0
36440 ; SSSE3-LABEL: ult_56_v2i64:
36442 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36443 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
36444 ; SSSE3-NEXT: pand %xmm1, %xmm2
36445 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36446 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
36447 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
36448 ; SSSE3-NEXT: psrlw $4, %xmm0
36449 ; SSSE3-NEXT: pand %xmm1, %xmm0
36450 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
36451 ; SSSE3-NEXT: paddb %xmm4, %xmm3
36452 ; SSSE3-NEXT: pxor %xmm0, %xmm0
36453 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
36454 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
36455 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
36456 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
36457 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483704,2147483704]
36458 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
36459 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
36460 ; SSSE3-NEXT: pand %xmm1, %xmm2
36461 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
36462 ; SSSE3-NEXT: por %xmm2, %xmm0
36465 ; SSE41-LABEL: ult_56_v2i64:
36467 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36468 ; SSE41-NEXT: movdqa %xmm0, %xmm2
36469 ; SSE41-NEXT: pand %xmm1, %xmm2
36470 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36471 ; SSE41-NEXT: movdqa %xmm3, %xmm4
36472 ; SSE41-NEXT: pshufb %xmm2, %xmm4
36473 ; SSE41-NEXT: psrlw $4, %xmm0
36474 ; SSE41-NEXT: pand %xmm1, %xmm0
36475 ; SSE41-NEXT: pshufb %xmm0, %xmm3
36476 ; SSE41-NEXT: paddb %xmm4, %xmm3
36477 ; SSE41-NEXT: pxor %xmm0, %xmm0
36478 ; SSE41-NEXT: psadbw %xmm0, %xmm3
36479 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
36480 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
36481 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
36482 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483704,2147483704]
36483 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
36484 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
36485 ; SSE41-NEXT: pand %xmm1, %xmm2
36486 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
36487 ; SSE41-NEXT: por %xmm2, %xmm0
36490 ; AVX1-LABEL: ult_56_v2i64:
36492 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36493 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
36494 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36495 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
36496 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
36497 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
36498 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
36499 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
36500 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
36501 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
36502 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56]
36503 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
36506 ; AVX2-LABEL: ult_56_v2i64:
36508 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36509 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
36510 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36511 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
36512 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
36513 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
36514 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
36515 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
36516 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
36517 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
36518 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56]
36519 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
36522 ; AVX512VPOPCNTDQ-LABEL: ult_56_v2i64:
36523 ; AVX512VPOPCNTDQ: # %bb.0:
36524 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
36525 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
36526 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56]
36527 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
36528 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
36529 ; AVX512VPOPCNTDQ-NEXT: retq
36531 ; AVX512VPOPCNTDQVL-LABEL: ult_56_v2i64:
36532 ; AVX512VPOPCNTDQVL: # %bb.0:
36533 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
36534 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
36535 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
36536 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
36537 ; AVX512VPOPCNTDQVL-NEXT: retq
36539 ; BITALG_NOVLX-LABEL: ult_56_v2i64:
36540 ; BITALG_NOVLX: # %bb.0:
36541 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
36542 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
36543 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
36544 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
36545 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56]
36546 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
36547 ; BITALG_NOVLX-NEXT: vzeroupper
36548 ; BITALG_NOVLX-NEXT: retq
36550 ; BITALG-LABEL: ult_56_v2i64:
36552 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
36553 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
36554 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
36555 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
36556 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
36557 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
36558 ; BITALG-NEXT: retq
36559 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
36560 %3 = icmp ult <2 x i64> %2, <i64 56, i64 56>
36561 %4 = sext <2 x i1> %3 to <2 x i64>
36565 define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) {
36566 ; SSE2-LABEL: ugt_56_v2i64:
36568 ; SSE2-NEXT: movdqa %xmm0, %xmm1
36569 ; SSE2-NEXT: psrlw $1, %xmm1
36570 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36571 ; SSE2-NEXT: psubb %xmm1, %xmm0
36572 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
36573 ; SSE2-NEXT: movdqa %xmm0, %xmm2
36574 ; SSE2-NEXT: pand %xmm1, %xmm2
36575 ; SSE2-NEXT: psrlw $2, %xmm0
36576 ; SSE2-NEXT: pand %xmm1, %xmm0
36577 ; SSE2-NEXT: paddb %xmm2, %xmm0
36578 ; SSE2-NEXT: movdqa %xmm0, %xmm1
36579 ; SSE2-NEXT: psrlw $4, %xmm1
36580 ; SSE2-NEXT: paddb %xmm0, %xmm1
36581 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36582 ; SSE2-NEXT: pxor %xmm0, %xmm0
36583 ; SSE2-NEXT: psadbw %xmm0, %xmm1
36584 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36585 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
36586 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
36587 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36588 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
36589 ; SSE2-NEXT: pand %xmm2, %xmm3
36590 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
36591 ; SSE2-NEXT: por %xmm3, %xmm0
36594 ; SSE3-LABEL: ugt_56_v2i64:
36596 ; SSE3-NEXT: movdqa %xmm0, %xmm1
36597 ; SSE3-NEXT: psrlw $1, %xmm1
36598 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36599 ; SSE3-NEXT: psubb %xmm1, %xmm0
36600 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
36601 ; SSE3-NEXT: movdqa %xmm0, %xmm2
36602 ; SSE3-NEXT: pand %xmm1, %xmm2
36603 ; SSE3-NEXT: psrlw $2, %xmm0
36604 ; SSE3-NEXT: pand %xmm1, %xmm0
36605 ; SSE3-NEXT: paddb %xmm2, %xmm0
36606 ; SSE3-NEXT: movdqa %xmm0, %xmm1
36607 ; SSE3-NEXT: psrlw $4, %xmm1
36608 ; SSE3-NEXT: paddb %xmm0, %xmm1
36609 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36610 ; SSE3-NEXT: pxor %xmm0, %xmm0
36611 ; SSE3-NEXT: psadbw %xmm0, %xmm1
36612 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36613 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
36614 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
36615 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36616 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
36617 ; SSE3-NEXT: pand %xmm2, %xmm3
36618 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
36619 ; SSE3-NEXT: por %xmm3, %xmm0
36622 ; SSSE3-LABEL: ugt_56_v2i64:
36624 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36625 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
36626 ; SSSE3-NEXT: pand %xmm1, %xmm2
36627 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36628 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
36629 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
36630 ; SSSE3-NEXT: psrlw $4, %xmm0
36631 ; SSSE3-NEXT: pand %xmm1, %xmm0
36632 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
36633 ; SSSE3-NEXT: paddb %xmm4, %xmm3
36634 ; SSSE3-NEXT: pxor %xmm0, %xmm0
36635 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
36636 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
36637 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
36638 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
36639 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
36640 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
36641 ; SSSE3-NEXT: pand %xmm1, %xmm2
36642 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
36643 ; SSSE3-NEXT: por %xmm2, %xmm0
36646 ; SSE41-LABEL: ugt_56_v2i64:
36648 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36649 ; SSE41-NEXT: movdqa %xmm0, %xmm2
36650 ; SSE41-NEXT: pand %xmm1, %xmm2
36651 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36652 ; SSE41-NEXT: movdqa %xmm3, %xmm4
36653 ; SSE41-NEXT: pshufb %xmm2, %xmm4
36654 ; SSE41-NEXT: psrlw $4, %xmm0
36655 ; SSE41-NEXT: pand %xmm1, %xmm0
36656 ; SSE41-NEXT: pshufb %xmm0, %xmm3
36657 ; SSE41-NEXT: paddb %xmm4, %xmm3
36658 ; SSE41-NEXT: pxor %xmm0, %xmm0
36659 ; SSE41-NEXT: psadbw %xmm0, %xmm3
36660 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
36661 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
36662 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
36663 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
36664 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
36665 ; SSE41-NEXT: pand %xmm1, %xmm2
36666 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
36667 ; SSE41-NEXT: por %xmm2, %xmm0
36670 ; AVX1-LABEL: ugt_56_v2i64:
36672 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36673 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
36674 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36675 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
36676 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
36677 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
36678 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
36679 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
36680 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
36681 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
36682 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
36685 ; AVX2-LABEL: ugt_56_v2i64:
36687 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36688 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
36689 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36690 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
36691 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
36692 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
36693 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
36694 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
36695 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
36696 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
36697 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
36700 ; AVX512VPOPCNTDQ-LABEL: ugt_56_v2i64:
36701 ; AVX512VPOPCNTDQ: # %bb.0:
36702 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
36703 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
36704 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
36705 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
36706 ; AVX512VPOPCNTDQ-NEXT: retq
36708 ; AVX512VPOPCNTDQVL-LABEL: ugt_56_v2i64:
36709 ; AVX512VPOPCNTDQVL: # %bb.0:
36710 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
36711 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
36712 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
36713 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
36714 ; AVX512VPOPCNTDQVL-NEXT: retq
36716 ; BITALG_NOVLX-LABEL: ugt_56_v2i64:
36717 ; BITALG_NOVLX: # %bb.0:
36718 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
36719 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
36720 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
36721 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
36722 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
36723 ; BITALG_NOVLX-NEXT: vzeroupper
36724 ; BITALG_NOVLX-NEXT: retq
36726 ; BITALG-LABEL: ugt_56_v2i64:
36728 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
36729 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
36730 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
36731 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
36732 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
36733 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
36734 ; BITALG-NEXT: retq
36735 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
36736 %3 = icmp ugt <2 x i64> %2, <i64 56, i64 56>
36737 %4 = sext <2 x i1> %3 to <2 x i64>
36741 define <2 x i64> @ult_57_v2i64(<2 x i64> %0) {
36742 ; SSE2-LABEL: ult_57_v2i64:
36744 ; SSE2-NEXT: movdqa %xmm0, %xmm1
36745 ; SSE2-NEXT: psrlw $1, %xmm1
36746 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36747 ; SSE2-NEXT: psubb %xmm1, %xmm0
36748 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
36749 ; SSE2-NEXT: movdqa %xmm0, %xmm2
36750 ; SSE2-NEXT: pand %xmm1, %xmm2
36751 ; SSE2-NEXT: psrlw $2, %xmm0
36752 ; SSE2-NEXT: pand %xmm1, %xmm0
36753 ; SSE2-NEXT: paddb %xmm2, %xmm0
36754 ; SSE2-NEXT: movdqa %xmm0, %xmm1
36755 ; SSE2-NEXT: psrlw $4, %xmm1
36756 ; SSE2-NEXT: paddb %xmm0, %xmm1
36757 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36758 ; SSE2-NEXT: pxor %xmm0, %xmm0
36759 ; SSE2-NEXT: psadbw %xmm0, %xmm1
36760 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36761 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
36762 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
36763 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483705,2147483705]
36764 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
36765 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
36766 ; SSE2-NEXT: pand %xmm2, %xmm1
36767 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
36768 ; SSE2-NEXT: por %xmm1, %xmm0
36771 ; SSE3-LABEL: ult_57_v2i64:
36773 ; SSE3-NEXT: movdqa %xmm0, %xmm1
36774 ; SSE3-NEXT: psrlw $1, %xmm1
36775 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36776 ; SSE3-NEXT: psubb %xmm1, %xmm0
36777 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
36778 ; SSE3-NEXT: movdqa %xmm0, %xmm2
36779 ; SSE3-NEXT: pand %xmm1, %xmm2
36780 ; SSE3-NEXT: psrlw $2, %xmm0
36781 ; SSE3-NEXT: pand %xmm1, %xmm0
36782 ; SSE3-NEXT: paddb %xmm2, %xmm0
36783 ; SSE3-NEXT: movdqa %xmm0, %xmm1
36784 ; SSE3-NEXT: psrlw $4, %xmm1
36785 ; SSE3-NEXT: paddb %xmm0, %xmm1
36786 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36787 ; SSE3-NEXT: pxor %xmm0, %xmm0
36788 ; SSE3-NEXT: psadbw %xmm0, %xmm1
36789 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36790 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
36791 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
36792 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483705,2147483705]
36793 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
36794 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
36795 ; SSE3-NEXT: pand %xmm2, %xmm1
36796 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
36797 ; SSE3-NEXT: por %xmm1, %xmm0
36800 ; SSSE3-LABEL: ult_57_v2i64:
36802 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36803 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
36804 ; SSSE3-NEXT: pand %xmm1, %xmm2
36805 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36806 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
36807 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
36808 ; SSSE3-NEXT: psrlw $4, %xmm0
36809 ; SSSE3-NEXT: pand %xmm1, %xmm0
36810 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
36811 ; SSSE3-NEXT: paddb %xmm4, %xmm3
36812 ; SSSE3-NEXT: pxor %xmm0, %xmm0
36813 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
36814 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
36815 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
36816 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
36817 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483705,2147483705]
36818 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
36819 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
36820 ; SSSE3-NEXT: pand %xmm1, %xmm2
36821 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
36822 ; SSSE3-NEXT: por %xmm2, %xmm0
36825 ; SSE41-LABEL: ult_57_v2i64:
36827 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36828 ; SSE41-NEXT: movdqa %xmm0, %xmm2
36829 ; SSE41-NEXT: pand %xmm1, %xmm2
36830 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36831 ; SSE41-NEXT: movdqa %xmm3, %xmm4
36832 ; SSE41-NEXT: pshufb %xmm2, %xmm4
36833 ; SSE41-NEXT: psrlw $4, %xmm0
36834 ; SSE41-NEXT: pand %xmm1, %xmm0
36835 ; SSE41-NEXT: pshufb %xmm0, %xmm3
36836 ; SSE41-NEXT: paddb %xmm4, %xmm3
36837 ; SSE41-NEXT: pxor %xmm0, %xmm0
36838 ; SSE41-NEXT: psadbw %xmm0, %xmm3
36839 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
36840 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
36841 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
36842 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483705,2147483705]
36843 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
36844 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
36845 ; SSE41-NEXT: pand %xmm1, %xmm2
36846 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
36847 ; SSE41-NEXT: por %xmm2, %xmm0
36850 ; AVX1-LABEL: ult_57_v2i64:
36852 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36853 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
36854 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36855 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
36856 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
36857 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
36858 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
36859 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
36860 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
36861 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
36862 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57]
36863 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
36866 ; AVX2-LABEL: ult_57_v2i64:
36868 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36869 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
36870 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36871 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
36872 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
36873 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
36874 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
36875 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
36876 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
36877 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
36878 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57]
36879 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
36882 ; AVX512VPOPCNTDQ-LABEL: ult_57_v2i64:
36883 ; AVX512VPOPCNTDQ: # %bb.0:
36884 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
36885 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
36886 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57]
36887 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
36888 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
36889 ; AVX512VPOPCNTDQ-NEXT: retq
36891 ; AVX512VPOPCNTDQVL-LABEL: ult_57_v2i64:
36892 ; AVX512VPOPCNTDQVL: # %bb.0:
36893 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
36894 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
36895 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
36896 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
36897 ; AVX512VPOPCNTDQVL-NEXT: retq
36899 ; BITALG_NOVLX-LABEL: ult_57_v2i64:
36900 ; BITALG_NOVLX: # %bb.0:
36901 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
36902 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
36903 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
36904 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
36905 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57]
36906 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
36907 ; BITALG_NOVLX-NEXT: vzeroupper
36908 ; BITALG_NOVLX-NEXT: retq
36910 ; BITALG-LABEL: ult_57_v2i64:
36912 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
36913 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
36914 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
36915 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
36916 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
36917 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
36918 ; BITALG-NEXT: retq
36919 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
36920 %3 = icmp ult <2 x i64> %2, <i64 57, i64 57>
36921 %4 = sext <2 x i1> %3 to <2 x i64>
36925 define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) {
36926 ; SSE2-LABEL: ugt_57_v2i64:
36928 ; SSE2-NEXT: movdqa %xmm0, %xmm1
36929 ; SSE2-NEXT: psrlw $1, %xmm1
36930 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36931 ; SSE2-NEXT: psubb %xmm1, %xmm0
36932 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
36933 ; SSE2-NEXT: movdqa %xmm0, %xmm2
36934 ; SSE2-NEXT: pand %xmm1, %xmm2
36935 ; SSE2-NEXT: psrlw $2, %xmm0
36936 ; SSE2-NEXT: pand %xmm1, %xmm0
36937 ; SSE2-NEXT: paddb %xmm2, %xmm0
36938 ; SSE2-NEXT: movdqa %xmm0, %xmm1
36939 ; SSE2-NEXT: psrlw $4, %xmm1
36940 ; SSE2-NEXT: paddb %xmm0, %xmm1
36941 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36942 ; SSE2-NEXT: pxor %xmm0, %xmm0
36943 ; SSE2-NEXT: psadbw %xmm0, %xmm1
36944 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36945 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
36946 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
36947 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36948 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
36949 ; SSE2-NEXT: pand %xmm2, %xmm3
36950 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
36951 ; SSE2-NEXT: por %xmm3, %xmm0
36954 ; SSE3-LABEL: ugt_57_v2i64:
36956 ; SSE3-NEXT: movdqa %xmm0, %xmm1
36957 ; SSE3-NEXT: psrlw $1, %xmm1
36958 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36959 ; SSE3-NEXT: psubb %xmm1, %xmm0
36960 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
36961 ; SSE3-NEXT: movdqa %xmm0, %xmm2
36962 ; SSE3-NEXT: pand %xmm1, %xmm2
36963 ; SSE3-NEXT: psrlw $2, %xmm0
36964 ; SSE3-NEXT: pand %xmm1, %xmm0
36965 ; SSE3-NEXT: paddb %xmm2, %xmm0
36966 ; SSE3-NEXT: movdqa %xmm0, %xmm1
36967 ; SSE3-NEXT: psrlw $4, %xmm1
36968 ; SSE3-NEXT: paddb %xmm0, %xmm1
36969 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36970 ; SSE3-NEXT: pxor %xmm0, %xmm0
36971 ; SSE3-NEXT: psadbw %xmm0, %xmm1
36972 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36973 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
36974 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
36975 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
36976 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
36977 ; SSE3-NEXT: pand %xmm2, %xmm3
36978 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
36979 ; SSE3-NEXT: por %xmm3, %xmm0
36982 ; SSSE3-LABEL: ugt_57_v2i64:
36984 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
36985 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
36986 ; SSSE3-NEXT: pand %xmm1, %xmm2
36987 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
36988 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
36989 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
36990 ; SSSE3-NEXT: psrlw $4, %xmm0
36991 ; SSSE3-NEXT: pand %xmm1, %xmm0
36992 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
36993 ; SSSE3-NEXT: paddb %xmm4, %xmm3
36994 ; SSSE3-NEXT: pxor %xmm0, %xmm0
36995 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
36996 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
36997 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
36998 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
36999 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
37000 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
37001 ; SSSE3-NEXT: pand %xmm1, %xmm2
37002 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
37003 ; SSSE3-NEXT: por %xmm2, %xmm0
37006 ; SSE41-LABEL: ugt_57_v2i64:
37008 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37009 ; SSE41-NEXT: movdqa %xmm0, %xmm2
37010 ; SSE41-NEXT: pand %xmm1, %xmm2
37011 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37012 ; SSE41-NEXT: movdqa %xmm3, %xmm4
37013 ; SSE41-NEXT: pshufb %xmm2, %xmm4
37014 ; SSE41-NEXT: psrlw $4, %xmm0
37015 ; SSE41-NEXT: pand %xmm1, %xmm0
37016 ; SSE41-NEXT: pshufb %xmm0, %xmm3
37017 ; SSE41-NEXT: paddb %xmm4, %xmm3
37018 ; SSE41-NEXT: pxor %xmm0, %xmm0
37019 ; SSE41-NEXT: psadbw %xmm0, %xmm3
37020 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
37021 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
37022 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
37023 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
37024 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
37025 ; SSE41-NEXT: pand %xmm1, %xmm2
37026 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
37027 ; SSE41-NEXT: por %xmm2, %xmm0
37030 ; AVX1-LABEL: ugt_57_v2i64:
37032 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37033 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
37034 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37035 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
37036 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
37037 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
37038 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
37039 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
37040 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
37041 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
37042 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
37045 ; AVX2-LABEL: ugt_57_v2i64:
37047 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37048 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
37049 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37050 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
37051 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
37052 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
37053 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
37054 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
37055 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
37056 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
37057 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
37060 ; AVX512VPOPCNTDQ-LABEL: ugt_57_v2i64:
37061 ; AVX512VPOPCNTDQ: # %bb.0:
37062 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
37063 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
37064 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
37065 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
37066 ; AVX512VPOPCNTDQ-NEXT: retq
37068 ; AVX512VPOPCNTDQVL-LABEL: ugt_57_v2i64:
37069 ; AVX512VPOPCNTDQVL: # %bb.0:
37070 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
37071 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
37072 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
37073 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
37074 ; AVX512VPOPCNTDQVL-NEXT: retq
37076 ; BITALG_NOVLX-LABEL: ugt_57_v2i64:
37077 ; BITALG_NOVLX: # %bb.0:
37078 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
37079 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
37080 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
37081 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
37082 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
37083 ; BITALG_NOVLX-NEXT: vzeroupper
37084 ; BITALG_NOVLX-NEXT: retq
37086 ; BITALG-LABEL: ugt_57_v2i64:
37088 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
37089 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
37090 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
37091 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
37092 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
37093 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
37094 ; BITALG-NEXT: retq
37095 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
37096 %3 = icmp ugt <2 x i64> %2, <i64 57, i64 57>
37097 %4 = sext <2 x i1> %3 to <2 x i64>
37101 define <2 x i64> @ult_58_v2i64(<2 x i64> %0) {
37102 ; SSE2-LABEL: ult_58_v2i64:
37104 ; SSE2-NEXT: movdqa %xmm0, %xmm1
37105 ; SSE2-NEXT: psrlw $1, %xmm1
37106 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37107 ; SSE2-NEXT: psubb %xmm1, %xmm0
37108 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
37109 ; SSE2-NEXT: movdqa %xmm0, %xmm2
37110 ; SSE2-NEXT: pand %xmm1, %xmm2
37111 ; SSE2-NEXT: psrlw $2, %xmm0
37112 ; SSE2-NEXT: pand %xmm1, %xmm0
37113 ; SSE2-NEXT: paddb %xmm2, %xmm0
37114 ; SSE2-NEXT: movdqa %xmm0, %xmm1
37115 ; SSE2-NEXT: psrlw $4, %xmm1
37116 ; SSE2-NEXT: paddb %xmm0, %xmm1
37117 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37118 ; SSE2-NEXT: pxor %xmm0, %xmm0
37119 ; SSE2-NEXT: psadbw %xmm0, %xmm1
37120 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37121 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
37122 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
37123 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483706,2147483706]
37124 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
37125 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
37126 ; SSE2-NEXT: pand %xmm2, %xmm1
37127 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
37128 ; SSE2-NEXT: por %xmm1, %xmm0
37131 ; SSE3-LABEL: ult_58_v2i64:
37133 ; SSE3-NEXT: movdqa %xmm0, %xmm1
37134 ; SSE3-NEXT: psrlw $1, %xmm1
37135 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37136 ; SSE3-NEXT: psubb %xmm1, %xmm0
37137 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
37138 ; SSE3-NEXT: movdqa %xmm0, %xmm2
37139 ; SSE3-NEXT: pand %xmm1, %xmm2
37140 ; SSE3-NEXT: psrlw $2, %xmm0
37141 ; SSE3-NEXT: pand %xmm1, %xmm0
37142 ; SSE3-NEXT: paddb %xmm2, %xmm0
37143 ; SSE3-NEXT: movdqa %xmm0, %xmm1
37144 ; SSE3-NEXT: psrlw $4, %xmm1
37145 ; SSE3-NEXT: paddb %xmm0, %xmm1
37146 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37147 ; SSE3-NEXT: pxor %xmm0, %xmm0
37148 ; SSE3-NEXT: psadbw %xmm0, %xmm1
37149 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37150 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
37151 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
37152 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483706,2147483706]
37153 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
37154 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
37155 ; SSE3-NEXT: pand %xmm2, %xmm1
37156 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
37157 ; SSE3-NEXT: por %xmm1, %xmm0
37160 ; SSSE3-LABEL: ult_58_v2i64:
37162 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37163 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
37164 ; SSSE3-NEXT: pand %xmm1, %xmm2
37165 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37166 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
37167 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
37168 ; SSSE3-NEXT: psrlw $4, %xmm0
37169 ; SSSE3-NEXT: pand %xmm1, %xmm0
37170 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
37171 ; SSSE3-NEXT: paddb %xmm4, %xmm3
37172 ; SSSE3-NEXT: pxor %xmm0, %xmm0
37173 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
37174 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
37175 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
37176 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
37177 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483706,2147483706]
37178 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
37179 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
37180 ; SSSE3-NEXT: pand %xmm1, %xmm2
37181 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
37182 ; SSSE3-NEXT: por %xmm2, %xmm0
37185 ; SSE41-LABEL: ult_58_v2i64:
37187 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37188 ; SSE41-NEXT: movdqa %xmm0, %xmm2
37189 ; SSE41-NEXT: pand %xmm1, %xmm2
37190 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37191 ; SSE41-NEXT: movdqa %xmm3, %xmm4
37192 ; SSE41-NEXT: pshufb %xmm2, %xmm4
37193 ; SSE41-NEXT: psrlw $4, %xmm0
37194 ; SSE41-NEXT: pand %xmm1, %xmm0
37195 ; SSE41-NEXT: pshufb %xmm0, %xmm3
37196 ; SSE41-NEXT: paddb %xmm4, %xmm3
37197 ; SSE41-NEXT: pxor %xmm0, %xmm0
37198 ; SSE41-NEXT: psadbw %xmm0, %xmm3
37199 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
37200 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
37201 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
37202 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483706,2147483706]
37203 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
37204 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
37205 ; SSE41-NEXT: pand %xmm1, %xmm2
37206 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
37207 ; SSE41-NEXT: por %xmm2, %xmm0
37210 ; AVX1-LABEL: ult_58_v2i64:
37212 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37213 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
37214 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37215 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
37216 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
37217 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
37218 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
37219 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
37220 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
37221 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
37222 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58]
37223 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
37226 ; AVX2-LABEL: ult_58_v2i64:
37228 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37229 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
37230 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37231 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
37232 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
37233 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
37234 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
37235 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
37236 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
37237 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
37238 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58]
37239 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
37242 ; AVX512VPOPCNTDQ-LABEL: ult_58_v2i64:
37243 ; AVX512VPOPCNTDQ: # %bb.0:
37244 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
37245 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
37246 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58]
37247 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
37248 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
37249 ; AVX512VPOPCNTDQ-NEXT: retq
37251 ; AVX512VPOPCNTDQVL-LABEL: ult_58_v2i64:
37252 ; AVX512VPOPCNTDQVL: # %bb.0:
37253 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
37254 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
37255 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
37256 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
37257 ; AVX512VPOPCNTDQVL-NEXT: retq
37259 ; BITALG_NOVLX-LABEL: ult_58_v2i64:
37260 ; BITALG_NOVLX: # %bb.0:
37261 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
37262 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
37263 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
37264 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
37265 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58]
37266 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
37267 ; BITALG_NOVLX-NEXT: vzeroupper
37268 ; BITALG_NOVLX-NEXT: retq
37270 ; BITALG-LABEL: ult_58_v2i64:
37272 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
37273 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
37274 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
37275 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
37276 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
37277 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
37278 ; BITALG-NEXT: retq
37279 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
37280 %3 = icmp ult <2 x i64> %2, <i64 58, i64 58>
37281 %4 = sext <2 x i1> %3 to <2 x i64>
37285 define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) {
37286 ; SSE2-LABEL: ugt_58_v2i64:
37288 ; SSE2-NEXT: movdqa %xmm0, %xmm1
37289 ; SSE2-NEXT: psrlw $1, %xmm1
37290 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37291 ; SSE2-NEXT: psubb %xmm1, %xmm0
37292 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
37293 ; SSE2-NEXT: movdqa %xmm0, %xmm2
37294 ; SSE2-NEXT: pand %xmm1, %xmm2
37295 ; SSE2-NEXT: psrlw $2, %xmm0
37296 ; SSE2-NEXT: pand %xmm1, %xmm0
37297 ; SSE2-NEXT: paddb %xmm2, %xmm0
37298 ; SSE2-NEXT: movdqa %xmm0, %xmm1
37299 ; SSE2-NEXT: psrlw $4, %xmm1
37300 ; SSE2-NEXT: paddb %xmm0, %xmm1
37301 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37302 ; SSE2-NEXT: pxor %xmm0, %xmm0
37303 ; SSE2-NEXT: psadbw %xmm0, %xmm1
37304 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37305 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
37306 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
37307 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37308 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
37309 ; SSE2-NEXT: pand %xmm2, %xmm3
37310 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
37311 ; SSE2-NEXT: por %xmm3, %xmm0
37314 ; SSE3-LABEL: ugt_58_v2i64:
37316 ; SSE3-NEXT: movdqa %xmm0, %xmm1
37317 ; SSE3-NEXT: psrlw $1, %xmm1
37318 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37319 ; SSE3-NEXT: psubb %xmm1, %xmm0
37320 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
37321 ; SSE3-NEXT: movdqa %xmm0, %xmm2
37322 ; SSE3-NEXT: pand %xmm1, %xmm2
37323 ; SSE3-NEXT: psrlw $2, %xmm0
37324 ; SSE3-NEXT: pand %xmm1, %xmm0
37325 ; SSE3-NEXT: paddb %xmm2, %xmm0
37326 ; SSE3-NEXT: movdqa %xmm0, %xmm1
37327 ; SSE3-NEXT: psrlw $4, %xmm1
37328 ; SSE3-NEXT: paddb %xmm0, %xmm1
37329 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37330 ; SSE3-NEXT: pxor %xmm0, %xmm0
37331 ; SSE3-NEXT: psadbw %xmm0, %xmm1
37332 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37333 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
37334 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
37335 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37336 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
37337 ; SSE3-NEXT: pand %xmm2, %xmm3
37338 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
37339 ; SSE3-NEXT: por %xmm3, %xmm0
37342 ; SSSE3-LABEL: ugt_58_v2i64:
37344 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37345 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
37346 ; SSSE3-NEXT: pand %xmm1, %xmm2
37347 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37348 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
37349 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
37350 ; SSSE3-NEXT: psrlw $4, %xmm0
37351 ; SSSE3-NEXT: pand %xmm1, %xmm0
37352 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
37353 ; SSSE3-NEXT: paddb %xmm4, %xmm3
37354 ; SSSE3-NEXT: pxor %xmm0, %xmm0
37355 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
37356 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
37357 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
37358 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
37359 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
37360 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
37361 ; SSSE3-NEXT: pand %xmm1, %xmm2
37362 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
37363 ; SSSE3-NEXT: por %xmm2, %xmm0
37366 ; SSE41-LABEL: ugt_58_v2i64:
37368 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37369 ; SSE41-NEXT: movdqa %xmm0, %xmm2
37370 ; SSE41-NEXT: pand %xmm1, %xmm2
37371 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37372 ; SSE41-NEXT: movdqa %xmm3, %xmm4
37373 ; SSE41-NEXT: pshufb %xmm2, %xmm4
37374 ; SSE41-NEXT: psrlw $4, %xmm0
37375 ; SSE41-NEXT: pand %xmm1, %xmm0
37376 ; SSE41-NEXT: pshufb %xmm0, %xmm3
37377 ; SSE41-NEXT: paddb %xmm4, %xmm3
37378 ; SSE41-NEXT: pxor %xmm0, %xmm0
37379 ; SSE41-NEXT: psadbw %xmm0, %xmm3
37380 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
37381 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
37382 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
37383 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
37384 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
37385 ; SSE41-NEXT: pand %xmm1, %xmm2
37386 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
37387 ; SSE41-NEXT: por %xmm2, %xmm0
37390 ; AVX1-LABEL: ugt_58_v2i64:
37392 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37393 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
37394 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37395 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
37396 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
37397 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
37398 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
37399 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
37400 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
37401 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
37402 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
37405 ; AVX2-LABEL: ugt_58_v2i64:
37407 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37408 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
37409 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37410 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
37411 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
37412 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
37413 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
37414 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
37415 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
37416 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
37417 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
37420 ; AVX512VPOPCNTDQ-LABEL: ugt_58_v2i64:
37421 ; AVX512VPOPCNTDQ: # %bb.0:
37422 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
37423 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
37424 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
37425 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
37426 ; AVX512VPOPCNTDQ-NEXT: retq
37428 ; AVX512VPOPCNTDQVL-LABEL: ugt_58_v2i64:
37429 ; AVX512VPOPCNTDQVL: # %bb.0:
37430 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
37431 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
37432 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
37433 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
37434 ; AVX512VPOPCNTDQVL-NEXT: retq
37436 ; BITALG_NOVLX-LABEL: ugt_58_v2i64:
37437 ; BITALG_NOVLX: # %bb.0:
37438 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
37439 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
37440 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
37441 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
37442 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
37443 ; BITALG_NOVLX-NEXT: vzeroupper
37444 ; BITALG_NOVLX-NEXT: retq
37446 ; BITALG-LABEL: ugt_58_v2i64:
37448 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
37449 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
37450 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
37451 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
37452 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
37453 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
37454 ; BITALG-NEXT: retq
37455 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
37456 %3 = icmp ugt <2 x i64> %2, <i64 58, i64 58>
37457 %4 = sext <2 x i1> %3 to <2 x i64>
37461 define <2 x i64> @ult_59_v2i64(<2 x i64> %0) {
37462 ; SSE2-LABEL: ult_59_v2i64:
37464 ; SSE2-NEXT: movdqa %xmm0, %xmm1
37465 ; SSE2-NEXT: psrlw $1, %xmm1
37466 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37467 ; SSE2-NEXT: psubb %xmm1, %xmm0
37468 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
37469 ; SSE2-NEXT: movdqa %xmm0, %xmm2
37470 ; SSE2-NEXT: pand %xmm1, %xmm2
37471 ; SSE2-NEXT: psrlw $2, %xmm0
37472 ; SSE2-NEXT: pand %xmm1, %xmm0
37473 ; SSE2-NEXT: paddb %xmm2, %xmm0
37474 ; SSE2-NEXT: movdqa %xmm0, %xmm1
37475 ; SSE2-NEXT: psrlw $4, %xmm1
37476 ; SSE2-NEXT: paddb %xmm0, %xmm1
37477 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37478 ; SSE2-NEXT: pxor %xmm0, %xmm0
37479 ; SSE2-NEXT: psadbw %xmm0, %xmm1
37480 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37481 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
37482 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
37483 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483707,2147483707]
37484 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
37485 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
37486 ; SSE2-NEXT: pand %xmm2, %xmm1
37487 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
37488 ; SSE2-NEXT: por %xmm1, %xmm0
37491 ; SSE3-LABEL: ult_59_v2i64:
37493 ; SSE3-NEXT: movdqa %xmm0, %xmm1
37494 ; SSE3-NEXT: psrlw $1, %xmm1
37495 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37496 ; SSE3-NEXT: psubb %xmm1, %xmm0
37497 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
37498 ; SSE3-NEXT: movdqa %xmm0, %xmm2
37499 ; SSE3-NEXT: pand %xmm1, %xmm2
37500 ; SSE3-NEXT: psrlw $2, %xmm0
37501 ; SSE3-NEXT: pand %xmm1, %xmm0
37502 ; SSE3-NEXT: paddb %xmm2, %xmm0
37503 ; SSE3-NEXT: movdqa %xmm0, %xmm1
37504 ; SSE3-NEXT: psrlw $4, %xmm1
37505 ; SSE3-NEXT: paddb %xmm0, %xmm1
37506 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37507 ; SSE3-NEXT: pxor %xmm0, %xmm0
37508 ; SSE3-NEXT: psadbw %xmm0, %xmm1
37509 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37510 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
37511 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
37512 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483707,2147483707]
37513 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
37514 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
37515 ; SSE3-NEXT: pand %xmm2, %xmm1
37516 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
37517 ; SSE3-NEXT: por %xmm1, %xmm0
37520 ; SSSE3-LABEL: ult_59_v2i64:
37522 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37523 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
37524 ; SSSE3-NEXT: pand %xmm1, %xmm2
37525 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37526 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
37527 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
37528 ; SSSE3-NEXT: psrlw $4, %xmm0
37529 ; SSSE3-NEXT: pand %xmm1, %xmm0
37530 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
37531 ; SSSE3-NEXT: paddb %xmm4, %xmm3
37532 ; SSSE3-NEXT: pxor %xmm0, %xmm0
37533 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
37534 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
37535 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
37536 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
37537 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483707,2147483707]
37538 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
37539 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
37540 ; SSSE3-NEXT: pand %xmm1, %xmm2
37541 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
37542 ; SSSE3-NEXT: por %xmm2, %xmm0
37545 ; SSE41-LABEL: ult_59_v2i64:
37547 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37548 ; SSE41-NEXT: movdqa %xmm0, %xmm2
37549 ; SSE41-NEXT: pand %xmm1, %xmm2
37550 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37551 ; SSE41-NEXT: movdqa %xmm3, %xmm4
37552 ; SSE41-NEXT: pshufb %xmm2, %xmm4
37553 ; SSE41-NEXT: psrlw $4, %xmm0
37554 ; SSE41-NEXT: pand %xmm1, %xmm0
37555 ; SSE41-NEXT: pshufb %xmm0, %xmm3
37556 ; SSE41-NEXT: paddb %xmm4, %xmm3
37557 ; SSE41-NEXT: pxor %xmm0, %xmm0
37558 ; SSE41-NEXT: psadbw %xmm0, %xmm3
37559 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
37560 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
37561 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
37562 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483707,2147483707]
37563 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
37564 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
37565 ; SSE41-NEXT: pand %xmm1, %xmm2
37566 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
37567 ; SSE41-NEXT: por %xmm2, %xmm0
37570 ; AVX1-LABEL: ult_59_v2i64:
37572 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37573 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
37574 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37575 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
37576 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
37577 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
37578 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
37579 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
37580 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
37581 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
37582 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59]
37583 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
37586 ; AVX2-LABEL: ult_59_v2i64:
37588 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37589 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
37590 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37591 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
37592 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
37593 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
37594 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
37595 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
37596 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
37597 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
37598 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59]
37599 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
37602 ; AVX512VPOPCNTDQ-LABEL: ult_59_v2i64:
37603 ; AVX512VPOPCNTDQ: # %bb.0:
37604 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
37605 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
37606 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59]
37607 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
37608 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
37609 ; AVX512VPOPCNTDQ-NEXT: retq
37611 ; AVX512VPOPCNTDQVL-LABEL: ult_59_v2i64:
37612 ; AVX512VPOPCNTDQVL: # %bb.0:
37613 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
37614 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
37615 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
37616 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
37617 ; AVX512VPOPCNTDQVL-NEXT: retq
37619 ; BITALG_NOVLX-LABEL: ult_59_v2i64:
37620 ; BITALG_NOVLX: # %bb.0:
37621 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
37622 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
37623 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
37624 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
37625 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59]
37626 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
37627 ; BITALG_NOVLX-NEXT: vzeroupper
37628 ; BITALG_NOVLX-NEXT: retq
37630 ; BITALG-LABEL: ult_59_v2i64:
37632 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
37633 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
37634 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
37635 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
37636 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
37637 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
37638 ; BITALG-NEXT: retq
37639 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
37640 %3 = icmp ult <2 x i64> %2, <i64 59, i64 59>
37641 %4 = sext <2 x i1> %3 to <2 x i64>
37645 define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) {
37646 ; SSE2-LABEL: ugt_59_v2i64:
37648 ; SSE2-NEXT: movdqa %xmm0, %xmm1
37649 ; SSE2-NEXT: psrlw $1, %xmm1
37650 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37651 ; SSE2-NEXT: psubb %xmm1, %xmm0
37652 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
37653 ; SSE2-NEXT: movdqa %xmm0, %xmm2
37654 ; SSE2-NEXT: pand %xmm1, %xmm2
37655 ; SSE2-NEXT: psrlw $2, %xmm0
37656 ; SSE2-NEXT: pand %xmm1, %xmm0
37657 ; SSE2-NEXT: paddb %xmm2, %xmm0
37658 ; SSE2-NEXT: movdqa %xmm0, %xmm1
37659 ; SSE2-NEXT: psrlw $4, %xmm1
37660 ; SSE2-NEXT: paddb %xmm0, %xmm1
37661 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37662 ; SSE2-NEXT: pxor %xmm0, %xmm0
37663 ; SSE2-NEXT: psadbw %xmm0, %xmm1
37664 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37665 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
37666 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
37667 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37668 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
37669 ; SSE2-NEXT: pand %xmm2, %xmm3
37670 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
37671 ; SSE2-NEXT: por %xmm3, %xmm0
37674 ; SSE3-LABEL: ugt_59_v2i64:
37676 ; SSE3-NEXT: movdqa %xmm0, %xmm1
37677 ; SSE3-NEXT: psrlw $1, %xmm1
37678 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37679 ; SSE3-NEXT: psubb %xmm1, %xmm0
37680 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
37681 ; SSE3-NEXT: movdqa %xmm0, %xmm2
37682 ; SSE3-NEXT: pand %xmm1, %xmm2
37683 ; SSE3-NEXT: psrlw $2, %xmm0
37684 ; SSE3-NEXT: pand %xmm1, %xmm0
37685 ; SSE3-NEXT: paddb %xmm2, %xmm0
37686 ; SSE3-NEXT: movdqa %xmm0, %xmm1
37687 ; SSE3-NEXT: psrlw $4, %xmm1
37688 ; SSE3-NEXT: paddb %xmm0, %xmm1
37689 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37690 ; SSE3-NEXT: pxor %xmm0, %xmm0
37691 ; SSE3-NEXT: psadbw %xmm0, %xmm1
37692 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37693 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
37694 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
37695 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37696 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
37697 ; SSE3-NEXT: pand %xmm2, %xmm3
37698 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
37699 ; SSE3-NEXT: por %xmm3, %xmm0
37702 ; SSSE3-LABEL: ugt_59_v2i64:
37704 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37705 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
37706 ; SSSE3-NEXT: pand %xmm1, %xmm2
37707 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37708 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
37709 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
37710 ; SSSE3-NEXT: psrlw $4, %xmm0
37711 ; SSSE3-NEXT: pand %xmm1, %xmm0
37712 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
37713 ; SSSE3-NEXT: paddb %xmm4, %xmm3
37714 ; SSSE3-NEXT: pxor %xmm0, %xmm0
37715 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
37716 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
37717 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
37718 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
37719 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
37720 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
37721 ; SSSE3-NEXT: pand %xmm1, %xmm2
37722 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
37723 ; SSSE3-NEXT: por %xmm2, %xmm0
37726 ; SSE41-LABEL: ugt_59_v2i64:
37728 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37729 ; SSE41-NEXT: movdqa %xmm0, %xmm2
37730 ; SSE41-NEXT: pand %xmm1, %xmm2
37731 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37732 ; SSE41-NEXT: movdqa %xmm3, %xmm4
37733 ; SSE41-NEXT: pshufb %xmm2, %xmm4
37734 ; SSE41-NEXT: psrlw $4, %xmm0
37735 ; SSE41-NEXT: pand %xmm1, %xmm0
37736 ; SSE41-NEXT: pshufb %xmm0, %xmm3
37737 ; SSE41-NEXT: paddb %xmm4, %xmm3
37738 ; SSE41-NEXT: pxor %xmm0, %xmm0
37739 ; SSE41-NEXT: psadbw %xmm0, %xmm3
37740 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
37741 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
37742 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
37743 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
37744 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
37745 ; SSE41-NEXT: pand %xmm1, %xmm2
37746 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
37747 ; SSE41-NEXT: por %xmm2, %xmm0
37750 ; AVX1-LABEL: ugt_59_v2i64:
37752 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37753 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
37754 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37755 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
37756 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
37757 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
37758 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
37759 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
37760 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
37761 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
37762 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
37765 ; AVX2-LABEL: ugt_59_v2i64:
37767 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37768 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
37769 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37770 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
37771 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
37772 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
37773 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
37774 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
37775 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
37776 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
37777 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
37780 ; AVX512VPOPCNTDQ-LABEL: ugt_59_v2i64:
37781 ; AVX512VPOPCNTDQ: # %bb.0:
37782 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
37783 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
37784 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
37785 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
37786 ; AVX512VPOPCNTDQ-NEXT: retq
37788 ; AVX512VPOPCNTDQVL-LABEL: ugt_59_v2i64:
37789 ; AVX512VPOPCNTDQVL: # %bb.0:
37790 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
37791 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
37792 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
37793 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
37794 ; AVX512VPOPCNTDQVL-NEXT: retq
37796 ; BITALG_NOVLX-LABEL: ugt_59_v2i64:
37797 ; BITALG_NOVLX: # %bb.0:
37798 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
37799 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
37800 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
37801 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
37802 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
37803 ; BITALG_NOVLX-NEXT: vzeroupper
37804 ; BITALG_NOVLX-NEXT: retq
37806 ; BITALG-LABEL: ugt_59_v2i64:
37808 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
37809 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
37810 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
37811 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
37812 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
37813 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
37814 ; BITALG-NEXT: retq
37815 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
37816 %3 = icmp ugt <2 x i64> %2, <i64 59, i64 59>
37817 %4 = sext <2 x i1> %3 to <2 x i64>
37821 define <2 x i64> @ult_60_v2i64(<2 x i64> %0) {
37822 ; SSE2-LABEL: ult_60_v2i64:
37824 ; SSE2-NEXT: movdqa %xmm0, %xmm1
37825 ; SSE2-NEXT: psrlw $1, %xmm1
37826 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37827 ; SSE2-NEXT: psubb %xmm1, %xmm0
37828 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
37829 ; SSE2-NEXT: movdqa %xmm0, %xmm2
37830 ; SSE2-NEXT: pand %xmm1, %xmm2
37831 ; SSE2-NEXT: psrlw $2, %xmm0
37832 ; SSE2-NEXT: pand %xmm1, %xmm0
37833 ; SSE2-NEXT: paddb %xmm2, %xmm0
37834 ; SSE2-NEXT: movdqa %xmm0, %xmm1
37835 ; SSE2-NEXT: psrlw $4, %xmm1
37836 ; SSE2-NEXT: paddb %xmm0, %xmm1
37837 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37838 ; SSE2-NEXT: pxor %xmm0, %xmm0
37839 ; SSE2-NEXT: psadbw %xmm0, %xmm1
37840 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37841 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
37842 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
37843 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483708,2147483708]
37844 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
37845 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
37846 ; SSE2-NEXT: pand %xmm2, %xmm1
37847 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
37848 ; SSE2-NEXT: por %xmm1, %xmm0
37851 ; SSE3-LABEL: ult_60_v2i64:
37853 ; SSE3-NEXT: movdqa %xmm0, %xmm1
37854 ; SSE3-NEXT: psrlw $1, %xmm1
37855 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37856 ; SSE3-NEXT: psubb %xmm1, %xmm0
37857 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
37858 ; SSE3-NEXT: movdqa %xmm0, %xmm2
37859 ; SSE3-NEXT: pand %xmm1, %xmm2
37860 ; SSE3-NEXT: psrlw $2, %xmm0
37861 ; SSE3-NEXT: pand %xmm1, %xmm0
37862 ; SSE3-NEXT: paddb %xmm2, %xmm0
37863 ; SSE3-NEXT: movdqa %xmm0, %xmm1
37864 ; SSE3-NEXT: psrlw $4, %xmm1
37865 ; SSE3-NEXT: paddb %xmm0, %xmm1
37866 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37867 ; SSE3-NEXT: pxor %xmm0, %xmm0
37868 ; SSE3-NEXT: psadbw %xmm0, %xmm1
37869 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
37870 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
37871 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
37872 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483708,2147483708]
37873 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
37874 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
37875 ; SSE3-NEXT: pand %xmm2, %xmm1
37876 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
37877 ; SSE3-NEXT: por %xmm1, %xmm0
37880 ; SSSE3-LABEL: ult_60_v2i64:
37882 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37883 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
37884 ; SSSE3-NEXT: pand %xmm1, %xmm2
37885 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37886 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
37887 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
37888 ; SSSE3-NEXT: psrlw $4, %xmm0
37889 ; SSSE3-NEXT: pand %xmm1, %xmm0
37890 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
37891 ; SSSE3-NEXT: paddb %xmm4, %xmm3
37892 ; SSSE3-NEXT: pxor %xmm0, %xmm0
37893 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
37894 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
37895 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
37896 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
37897 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483708,2147483708]
37898 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
37899 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
37900 ; SSSE3-NEXT: pand %xmm1, %xmm2
37901 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
37902 ; SSSE3-NEXT: por %xmm2, %xmm0
37905 ; SSE41-LABEL: ult_60_v2i64:
37907 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37908 ; SSE41-NEXT: movdqa %xmm0, %xmm2
37909 ; SSE41-NEXT: pand %xmm1, %xmm2
37910 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37911 ; SSE41-NEXT: movdqa %xmm3, %xmm4
37912 ; SSE41-NEXT: pshufb %xmm2, %xmm4
37913 ; SSE41-NEXT: psrlw $4, %xmm0
37914 ; SSE41-NEXT: pand %xmm1, %xmm0
37915 ; SSE41-NEXT: pshufb %xmm0, %xmm3
37916 ; SSE41-NEXT: paddb %xmm4, %xmm3
37917 ; SSE41-NEXT: pxor %xmm0, %xmm0
37918 ; SSE41-NEXT: psadbw %xmm0, %xmm3
37919 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
37920 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
37921 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
37922 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483708,2147483708]
37923 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
37924 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
37925 ; SSE41-NEXT: pand %xmm1, %xmm2
37926 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
37927 ; SSE41-NEXT: por %xmm2, %xmm0
37930 ; AVX1-LABEL: ult_60_v2i64:
37932 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37933 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
37934 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37935 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
37936 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
37937 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
37938 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
37939 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
37940 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
37941 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
37942 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [60,60]
37943 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
37946 ; AVX2-LABEL: ult_60_v2i64:
37948 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
37949 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
37950 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
37951 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
37952 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
37953 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
37954 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
37955 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
37956 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
37957 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
37958 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [60,60]
37959 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
37962 ; AVX512VPOPCNTDQ-LABEL: ult_60_v2i64:
37963 ; AVX512VPOPCNTDQ: # %bb.0:
37964 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
37965 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
37966 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [60,60]
37967 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
37968 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
37969 ; AVX512VPOPCNTDQ-NEXT: retq
37971 ; AVX512VPOPCNTDQVL-LABEL: ult_60_v2i64:
37972 ; AVX512VPOPCNTDQVL: # %bb.0:
37973 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
37974 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
37975 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
37976 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
37977 ; AVX512VPOPCNTDQVL-NEXT: retq
37979 ; BITALG_NOVLX-LABEL: ult_60_v2i64:
37980 ; BITALG_NOVLX: # %bb.0:
37981 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
37982 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
37983 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
37984 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
37985 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [60,60]
37986 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
37987 ; BITALG_NOVLX-NEXT: vzeroupper
37988 ; BITALG_NOVLX-NEXT: retq
37990 ; BITALG-LABEL: ult_60_v2i64:
37992 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
37993 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
37994 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
37995 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
37996 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
37997 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
37998 ; BITALG-NEXT: retq
37999 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
38000 %3 = icmp ult <2 x i64> %2, <i64 60, i64 60>
38001 %4 = sext <2 x i1> %3 to <2 x i64>
38005 define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) {
38006 ; SSE2-LABEL: ugt_60_v2i64:
38008 ; SSE2-NEXT: movdqa %xmm0, %xmm1
38009 ; SSE2-NEXT: psrlw $1, %xmm1
38010 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38011 ; SSE2-NEXT: psubb %xmm1, %xmm0
38012 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
38013 ; SSE2-NEXT: movdqa %xmm0, %xmm2
38014 ; SSE2-NEXT: pand %xmm1, %xmm2
38015 ; SSE2-NEXT: psrlw $2, %xmm0
38016 ; SSE2-NEXT: pand %xmm1, %xmm0
38017 ; SSE2-NEXT: paddb %xmm2, %xmm0
38018 ; SSE2-NEXT: movdqa %xmm0, %xmm1
38019 ; SSE2-NEXT: psrlw $4, %xmm1
38020 ; SSE2-NEXT: paddb %xmm0, %xmm1
38021 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38022 ; SSE2-NEXT: pxor %xmm0, %xmm0
38023 ; SSE2-NEXT: psadbw %xmm0, %xmm1
38024 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38025 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
38026 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
38027 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38028 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
38029 ; SSE2-NEXT: pand %xmm2, %xmm3
38030 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
38031 ; SSE2-NEXT: por %xmm3, %xmm0
38034 ; SSE3-LABEL: ugt_60_v2i64:
38036 ; SSE3-NEXT: movdqa %xmm0, %xmm1
38037 ; SSE3-NEXT: psrlw $1, %xmm1
38038 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38039 ; SSE3-NEXT: psubb %xmm1, %xmm0
38040 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
38041 ; SSE3-NEXT: movdqa %xmm0, %xmm2
38042 ; SSE3-NEXT: pand %xmm1, %xmm2
38043 ; SSE3-NEXT: psrlw $2, %xmm0
38044 ; SSE3-NEXT: pand %xmm1, %xmm0
38045 ; SSE3-NEXT: paddb %xmm2, %xmm0
38046 ; SSE3-NEXT: movdqa %xmm0, %xmm1
38047 ; SSE3-NEXT: psrlw $4, %xmm1
38048 ; SSE3-NEXT: paddb %xmm0, %xmm1
38049 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38050 ; SSE3-NEXT: pxor %xmm0, %xmm0
38051 ; SSE3-NEXT: psadbw %xmm0, %xmm1
38052 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38053 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
38054 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
38055 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38056 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
38057 ; SSE3-NEXT: pand %xmm2, %xmm3
38058 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
38059 ; SSE3-NEXT: por %xmm3, %xmm0
38062 ; SSSE3-LABEL: ugt_60_v2i64:
38064 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38065 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
38066 ; SSSE3-NEXT: pand %xmm1, %xmm2
38067 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38068 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
38069 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
38070 ; SSSE3-NEXT: psrlw $4, %xmm0
38071 ; SSSE3-NEXT: pand %xmm1, %xmm0
38072 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
38073 ; SSSE3-NEXT: paddb %xmm4, %xmm3
38074 ; SSSE3-NEXT: pxor %xmm0, %xmm0
38075 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
38076 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
38077 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
38078 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
38079 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
38080 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
38081 ; SSSE3-NEXT: pand %xmm1, %xmm2
38082 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
38083 ; SSSE3-NEXT: por %xmm2, %xmm0
38086 ; SSE41-LABEL: ugt_60_v2i64:
38088 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38089 ; SSE41-NEXT: movdqa %xmm0, %xmm2
38090 ; SSE41-NEXT: pand %xmm1, %xmm2
38091 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38092 ; SSE41-NEXT: movdqa %xmm3, %xmm4
38093 ; SSE41-NEXT: pshufb %xmm2, %xmm4
38094 ; SSE41-NEXT: psrlw $4, %xmm0
38095 ; SSE41-NEXT: pand %xmm1, %xmm0
38096 ; SSE41-NEXT: pshufb %xmm0, %xmm3
38097 ; SSE41-NEXT: paddb %xmm4, %xmm3
38098 ; SSE41-NEXT: pxor %xmm0, %xmm0
38099 ; SSE41-NEXT: psadbw %xmm0, %xmm3
38100 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
38101 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
38102 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
38103 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
38104 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
38105 ; SSE41-NEXT: pand %xmm1, %xmm2
38106 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
38107 ; SSE41-NEXT: por %xmm2, %xmm0
38110 ; AVX1-LABEL: ugt_60_v2i64:
38112 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38113 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
38114 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38115 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
38116 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
38117 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
38118 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
38119 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
38120 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
38121 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
38122 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
38125 ; AVX2-LABEL: ugt_60_v2i64:
38127 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38128 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
38129 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38130 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
38131 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
38132 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
38133 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
38134 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
38135 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
38136 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
38137 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
38140 ; AVX512VPOPCNTDQ-LABEL: ugt_60_v2i64:
38141 ; AVX512VPOPCNTDQ: # %bb.0:
38142 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
38143 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
38144 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
38145 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
38146 ; AVX512VPOPCNTDQ-NEXT: retq
38148 ; AVX512VPOPCNTDQVL-LABEL: ugt_60_v2i64:
38149 ; AVX512VPOPCNTDQVL: # %bb.0:
38150 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
38151 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
38152 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
38153 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
38154 ; AVX512VPOPCNTDQVL-NEXT: retq
38156 ; BITALG_NOVLX-LABEL: ugt_60_v2i64:
38157 ; BITALG_NOVLX: # %bb.0:
38158 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
38159 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
38160 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
38161 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
38162 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
38163 ; BITALG_NOVLX-NEXT: vzeroupper
38164 ; BITALG_NOVLX-NEXT: retq
38166 ; BITALG-LABEL: ugt_60_v2i64:
38168 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
38169 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
38170 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
38171 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
38172 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
38173 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
38174 ; BITALG-NEXT: retq
38175 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
38176 %3 = icmp ugt <2 x i64> %2, <i64 60, i64 60>
38177 %4 = sext <2 x i1> %3 to <2 x i64>
38181 define <2 x i64> @ult_61_v2i64(<2 x i64> %0) {
38182 ; SSE2-LABEL: ult_61_v2i64:
38184 ; SSE2-NEXT: movdqa %xmm0, %xmm1
38185 ; SSE2-NEXT: psrlw $1, %xmm1
38186 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38187 ; SSE2-NEXT: psubb %xmm1, %xmm0
38188 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
38189 ; SSE2-NEXT: movdqa %xmm0, %xmm2
38190 ; SSE2-NEXT: pand %xmm1, %xmm2
38191 ; SSE2-NEXT: psrlw $2, %xmm0
38192 ; SSE2-NEXT: pand %xmm1, %xmm0
38193 ; SSE2-NEXT: paddb %xmm2, %xmm0
38194 ; SSE2-NEXT: movdqa %xmm0, %xmm1
38195 ; SSE2-NEXT: psrlw $4, %xmm1
38196 ; SSE2-NEXT: paddb %xmm0, %xmm1
38197 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38198 ; SSE2-NEXT: pxor %xmm0, %xmm0
38199 ; SSE2-NEXT: psadbw %xmm0, %xmm1
38200 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38201 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
38202 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
38203 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483709,2147483709]
38204 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
38205 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
38206 ; SSE2-NEXT: pand %xmm2, %xmm1
38207 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
38208 ; SSE2-NEXT: por %xmm1, %xmm0
38211 ; SSE3-LABEL: ult_61_v2i64:
38213 ; SSE3-NEXT: movdqa %xmm0, %xmm1
38214 ; SSE3-NEXT: psrlw $1, %xmm1
38215 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38216 ; SSE3-NEXT: psubb %xmm1, %xmm0
38217 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
38218 ; SSE3-NEXT: movdqa %xmm0, %xmm2
38219 ; SSE3-NEXT: pand %xmm1, %xmm2
38220 ; SSE3-NEXT: psrlw $2, %xmm0
38221 ; SSE3-NEXT: pand %xmm1, %xmm0
38222 ; SSE3-NEXT: paddb %xmm2, %xmm0
38223 ; SSE3-NEXT: movdqa %xmm0, %xmm1
38224 ; SSE3-NEXT: psrlw $4, %xmm1
38225 ; SSE3-NEXT: paddb %xmm0, %xmm1
38226 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38227 ; SSE3-NEXT: pxor %xmm0, %xmm0
38228 ; SSE3-NEXT: psadbw %xmm0, %xmm1
38229 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38230 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
38231 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
38232 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483709,2147483709]
38233 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
38234 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
38235 ; SSE3-NEXT: pand %xmm2, %xmm1
38236 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
38237 ; SSE3-NEXT: por %xmm1, %xmm0
38240 ; SSSE3-LABEL: ult_61_v2i64:
38242 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38243 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
38244 ; SSSE3-NEXT: pand %xmm1, %xmm2
38245 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38246 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
38247 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
38248 ; SSSE3-NEXT: psrlw $4, %xmm0
38249 ; SSSE3-NEXT: pand %xmm1, %xmm0
38250 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
38251 ; SSSE3-NEXT: paddb %xmm4, %xmm3
38252 ; SSSE3-NEXT: pxor %xmm0, %xmm0
38253 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
38254 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
38255 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
38256 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
38257 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483709,2147483709]
38258 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
38259 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
38260 ; SSSE3-NEXT: pand %xmm1, %xmm2
38261 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
38262 ; SSSE3-NEXT: por %xmm2, %xmm0
38265 ; SSE41-LABEL: ult_61_v2i64:
38267 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38268 ; SSE41-NEXT: movdqa %xmm0, %xmm2
38269 ; SSE41-NEXT: pand %xmm1, %xmm2
38270 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38271 ; SSE41-NEXT: movdqa %xmm3, %xmm4
38272 ; SSE41-NEXT: pshufb %xmm2, %xmm4
38273 ; SSE41-NEXT: psrlw $4, %xmm0
38274 ; SSE41-NEXT: pand %xmm1, %xmm0
38275 ; SSE41-NEXT: pshufb %xmm0, %xmm3
38276 ; SSE41-NEXT: paddb %xmm4, %xmm3
38277 ; SSE41-NEXT: pxor %xmm0, %xmm0
38278 ; SSE41-NEXT: psadbw %xmm0, %xmm3
38279 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
38280 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
38281 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
38282 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483709,2147483709]
38283 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
38284 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
38285 ; SSE41-NEXT: pand %xmm1, %xmm2
38286 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
38287 ; SSE41-NEXT: por %xmm2, %xmm0
38290 ; AVX1-LABEL: ult_61_v2i64:
38292 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38293 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
38294 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38295 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
38296 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
38297 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
38298 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
38299 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
38300 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
38301 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
38302 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61]
38303 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
38306 ; AVX2-LABEL: ult_61_v2i64:
38308 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38309 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
38310 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38311 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
38312 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
38313 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
38314 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
38315 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
38316 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
38317 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
38318 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61]
38319 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
38322 ; AVX512VPOPCNTDQ-LABEL: ult_61_v2i64:
38323 ; AVX512VPOPCNTDQ: # %bb.0:
38324 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
38325 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
38326 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61]
38327 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
38328 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
38329 ; AVX512VPOPCNTDQ-NEXT: retq
38331 ; AVX512VPOPCNTDQVL-LABEL: ult_61_v2i64:
38332 ; AVX512VPOPCNTDQVL: # %bb.0:
38333 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
38334 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
38335 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
38336 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
38337 ; AVX512VPOPCNTDQVL-NEXT: retq
38339 ; BITALG_NOVLX-LABEL: ult_61_v2i64:
38340 ; BITALG_NOVLX: # %bb.0:
38341 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
38342 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
38343 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
38344 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
38345 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61]
38346 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
38347 ; BITALG_NOVLX-NEXT: vzeroupper
38348 ; BITALG_NOVLX-NEXT: retq
38350 ; BITALG-LABEL: ult_61_v2i64:
38352 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
38353 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
38354 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
38355 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
38356 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
38357 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
38358 ; BITALG-NEXT: retq
38359 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
38360 %3 = icmp ult <2 x i64> %2, <i64 61, i64 61>
38361 %4 = sext <2 x i1> %3 to <2 x i64>
38365 define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) {
38366 ; SSE2-LABEL: ugt_61_v2i64:
38368 ; SSE2-NEXT: movdqa %xmm0, %xmm1
38369 ; SSE2-NEXT: psrlw $1, %xmm1
38370 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38371 ; SSE2-NEXT: psubb %xmm1, %xmm0
38372 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
38373 ; SSE2-NEXT: movdqa %xmm0, %xmm2
38374 ; SSE2-NEXT: pand %xmm1, %xmm2
38375 ; SSE2-NEXT: psrlw $2, %xmm0
38376 ; SSE2-NEXT: pand %xmm1, %xmm0
38377 ; SSE2-NEXT: paddb %xmm2, %xmm0
38378 ; SSE2-NEXT: movdqa %xmm0, %xmm1
38379 ; SSE2-NEXT: psrlw $4, %xmm1
38380 ; SSE2-NEXT: paddb %xmm0, %xmm1
38381 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38382 ; SSE2-NEXT: pxor %xmm0, %xmm0
38383 ; SSE2-NEXT: psadbw %xmm0, %xmm1
38384 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38385 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
38386 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
38387 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38388 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
38389 ; SSE2-NEXT: pand %xmm2, %xmm3
38390 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
38391 ; SSE2-NEXT: por %xmm3, %xmm0
38394 ; SSE3-LABEL: ugt_61_v2i64:
38396 ; SSE3-NEXT: movdqa %xmm0, %xmm1
38397 ; SSE3-NEXT: psrlw $1, %xmm1
38398 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38399 ; SSE3-NEXT: psubb %xmm1, %xmm0
38400 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
38401 ; SSE3-NEXT: movdqa %xmm0, %xmm2
38402 ; SSE3-NEXT: pand %xmm1, %xmm2
38403 ; SSE3-NEXT: psrlw $2, %xmm0
38404 ; SSE3-NEXT: pand %xmm1, %xmm0
38405 ; SSE3-NEXT: paddb %xmm2, %xmm0
38406 ; SSE3-NEXT: movdqa %xmm0, %xmm1
38407 ; SSE3-NEXT: psrlw $4, %xmm1
38408 ; SSE3-NEXT: paddb %xmm0, %xmm1
38409 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38410 ; SSE3-NEXT: pxor %xmm0, %xmm0
38411 ; SSE3-NEXT: psadbw %xmm0, %xmm1
38412 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38413 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
38414 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
38415 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38416 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
38417 ; SSE3-NEXT: pand %xmm2, %xmm3
38418 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
38419 ; SSE3-NEXT: por %xmm3, %xmm0
38422 ; SSSE3-LABEL: ugt_61_v2i64:
38424 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38425 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
38426 ; SSSE3-NEXT: pand %xmm1, %xmm2
38427 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38428 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
38429 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
38430 ; SSSE3-NEXT: psrlw $4, %xmm0
38431 ; SSSE3-NEXT: pand %xmm1, %xmm0
38432 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
38433 ; SSSE3-NEXT: paddb %xmm4, %xmm3
38434 ; SSSE3-NEXT: pxor %xmm0, %xmm0
38435 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
38436 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
38437 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
38438 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
38439 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
38440 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
38441 ; SSSE3-NEXT: pand %xmm1, %xmm2
38442 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
38443 ; SSSE3-NEXT: por %xmm2, %xmm0
38446 ; SSE41-LABEL: ugt_61_v2i64:
38448 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38449 ; SSE41-NEXT: movdqa %xmm0, %xmm2
38450 ; SSE41-NEXT: pand %xmm1, %xmm2
38451 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38452 ; SSE41-NEXT: movdqa %xmm3, %xmm4
38453 ; SSE41-NEXT: pshufb %xmm2, %xmm4
38454 ; SSE41-NEXT: psrlw $4, %xmm0
38455 ; SSE41-NEXT: pand %xmm1, %xmm0
38456 ; SSE41-NEXT: pshufb %xmm0, %xmm3
38457 ; SSE41-NEXT: paddb %xmm4, %xmm3
38458 ; SSE41-NEXT: pxor %xmm0, %xmm0
38459 ; SSE41-NEXT: psadbw %xmm0, %xmm3
38460 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
38461 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
38462 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
38463 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
38464 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
38465 ; SSE41-NEXT: pand %xmm1, %xmm2
38466 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
38467 ; SSE41-NEXT: por %xmm2, %xmm0
38470 ; AVX1-LABEL: ugt_61_v2i64:
38472 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38473 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
38474 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38475 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
38476 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
38477 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
38478 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
38479 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
38480 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
38481 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
38482 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
38485 ; AVX2-LABEL: ugt_61_v2i64:
38487 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38488 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
38489 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38490 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
38491 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
38492 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
38493 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
38494 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
38495 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
38496 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
38497 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
38500 ; AVX512VPOPCNTDQ-LABEL: ugt_61_v2i64:
38501 ; AVX512VPOPCNTDQ: # %bb.0:
38502 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
38503 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
38504 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
38505 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
38506 ; AVX512VPOPCNTDQ-NEXT: retq
38508 ; AVX512VPOPCNTDQVL-LABEL: ugt_61_v2i64:
38509 ; AVX512VPOPCNTDQVL: # %bb.0:
38510 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
38511 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
38512 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
38513 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
38514 ; AVX512VPOPCNTDQVL-NEXT: retq
38516 ; BITALG_NOVLX-LABEL: ugt_61_v2i64:
38517 ; BITALG_NOVLX: # %bb.0:
38518 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
38519 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
38520 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
38521 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
38522 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
38523 ; BITALG_NOVLX-NEXT: vzeroupper
38524 ; BITALG_NOVLX-NEXT: retq
38526 ; BITALG-LABEL: ugt_61_v2i64:
38528 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
38529 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
38530 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
38531 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
38532 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
38533 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
38534 ; BITALG-NEXT: retq
38535 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
38536 %3 = icmp ugt <2 x i64> %2, <i64 61, i64 61>
38537 %4 = sext <2 x i1> %3 to <2 x i64>
38541 define <2 x i64> @ult_62_v2i64(<2 x i64> %0) {
38542 ; SSE2-LABEL: ult_62_v2i64:
38544 ; SSE2-NEXT: movdqa %xmm0, %xmm1
38545 ; SSE2-NEXT: psrlw $1, %xmm1
38546 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38547 ; SSE2-NEXT: psubb %xmm1, %xmm0
38548 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
38549 ; SSE2-NEXT: movdqa %xmm0, %xmm2
38550 ; SSE2-NEXT: pand %xmm1, %xmm2
38551 ; SSE2-NEXT: psrlw $2, %xmm0
38552 ; SSE2-NEXT: pand %xmm1, %xmm0
38553 ; SSE2-NEXT: paddb %xmm2, %xmm0
38554 ; SSE2-NEXT: movdqa %xmm0, %xmm1
38555 ; SSE2-NEXT: psrlw $4, %xmm1
38556 ; SSE2-NEXT: paddb %xmm0, %xmm1
38557 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38558 ; SSE2-NEXT: pxor %xmm0, %xmm0
38559 ; SSE2-NEXT: psadbw %xmm0, %xmm1
38560 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38561 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
38562 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
38563 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483710,2147483710]
38564 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
38565 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
38566 ; SSE2-NEXT: pand %xmm2, %xmm1
38567 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
38568 ; SSE2-NEXT: por %xmm1, %xmm0
38571 ; SSE3-LABEL: ult_62_v2i64:
38573 ; SSE3-NEXT: movdqa %xmm0, %xmm1
38574 ; SSE3-NEXT: psrlw $1, %xmm1
38575 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38576 ; SSE3-NEXT: psubb %xmm1, %xmm0
38577 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
38578 ; SSE3-NEXT: movdqa %xmm0, %xmm2
38579 ; SSE3-NEXT: pand %xmm1, %xmm2
38580 ; SSE3-NEXT: psrlw $2, %xmm0
38581 ; SSE3-NEXT: pand %xmm1, %xmm0
38582 ; SSE3-NEXT: paddb %xmm2, %xmm0
38583 ; SSE3-NEXT: movdqa %xmm0, %xmm1
38584 ; SSE3-NEXT: psrlw $4, %xmm1
38585 ; SSE3-NEXT: paddb %xmm0, %xmm1
38586 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38587 ; SSE3-NEXT: pxor %xmm0, %xmm0
38588 ; SSE3-NEXT: psadbw %xmm0, %xmm1
38589 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38590 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
38591 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
38592 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483710,2147483710]
38593 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
38594 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
38595 ; SSE3-NEXT: pand %xmm2, %xmm1
38596 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
38597 ; SSE3-NEXT: por %xmm1, %xmm0
38600 ; SSSE3-LABEL: ult_62_v2i64:
38602 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38603 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
38604 ; SSSE3-NEXT: pand %xmm1, %xmm2
38605 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38606 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
38607 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
38608 ; SSSE3-NEXT: psrlw $4, %xmm0
38609 ; SSSE3-NEXT: pand %xmm1, %xmm0
38610 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
38611 ; SSSE3-NEXT: paddb %xmm4, %xmm3
38612 ; SSSE3-NEXT: pxor %xmm0, %xmm0
38613 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
38614 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
38615 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
38616 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
38617 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483710,2147483710]
38618 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
38619 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
38620 ; SSSE3-NEXT: pand %xmm1, %xmm2
38621 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
38622 ; SSSE3-NEXT: por %xmm2, %xmm0
38625 ; SSE41-LABEL: ult_62_v2i64:
38627 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38628 ; SSE41-NEXT: movdqa %xmm0, %xmm2
38629 ; SSE41-NEXT: pand %xmm1, %xmm2
38630 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38631 ; SSE41-NEXT: movdqa %xmm3, %xmm4
38632 ; SSE41-NEXT: pshufb %xmm2, %xmm4
38633 ; SSE41-NEXT: psrlw $4, %xmm0
38634 ; SSE41-NEXT: pand %xmm1, %xmm0
38635 ; SSE41-NEXT: pshufb %xmm0, %xmm3
38636 ; SSE41-NEXT: paddb %xmm4, %xmm3
38637 ; SSE41-NEXT: pxor %xmm0, %xmm0
38638 ; SSE41-NEXT: psadbw %xmm0, %xmm3
38639 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
38640 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
38641 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
38642 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483710,2147483710]
38643 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
38644 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
38645 ; SSE41-NEXT: pand %xmm1, %xmm2
38646 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
38647 ; SSE41-NEXT: por %xmm2, %xmm0
38650 ; AVX1-LABEL: ult_62_v2i64:
38652 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38653 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
38654 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38655 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
38656 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
38657 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
38658 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
38659 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
38660 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
38661 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
38662 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62]
38663 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
38666 ; AVX2-LABEL: ult_62_v2i64:
38668 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38669 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
38670 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38671 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
38672 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
38673 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
38674 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
38675 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
38676 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
38677 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
38678 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62]
38679 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
38682 ; AVX512VPOPCNTDQ-LABEL: ult_62_v2i64:
38683 ; AVX512VPOPCNTDQ: # %bb.0:
38684 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
38685 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
38686 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62]
38687 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
38688 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
38689 ; AVX512VPOPCNTDQ-NEXT: retq
38691 ; AVX512VPOPCNTDQVL-LABEL: ult_62_v2i64:
38692 ; AVX512VPOPCNTDQVL: # %bb.0:
38693 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
38694 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
38695 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
38696 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
38697 ; AVX512VPOPCNTDQVL-NEXT: retq
38699 ; BITALG_NOVLX-LABEL: ult_62_v2i64:
38700 ; BITALG_NOVLX: # %bb.0:
38701 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
38702 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
38703 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
38704 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
38705 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62]
38706 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
38707 ; BITALG_NOVLX-NEXT: vzeroupper
38708 ; BITALG_NOVLX-NEXT: retq
38710 ; BITALG-LABEL: ult_62_v2i64:
38712 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
38713 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
38714 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
38715 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
38716 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
38717 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
38718 ; BITALG-NEXT: retq
38719 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
38720 %3 = icmp ult <2 x i64> %2, <i64 62, i64 62>
38721 %4 = sext <2 x i1> %3 to <2 x i64>
38725 define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) {
38726 ; SSE2-LABEL: ugt_62_v2i64:
38728 ; SSE2-NEXT: movdqa %xmm0, %xmm1
38729 ; SSE2-NEXT: psrlw $1, %xmm1
38730 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38731 ; SSE2-NEXT: psubb %xmm1, %xmm0
38732 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
38733 ; SSE2-NEXT: movdqa %xmm0, %xmm2
38734 ; SSE2-NEXT: pand %xmm1, %xmm2
38735 ; SSE2-NEXT: psrlw $2, %xmm0
38736 ; SSE2-NEXT: pand %xmm1, %xmm0
38737 ; SSE2-NEXT: paddb %xmm2, %xmm0
38738 ; SSE2-NEXT: movdqa %xmm0, %xmm1
38739 ; SSE2-NEXT: psrlw $4, %xmm1
38740 ; SSE2-NEXT: paddb %xmm0, %xmm1
38741 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38742 ; SSE2-NEXT: pxor %xmm0, %xmm0
38743 ; SSE2-NEXT: psadbw %xmm0, %xmm1
38744 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38745 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
38746 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
38747 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38748 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
38749 ; SSE2-NEXT: pand %xmm2, %xmm3
38750 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
38751 ; SSE2-NEXT: por %xmm3, %xmm0
38754 ; SSE3-LABEL: ugt_62_v2i64:
38756 ; SSE3-NEXT: movdqa %xmm0, %xmm1
38757 ; SSE3-NEXT: psrlw $1, %xmm1
38758 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38759 ; SSE3-NEXT: psubb %xmm1, %xmm0
38760 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
38761 ; SSE3-NEXT: movdqa %xmm0, %xmm2
38762 ; SSE3-NEXT: pand %xmm1, %xmm2
38763 ; SSE3-NEXT: psrlw $2, %xmm0
38764 ; SSE3-NEXT: pand %xmm1, %xmm0
38765 ; SSE3-NEXT: paddb %xmm2, %xmm0
38766 ; SSE3-NEXT: movdqa %xmm0, %xmm1
38767 ; SSE3-NEXT: psrlw $4, %xmm1
38768 ; SSE3-NEXT: paddb %xmm0, %xmm1
38769 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38770 ; SSE3-NEXT: pxor %xmm0, %xmm0
38771 ; SSE3-NEXT: psadbw %xmm0, %xmm1
38772 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38773 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
38774 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
38775 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38776 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
38777 ; SSE3-NEXT: pand %xmm2, %xmm3
38778 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
38779 ; SSE3-NEXT: por %xmm3, %xmm0
38782 ; SSSE3-LABEL: ugt_62_v2i64:
38784 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38785 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
38786 ; SSSE3-NEXT: pand %xmm1, %xmm2
38787 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38788 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
38789 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
38790 ; SSSE3-NEXT: psrlw $4, %xmm0
38791 ; SSSE3-NEXT: pand %xmm1, %xmm0
38792 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
38793 ; SSSE3-NEXT: paddb %xmm4, %xmm3
38794 ; SSSE3-NEXT: pxor %xmm0, %xmm0
38795 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
38796 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
38797 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
38798 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
38799 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
38800 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
38801 ; SSSE3-NEXT: pand %xmm1, %xmm2
38802 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
38803 ; SSSE3-NEXT: por %xmm2, %xmm0
38806 ; SSE41-LABEL: ugt_62_v2i64:
38808 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38809 ; SSE41-NEXT: movdqa %xmm0, %xmm2
38810 ; SSE41-NEXT: pand %xmm1, %xmm2
38811 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38812 ; SSE41-NEXT: movdqa %xmm3, %xmm4
38813 ; SSE41-NEXT: pshufb %xmm2, %xmm4
38814 ; SSE41-NEXT: psrlw $4, %xmm0
38815 ; SSE41-NEXT: pand %xmm1, %xmm0
38816 ; SSE41-NEXT: pshufb %xmm0, %xmm3
38817 ; SSE41-NEXT: paddb %xmm4, %xmm3
38818 ; SSE41-NEXT: pxor %xmm0, %xmm0
38819 ; SSE41-NEXT: psadbw %xmm0, %xmm3
38820 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
38821 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
38822 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
38823 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
38824 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
38825 ; SSE41-NEXT: pand %xmm1, %xmm2
38826 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
38827 ; SSE41-NEXT: por %xmm2, %xmm0
38830 ; AVX1-LABEL: ugt_62_v2i64:
38832 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38833 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
38834 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38835 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
38836 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
38837 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
38838 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
38839 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
38840 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
38841 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
38842 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
38845 ; AVX2-LABEL: ugt_62_v2i64:
38847 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38848 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
38849 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38850 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
38851 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
38852 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
38853 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
38854 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
38855 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
38856 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
38857 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
38860 ; AVX512VPOPCNTDQ-LABEL: ugt_62_v2i64:
38861 ; AVX512VPOPCNTDQ: # %bb.0:
38862 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
38863 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
38864 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
38865 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
38866 ; AVX512VPOPCNTDQ-NEXT: retq
38868 ; AVX512VPOPCNTDQVL-LABEL: ugt_62_v2i64:
38869 ; AVX512VPOPCNTDQVL: # %bb.0:
38870 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
38871 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
38872 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
38873 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
38874 ; AVX512VPOPCNTDQVL-NEXT: retq
38876 ; BITALG_NOVLX-LABEL: ugt_62_v2i64:
38877 ; BITALG_NOVLX: # %bb.0:
38878 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
38879 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
38880 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
38881 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
38882 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
38883 ; BITALG_NOVLX-NEXT: vzeroupper
38884 ; BITALG_NOVLX-NEXT: retq
38886 ; BITALG-LABEL: ugt_62_v2i64:
38888 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
38889 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
38890 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
38891 ; BITALG-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
38892 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
38893 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
38894 ; BITALG-NEXT: retq
38895 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
38896 %3 = icmp ugt <2 x i64> %2, <i64 62, i64 62>
38897 %4 = sext <2 x i1> %3 to <2 x i64>
38901 define <2 x i64> @ult_63_v2i64(<2 x i64> %0) {
38902 ; SSE2-LABEL: ult_63_v2i64:
38904 ; SSE2-NEXT: movdqa %xmm0, %xmm1
38905 ; SSE2-NEXT: psrlw $1, %xmm1
38906 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38907 ; SSE2-NEXT: psubb %xmm1, %xmm0
38908 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
38909 ; SSE2-NEXT: movdqa %xmm0, %xmm2
38910 ; SSE2-NEXT: pand %xmm1, %xmm2
38911 ; SSE2-NEXT: psrlw $2, %xmm0
38912 ; SSE2-NEXT: pand %xmm1, %xmm0
38913 ; SSE2-NEXT: paddb %xmm2, %xmm0
38914 ; SSE2-NEXT: movdqa %xmm0, %xmm1
38915 ; SSE2-NEXT: psrlw $4, %xmm1
38916 ; SSE2-NEXT: paddb %xmm0, %xmm1
38917 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38918 ; SSE2-NEXT: pxor %xmm0, %xmm0
38919 ; SSE2-NEXT: psadbw %xmm0, %xmm1
38920 ; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38921 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
38922 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
38923 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483711,2147483711]
38924 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
38925 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
38926 ; SSE2-NEXT: pand %xmm2, %xmm1
38927 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
38928 ; SSE2-NEXT: por %xmm1, %xmm0
38931 ; SSE3-LABEL: ult_63_v2i64:
38933 ; SSE3-NEXT: movdqa %xmm0, %xmm1
38934 ; SSE3-NEXT: psrlw $1, %xmm1
38935 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38936 ; SSE3-NEXT: psubb %xmm1, %xmm0
38937 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
38938 ; SSE3-NEXT: movdqa %xmm0, %xmm2
38939 ; SSE3-NEXT: pand %xmm1, %xmm2
38940 ; SSE3-NEXT: psrlw $2, %xmm0
38941 ; SSE3-NEXT: pand %xmm1, %xmm0
38942 ; SSE3-NEXT: paddb %xmm2, %xmm0
38943 ; SSE3-NEXT: movdqa %xmm0, %xmm1
38944 ; SSE3-NEXT: psrlw $4, %xmm1
38945 ; SSE3-NEXT: paddb %xmm0, %xmm1
38946 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38947 ; SSE3-NEXT: pxor %xmm0, %xmm0
38948 ; SSE3-NEXT: psadbw %xmm0, %xmm1
38949 ; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
38950 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
38951 ; SSE3-NEXT: pcmpeqd %xmm0, %xmm2
38952 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483711,2147483711]
38953 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
38954 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
38955 ; SSE3-NEXT: pand %xmm2, %xmm1
38956 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
38957 ; SSE3-NEXT: por %xmm1, %xmm0
38960 ; SSSE3-LABEL: ult_63_v2i64:
38962 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38963 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
38964 ; SSSE3-NEXT: pand %xmm1, %xmm2
38965 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38966 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
38967 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
38968 ; SSSE3-NEXT: psrlw $4, %xmm0
38969 ; SSSE3-NEXT: pand %xmm1, %xmm0
38970 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
38971 ; SSSE3-NEXT: paddb %xmm4, %xmm3
38972 ; SSSE3-NEXT: pxor %xmm0, %xmm0
38973 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
38974 ; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
38975 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
38976 ; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
38977 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483711,2147483711]
38978 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
38979 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
38980 ; SSSE3-NEXT: pand %xmm1, %xmm2
38981 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
38982 ; SSSE3-NEXT: por %xmm2, %xmm0
38985 ; SSE41-LABEL: ult_63_v2i64:
38987 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
38988 ; SSE41-NEXT: movdqa %xmm0, %xmm2
38989 ; SSE41-NEXT: pand %xmm1, %xmm2
38990 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
38991 ; SSE41-NEXT: movdqa %xmm3, %xmm4
38992 ; SSE41-NEXT: pshufb %xmm2, %xmm4
38993 ; SSE41-NEXT: psrlw $4, %xmm0
38994 ; SSE41-NEXT: pand %xmm1, %xmm0
38995 ; SSE41-NEXT: pshufb %xmm0, %xmm3
38996 ; SSE41-NEXT: paddb %xmm4, %xmm3
38997 ; SSE41-NEXT: pxor %xmm0, %xmm0
38998 ; SSE41-NEXT: psadbw %xmm0, %xmm3
38999 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
39000 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
39001 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
39002 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483711,2147483711]
39003 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0
39004 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
39005 ; SSE41-NEXT: pand %xmm1, %xmm2
39006 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
39007 ; SSE41-NEXT: por %xmm2, %xmm0
39010 ; AVX1-LABEL: ult_63_v2i64:
39012 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
39013 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
39014 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
39015 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
39016 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
39017 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
39018 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
39019 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
39020 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
39021 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
39022 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63]
39023 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
39026 ; AVX2-LABEL: ult_63_v2i64:
39028 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
39029 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
39030 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
39031 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
39032 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
39033 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
39034 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
39035 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
39036 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
39037 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
39038 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63]
39039 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
39042 ; AVX512VPOPCNTDQ-LABEL: ult_63_v2i64:
39043 ; AVX512VPOPCNTDQ: # %bb.0:
39044 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
39045 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
39046 ; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63]
39047 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
39048 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
39049 ; AVX512VPOPCNTDQ-NEXT: retq
39051 ; AVX512VPOPCNTDQVL-LABEL: ult_63_v2i64:
39052 ; AVX512VPOPCNTDQVL: # %bb.0:
39053 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
39054 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
39055 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
39056 ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
39057 ; AVX512VPOPCNTDQVL-NEXT: retq
39059 ; BITALG_NOVLX-LABEL: ult_63_v2i64:
39060 ; BITALG_NOVLX: # %bb.0:
39061 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
39062 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
39063 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
39064 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
39065 ; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63]
39066 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
39067 ; BITALG_NOVLX-NEXT: vzeroupper
39068 ; BITALG_NOVLX-NEXT: retq
39070 ; BITALG-LABEL: ult_63_v2i64:
39072 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
39073 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
39074 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
39075 ; BITALG-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
39076 ; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
39077 ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
39078 ; BITALG-NEXT: retq
39079 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
39080 %3 = icmp ult <2 x i64> %2, <i64 63, i64 63>
39081 %4 = sext <2 x i1> %3 to <2 x i64>
; Declarations of the vector population-count intrinsics exercised by the
; tests in this file (e.g. @llvm.ctpop.v2i64 is called by the v2i64 tests
; above); one declaration per tested element width.
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)